#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd

import ganeti.masterd.instance # pylint: disable-msg=W0611


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if val == constants.VALUE_DEFAULT:
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)

    """
    def __init__(self, offline=False):
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)


    test = nresult.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)


1386
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1387
    """Verify an instance.
1388

1389
    This function checks to see if the required block devices are
1390
    available on the instance's node.
1391

1392
    """
1393
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1394
    node_current = instanceconfig.primary_node
1395

    
1396
    node_vol_should = {}
1397
    instanceconfig.MapLVsByNode(node_vol_should)
1398

    
1399
    for node in node_vol_should:
1400
      n_img = node_image[node]
1401
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1402
        # ignore missing volumes on offline or broken nodes
1403
        continue
1404
      for volume in node_vol_should[node]:
1405
        test = volume not in n_img.volumes
1406
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1407
                 "volume %s missing on node %s", volume, node)
1408

    
1409
    if instanceconfig.admin_up:
1410
      pri_img = node_image[node_current]
1411
      test = instance not in pri_img.instances and not pri_img.offline
1412
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1413
               "instance not running on its primary node %s",
1414
               node_current)
1415

    
1416
    for node, n_img in node_image.items():
1417
      if (not node == node_current):
1418
        test = instance in n_img.instances
1419
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1420
                 "instance should not run on node %s", node)
1421

    
1422
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1423
    """Verify if there are any unknown volumes in the cluster.
1424

1425
    The .os, .swap and backup volumes are ignored. All other volumes are
1426
    reported as unknown.
1427

1428
    """
1429
    for node, n_img in node_image.items():
1430
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1431
        # skip non-healthy nodes
1432
        continue
1433
      for volume in n_img.volumes:
1434
        test = (node not in node_vol_should or
1435
                volume not in node_vol_should[node])
1436
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1437
                      "volume %s is unknown", volume)
1438

    
1439
  def _VerifyOrphanInstances(self, instancelist, node_image):
1440
    """Verify the list of running instances.
1441

1442
    This checks what instances are running but unknown to the cluster.
1443

1444
    """
1445
    for node, n_img in node_image.items():
1446
      for o_inst in n_img.instances:
1447
        test = o_inst not in instancelist
1448
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1449
                      "instance %s on node %s should not exist", o_inst, node)
1450

    
1451
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1452
    """Verify N+1 Memory Resilience.
1453

1454
    Check that if one single node dies we can still start all the
1455
    instances it was primary for.
1456

1457
    """
1458
    for node, n_img in node_image.items():
1459
      # This code checks that every node which is now listed as
1460
      # secondary has enough memory to host all instances it is
1461
      # supposed to should a single other node in the cluster fail.
1462
      # FIXME: not ready for failover to an arbitrary node
1463
      # FIXME: does not support file-backed instances
1464
      # WARNING: we currently take into account down instances as well
1465
      # as up ones, considering that even if they're down someone
1466
      # might want to start them even in the event of a node failure.
1467
      for prinode, instances in n_img.sbp.items():
1468
        needed_mem = 0
1469
        for instance in instances:
1470
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1471
          if bep[constants.BE_AUTO_BALANCE]:
1472
            needed_mem += bep[constants.BE_MEMORY]
1473
        test = n_img.mfree < needed_mem
1474
        self._ErrorIf(test, self.ENODEN1, node,
1475
                      "not enough memory on to accommodate"
1476
                      " failovers should peer node %s fail", prinode)
1477

    
1478
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1479
                       master_files):
1480
    """Verifies and computes the node required file checksums.
1481

1482
    @type ninfo: L{objects.Node}
1483
    @param ninfo: the node to check
1484
    @param nresult: the remote results for the node
1485
    @param file_list: required list of files
1486
    @param local_cksum: dictionary of local files and their checksums
1487
    @param master_files: list of files that only masters should have
1488

1489
    """
1490
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      }
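    # node_verify_param is the request sent to each node via
    # call_node_verify below; every NV_* key asks the node daemon for one
    # class of data (file checksums, network tests, hypervisor info, ...).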
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage()
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next
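    # In particular, for each node_image entry: pinst/sinst list the
    # instances for which the node is primary/secondary, and sbp maps
    # each primary node to the instances this node is secondary for.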
    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)

      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image)
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_image)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disk sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
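        # the reported size is converted with a 20-bit shift (bytes to
        # MiB, assuming the RPC returns bytes) so it can be compared with
        # the recorded disk.size below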
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.GetHostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)


def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV


class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    for attr in ["candidate_pool_size",
                 "uid_pool", "add_uids", "remove_uids"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                   str(err), errors.ECODE_INVAL)
      if self.op.candidate_pool_size < 1:
        raise errors.OpPrereqError("At least one master candidate needed",
                                   errors.ECODE_INVAL)

    _CheckBooleanOpField(self.op, "maintain_node_health")

    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      instances = self.cfg.GetAllInstancesInfo().values()
      for inst in instances:
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist",
                                       errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed nic with no ip" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
                                   errors.ECODE_INVAL)
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      if not isinstance(self.op.os_hvp, dict):
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
                                   errors.ECODE_INVAL)
      for os_name, hvs in self.op.os_hvp.items():
        if not isinstance(hvs, dict):
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
                                      " input"), errors.ECODE_INVAL)
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      if not self.hv_list:
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                                   " least one member",
                                   errors.ECODE_INVAL)
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
      if invalid_hvs:
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                                   " entries: %s" %
                                   utils.CommaJoin(invalid_hvs),
                                   errors.ECODE_INVAL)
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)


  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    self.cfg.Update(self.cluster, feedback_fn)


def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants, params in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        valid = True
        variants = None
        for osl in os_data.values():
          valid = bool(valid and osl and osl[0][1])
          if not valid:
            variants = set()
            break
          if calc_variants:
            node_variants = osl[0][3]
            if variants is None:
              variants = set(node_variants)
            else:
              variants.intersection_update(node_variants)

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = list(variants)
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.RemoveHostFromEtcHosts(node.name)
      _RedistributeAncillaryFiles(self)


class LUQueryNodes(NoHooksLU):
2912
  """Logical unit for querying nodes.
2913

2914
  """
2915
  # pylint: disable-msg=W0142
2916
  _OP_REQP = ["output_fields", "names", "use_locking"]
2917
  REQ_BGL = False
2918

    
2919
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2920
                    "master_candidate", "offline", "drained"]
2921

    
2922
  _FIELDS_DYNAMIC = utils.FieldSet(
2923
    "dtotal", "dfree",
2924
    "mtotal", "mnode", "mfree",
2925
    "bootid",
2926
    "ctotal", "cnodes", "csockets",
2927
    )
2928

    
2929
  _FIELDS_STATIC = utils.FieldSet(*[
2930
    "pinst_cnt", "sinst_cnt",
2931
    "pinst_list", "sinst_list",
2932
    "pip", "sip", "tags",
2933
    "master",
2934
    "role"] + _SIMPLE_FIELDS
2935
    )
2936

    
2937
  def ExpandNames(self):
2938
    _CheckOutputFields(static=self._FIELDS_STATIC,
2939
                       dynamic=self._FIELDS_DYNAMIC,
2940
                       selected=self.op.output_fields)
2941

    
2942
    self.needed_locks = {}
2943
    self.share_locks[locking.LEVEL_NODE] = 1
2944

    
2945
    if self.op.names:
2946
      self.wanted = _GetWantedNodes(self, self.op.names)
2947
    else:
2948
      self.wanted = locking.ALL_SET
2949

    
2950
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2951
    self.do_locking = self.do_node_query and self.op.use_locking
2952
    if self.do_locking:
2953
      # if we don't request only static fields, we need to lock the nodes
2954
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2955

    
2956
  def CheckPrereq(self):
2957
    """Check prerequisites.
2958

2959
    """
2960
    # The validation of the node list is done in the _GetWantedNodes,
2961
    # if non empty, and if empty, there's no validation to do
2962
    pass
2963

    
2964
  def Exec(self, feedback_fn):
2965
    """Computes the list of nodes and their attributes.
2966

2967
    """
2968
    all_info = self.cfg.GetAllNodesInfo()
2969
    if self.do_locking:
2970
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2971
    elif self.wanted != locking.ALL_SET:
2972
      nodenames = self.wanted
2973
      missing = set(nodenames).difference(all_info.keys())
2974
      if missing:
2975
        raise errors.OpExecError(
2976
          "Some nodes were removed before retrieving their data: %s" % missing)
2977
    else:
2978
      nodenames = all_info.keys()
2979

    
2980
    nodenames = utils.NiceSort(nodenames)
2981
    nodelist = [all_info[name] for name in nodenames]
2982

    
2983
    # begin data gathering
2984

    
2985
    if self.do_node_query:
2986
      live_data = {}
2987
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2988
                                          self.cfg.GetHypervisorType())
2989
      for name in nodenames:
2990
        nodeinfo = node_data[name]
2991
        if not nodeinfo.fail_msg and nodeinfo.payload:
2992
          nodeinfo = nodeinfo.payload
2993
          fn = utils.TryConvert
2994
          live_data[name] = {
2995
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2996
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2997
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2998
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2999
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
3000
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3001
            "bootid": nodeinfo.get('bootid', None),
3002
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3003
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3004
            }
3005
        else:
3006
          live_data[name] = {}
3007
    else:
3008
      live_data = dict.fromkeys(nodenames, {})
3009

    
3010
    node_to_primary = dict([(name, set()) for name in nodenames])
3011
    node_to_secondary = dict([(name, set()) for name in nodenames])
3012

    
3013
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
3014
                             "sinst_cnt", "sinst_list"))
3015
    if inst_fields & frozenset(self.op.output_fields):
3016
      inst_data = self.cfg.GetAllInstancesInfo()
3017

    
3018
      for inst in inst_data.values():
3019
        if inst.primary_node in node_to_primary:
3020
          node_to_primary[inst.primary_node].add(inst.name)
3021
        for secnode in inst.secondary_nodes:
3022
          if secnode in node_to_secondary:
3023
            node_to_secondary[secnode].add(inst.name)
3024

    
3025
    master_node = self.cfg.GetMasterNode()
3026

    
3027
    # end data gathering
3028

    
3029
    output = []
3030
    for node in nodelist:
3031
      node_output = []
3032
      for field in self.op.output_fields:
3033
        if field in self._SIMPLE_FIELDS:
3034
          val = getattr(node, field)
3035
        elif field == "pinst_list":
3036
          val = list(node_to_primary[node.name])
3037
        elif field == "sinst_list":
3038
          val = list(node_to_secondary[node.name])
3039
        elif field == "pinst_cnt":
3040
          val = len(node_to_primary[node.name])
3041
        elif field == "sinst_cnt":
3042
          val = len(node_to_secondary[node.name])
3043
        elif field == "pip":
3044
          val = node.primary_ip
3045
        elif field == "sip":
3046
          val = node.secondary_ip
3047
        elif field == "tags":
3048
          val = list(node.GetTags())
3049
        elif field == "master":
3050
          val = node.name == master_node
3051
        elif self._FIELDS_DYNAMIC.Matches(field):
3052
          val = live_data[node.name].get(field, None)
3053
        elif field == "role":
3054
          if node.name == master_node:
3055
            val = "M"
3056
          elif node.master_candidate:
3057
            val = "C"
3058
          elif node.drained:
3059
            val = "D"
3060
          elif node.offline:
3061
            val = "O"
3062
          else:
3063
            val = "R"
3064
        else:
3065
          raise errors.ParameterError(field)
3066
        node_output.append(val)
3067
      output.append(node_output)
3068

    
3069
    return output
3070

    
3071

    
3072
class LUQueryNodeVolumes(NoHooksLU):
3073
  """Logical unit for getting volumes on node(s).
3074

3075
  """
3076
  _OP_REQP = ["nodes", "output_fields"]
3077
  REQ_BGL = False
3078
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3079
  _FIELDS_STATIC = utils.FieldSet("node")
3080

    
3081
  def ExpandNames(self):
3082
    _CheckOutputFields(static=self._FIELDS_STATIC,
3083
                       dynamic=self._FIELDS_DYNAMIC,
3084
                       selected=self.op.output_fields)
3085

    
3086
    self.needed_locks = {}
3087
    self.share_locks[locking.LEVEL_NODE] = 1
3088
    if not self.op.nodes:
3089
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3090
    else:
3091
      self.needed_locks[locking.LEVEL_NODE] = \
3092
        _GetWantedNodes(self, self.op.nodes)
3093

    
3094
  def CheckPrereq(self):
3095
    """Check prerequisites.
3096

3097
    This checks that the fields required are valid output fields.
3098

3099
    """
3100
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3101

    
3102
  def Exec(self, feedback_fn):
3103
    """Computes the list of nodes and their attributes.
3104

3105
    """
3106
    nodenames = self.nodes
3107
    volumes = self.rpc.call_node_volumes(nodenames)
3108

    
3109
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3110
             in self.cfg.GetInstanceList()]
3111

    
3112
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3113

    
3114
    output = []
3115
    for node in nodenames:
3116
      nresult = volumes[node]
3117
      if nresult.offline:
3118
        continue
3119
      msg = nresult.fail_msg
3120
      if msg:
3121
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3122
        continue
3123

    
3124
      node_vols = nresult.payload[:]
3125
      node_vols.sort(key=lambda vol: vol['dev'])
3126

    
3127
      for vol in node_vols:
3128
        node_output = []
3129
        for field in self.op.output_fields:
3130
          if field == "node":
3131
            val = node
3132
          elif field == "phys":
3133
            val = vol['dev']
3134
          elif field == "vg":
3135
            val = vol['vg']
3136
          elif field == "name":
3137
            val = vol['name']
3138
          elif field == "size":
3139
            val = int(float(vol['size']))
3140
          elif field == "instance":
3141
            for inst in ilist:
3142
              if node not in lv_by_node[inst]:
3143
                continue
3144
              if vol['name'] in lv_by_node[inst][node]:
3145
                val = inst.name
3146
                break
3147
            else:
3148
              val = '-'
3149
          else:
3150
            raise errors.ParameterError(field)
3151
          node_output.append(str(val))
3152

    
3153
        output.append(node_output)
3154

    
3155
    return output
3156

    
3157

    
3158
class LUQueryNodeStorage(NoHooksLU):
3159
  """Logical unit for getting information on storage units on node(s).
3160

3161
  """
3162
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
3163
  REQ_BGL = False
3164
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3165

    
3166
  def CheckArguments(self):
3167
    _CheckStorageType(self.op.storage_type)
3168

    
3169
    _CheckOutputFields(static=self._FIELDS_STATIC,
3170
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3171
                       selected=self.op.output_fields)
3172

    
3173
  def ExpandNames(self):
3174
    self.needed_locks = {}
3175
    self.share_locks[locking.LEVEL_NODE] = 1
3176

    
3177
    if self.op.nodes:
3178
      self.needed_locks[locking.LEVEL_NODE] = \
3179
        _GetWantedNodes(self, self.op.nodes)
3180
    else:
3181
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3182

    
3183
  def CheckPrereq(self):
3184
    """Check prerequisites.
3185

3186
    This checks that the fields required are valid output fields.
3187

3188
    """
3189
    self.op.name = getattr(self.op, "name", None)
3190

    
3191
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3192

    
3193
  def Exec(self, feedback_fn):
3194
    """Computes the list of nodes and their attributes.
3195

3196
    """
3197
    # Always get name to sort by
3198
    if constants.SF_NAME in self.op.output_fields:
3199
      fields = self.op.output_fields[:]
3200
    else:
3201
      fields = [constants.SF_NAME] + self.op.output_fields
3202

    
3203
    # Never ask for node or type as it's only known to the LU
3204
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3205
      while extra in fields:
3206
        fields.remove(extra)
3207

    
3208
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3209
    name_idx = field_idx[constants.SF_NAME]
3210

    
3211
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3212
    data = self.rpc.call_storage_list(self.nodes,
3213
                                      self.op.storage_type, st_args,
3214
                                      self.op.name, fields)
3215

    
3216
    result = []
3217

    
3218
    for node in utils.NiceSort(self.nodes):
3219
      nresult = data[node]
3220
      if nresult.offline:
3221
        continue
3222

    
3223
      msg = nresult.fail_msg
3224
      if msg:
3225
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3226
        continue
3227

    
3228
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3229

    
3230
      for name in utils.NiceSort(rows.keys()):
3231
        row = rows[name]
3232

    
3233
        out = []
3234

    
3235
        for field in self.op.output_fields:
3236
          if field == constants.SF_NODE:
3237
            val = node
3238
          elif field == constants.SF_TYPE:
3239
            val = self.op.storage_type
3240
          elif field in field_idx:
3241
            val = row[field_idx[field]]
3242
          else:
3243
            raise errors.ParameterError(field)
3244

    
3245
          out.append(val)
3246

    
3247
        result.append(out)
3248

    
3249
    return result
3250

    
3251

    
3252
class LUModifyNodeStorage(NoHooksLU):
3253
  """Logical unit for modifying a storage volume on a node.
3254

3255
  """
3256
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3257
  REQ_BGL = False
3258

    
3259
  def CheckArguments(self):
3260
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3261

    
3262
    _CheckStorageType(self.op.storage_type)
3263

    
3264
  def ExpandNames(self):
3265
    self.needed_locks = {
3266
      locking.LEVEL_NODE: self.op.node_name,
3267
      }
3268

    
3269
  def CheckPrereq(self):
3270
    """Check prerequisites.
3271

3272
    """
3273
    storage_type = self.op.storage_type
3274

    
3275
    try:
3276
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3277
    except KeyError:
3278
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
3279
                                 " modified" % storage_type,
3280
                                 errors.ECODE_INVAL)
3281

    
3282
    diff = set(self.op.changes.keys()) - modifiable
3283
    if diff:
3284
      raise errors.OpPrereqError("The following fields can not be modified for"
3285
                                 " storage units of type '%s': %r" %
3286
                                 (storage_type, list(diff)),
3287
                                 errors.ECODE_INVAL)
3288

    
3289
  def Exec(self, feedback_fn):
3290
    """Computes the list of nodes and their attributes.
3291

3292
    """
3293
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3294
    result = self.rpc.call_storage_modify(self.op.node_name,
3295
                                          self.op.storage_type, st_args,
3296
                                          self.op.name, self.op.changes)
3297
    result.Raise("Failed to modify storage unit '%s' on %s" %
3298
                 (self.op.name, self.op.node_name))
3299

    
3300

    
3301
class LUAddNode(LogicalUnit):
3302
  """Logical unit for adding node to the cluster.
3303

3304
  """
3305
  HPATH = "node-add"
3306
  HTYPE = constants.HTYPE_NODE
3307
  _OP_REQP = ["node_name"]
3308

    
3309
  def CheckArguments(self):
3310
    # validate/normalize the node name
3311
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3312

    
3313
  def BuildHooksEnv(self):
3314
    """Build hooks env.
3315

3316
    This will run on all nodes before, and on all nodes + the new node after.
3317

3318
    """
3319
    env = {
3320
      "OP_TARGET": self.op.node_name,
3321
      "NODE_NAME": self.op.node_name,
3322
      "NODE_PIP": self.op.primary_ip,
3323
      "NODE_SIP": self.op.secondary_ip,
3324
      }
3325
    nodes_0 = self.cfg.GetNodeList()
3326
    nodes_1 = nodes_0 + [self.op.node_name, ]
3327
    return env, nodes_0, nodes_1
3328

    
3329
  def CheckPrereq(self):
3330
    """Check prerequisites.
3331

3332
    This checks:
3333
     - the new node is not already in the config
3334
     - it is resolvable
3335
     - its parameters (single/dual homed) matches the cluster
3336

3337
    Any errors are signaled by raising errors.OpPrereqError.
3338

3339
    """
3340
    node_name = self.op.node_name
3341
    cfg = self.cfg
3342

    
3343
    dns_data = utils.GetHostInfo(node_name)
3344

    
3345
    node = dns_data.name
3346
    primary_ip = self.op.primary_ip = dns_data.ip
3347
    secondary_ip = getattr(self.op, "secondary_ip", None)
3348
    if secondary_ip is None:
3349
      secondary_ip = primary_ip
3350
    if not utils.IsValidIP(secondary_ip):
3351
      raise errors.OpPrereqError("Invalid secondary IP given",
3352
                                 errors.ECODE_INVAL)
3353
    self.op.secondary_ip = secondary_ip
3354

    
3355
    node_list = cfg.GetNodeList()
3356
    if not self.op.readd and node in node_list:
3357
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3358
                                 node, errors.ECODE_EXISTS)
3359
    elif self.op.readd and node not in node_list:
3360
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3361
                                 errors.ECODE_NOENT)
3362

    
3363
    self.changed_primary_ip = False
3364

    
3365
    for existing_node_name in node_list:
3366
      existing_node = cfg.GetNodeInfo(existing_node_name)
3367

    
3368
      if self.op.readd and node == existing_node_name:
3369
        if existing_node.secondary_ip != secondary_ip:
3370
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
3371
                                     " address configuration as before",
3372
                                     errors.ECODE_INVAL)
3373
        if existing_node.primary_ip != primary_ip:
3374
          self.changed_primary_ip = True
3375

    
3376
        continue
3377

    
3378
      if (existing_node.primary_ip == primary_ip or
3379
          existing_node.secondary_ip == primary_ip or
3380
          existing_node.primary_ip == secondary_ip or
3381
          existing_node.secondary_ip == secondary_ip):
3382
        raise errors.OpPrereqError("New node ip address(es) conflict with"
3383
                                   " existing node %s" % existing_node.name,
3384
                                   errors.ECODE_NOTUNIQUE)
3385

    
3386
    # check that the type of the node (single versus dual homed) is the
3387
    # same as for the master
3388
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3389
    master_singlehomed = myself.secondary_ip == myself.primary_ip
3390
    newbie_singlehomed = secondary_ip == primary_ip
3391
    if master_singlehomed != newbie_singlehomed:
3392
      if master_singlehomed:
3393
        raise errors.OpPrereqError("The master has no private ip but the"
3394
                                   " new node has one",
3395
                                   errors.ECODE_INVAL)
3396
      else:
3397
        raise errors.OpPrereqError("The master has a private ip but the"
3398
                                   " new node doesn't have one",
3399
                                   errors.ECODE_INVAL)
3400

    
3401
    # checks reachability
3402
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3403
      raise errors.OpPrereqError("Node not reachable by ping",
3404
                                 errors.ECODE_ENVIRON)
3405

    
3406
    if not newbie_singlehomed:
3407
      # check reachability from my secondary ip to newbie's secondary ip
3408
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3409
                           source=myself.secondary_ip):
3410
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3411
                                   " based ping to noded port",
3412
                                   errors.ECODE_ENVIRON)
3413

    
3414
    if self.op.readd:
3415
      exceptions = [node]
3416
    else:
3417
      exceptions = []
3418

    
3419
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3420

    
3421
    if self.op.readd:
3422
      self.new_node = self.cfg.GetNodeInfo(node)
3423
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
3424
    else:
3425
      self.new_node = objects.Node(name=node,
3426
                                   primary_ip=primary_ip,
3427
                                   secondary_ip=secondary_ip,
3428
                                   master_candidate=self.master_candidate,
3429
                                   offline=False, drained=False)
3430

    
3431
  def Exec(self, feedback_fn):
3432
    """Adds the new node to the cluster.
3433

3434
    """
3435
    new_node = self.new_node
3436
    node = new_node.name
3437

    
3438
    # for re-adds, reset the offline/drained/master-candidate flags;
3439
    # we need to reset here, otherwise offline would prevent RPC calls
3440
    # later in the procedure; this also means that if the re-add
3441
    # fails, we are left with a non-offlined, broken node
3442
    if self.op.readd:
3443
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3444
      self.LogInfo("Readding a node, the offline/drained flags were reset")
3445
      # if we demote the node, we do cleanup later in the procedure
3446
      new_node.master_candidate = self.master_candidate
3447
      if self.changed_primary_ip:
3448
        new_node.primary_ip = self.op.primary_ip
3449

    
3450
    # notify the user about any possible mc promotion
3451
    if new_node.master_candidate:
3452
      self.LogInfo("Node will be a master candidate")
3453

    
3454
    # check connectivity
3455
    result = self.rpc.call_version([node])[node]
3456
    result.Raise("Can't get version information from node %s" % node)
3457
    if constants.PROTOCOL_VERSION == result.payload:
3458
      logging.info("Communication to node %s fine, sw version %s match",
3459
                   node, result.payload)
3460
    else:
3461
      raise errors.OpExecError("Version mismatch master version %s,"
3462
                               " node version %s" %
3463
                               (constants.PROTOCOL_VERSION, result.payload))
3464

    
3465
    # setup ssh on node
3466
    if self.cfg.GetClusterInfo().modify_ssh_setup:
3467
      logging.info("Copy ssh key to node %s", node)
3468
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3469
      keyarray = []
3470
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3471
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3472
                  priv_key, pub_key]
3473

    
3474
      for i in keyfiles:
3475
        keyarray.append(utils.ReadFile(i))
3476

    
3477
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3478
                                      keyarray[2], keyarray[3], keyarray[4],
3479
                                      keyarray[5])
3480
      result.Raise("Cannot transfer ssh keys to the new node")
3481

    
3482
    # Add node to our /etc/hosts, and add key to known_hosts
3483
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3484
      # FIXME: this should be done via an rpc call to node daemon
3485
      utils.AddHostToEtcHosts(new_node.name)
3486

    
3487
    if new_node.secondary_ip != new_node.primary_ip:
3488
      result = self.rpc.call_node_has_ip_address(new_node.name,
3489
                                                 new_node.secondary_ip)
3490
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3491
                   prereq=True, ecode=errors.ECODE_ENVIRON)
3492
      if not result.payload:
3493
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3494
                                 " you gave (%s). Please fix and re-run this"
3495
                                 " command." % new_node.secondary_ip)
3496

    
3497
    node_verify_list = [self.cfg.GetMasterNode()]
3498
    node_verify_param = {
3499
      constants.NV_NODELIST: [node],
3500
      # TODO: do a node-net-test as well?
3501
    }
3502

    
3503
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3504
                                       self.cfg.GetClusterName())
3505
    for verifier in node_verify_list:
3506
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
3507
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
3508
      if nl_payload:
3509
        for failed in nl_payload:
3510
          feedback_fn("ssh/hostname verification failed"
3511
                      " (checking from %s): %s" %
3512
                      (verifier, nl_payload[failed]))
3513
        raise errors.OpExecError("ssh/hostname verification failed.")
3514

    
3515
    if self.op.readd:
3516
      _RedistributeAncillaryFiles(self)
3517
      self.context.ReaddNode(new_node)
3518
      # make sure we redistribute the config
3519
      self.cfg.Update(new_node, feedback_fn)
3520
      # and make sure the new node will not have old files around
3521
      if not new_node.master_candidate:
3522
        result = self.rpc.call_node_demote_from_mc(new_node.name)
3523
        msg = result.fail_msg
3524
        if msg:
3525
          self.LogWarning("Node failed to demote itself from master"
3526
                          " candidate status: %s" % msg)
3527
    else:
3528
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3529
      self.context.AddNode(new_node, self.proc.GetECId())
3530

    
3531

    
3532
class LUSetNodeParams(LogicalUnit):
3533
  """Modifies the parameters of a node.
3534

3535
  """
3536
  HPATH = "node-modify"
3537
  HTYPE = constants.HTYPE_NODE
3538
  _OP_REQP = ["node_name"]
3539
  REQ_BGL = False
3540

    
3541
  def CheckArguments(self):
3542
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3543
    _CheckBooleanOpField(self.op, 'master_candidate')
3544
    _CheckBooleanOpField(self.op, 'offline')
3545
    _CheckBooleanOpField(self.op, 'drained')
3546
    _CheckBooleanOpField(self.op, 'auto_promote')
3547
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3548
    if all_mods.count(None) == 3:
3549
      raise errors.OpPrereqError("Please pass at least one modification",
3550
                                 errors.ECODE_INVAL)
3551
    if all_mods.count(True) > 1:
3552
      raise errors.OpPrereqError("Can't set the node into more than one"
3553
                                 " state at the same time",
3554
                                 errors.ECODE_INVAL)
3555

    
3556
    # Boolean value that tells us whether we're offlining or draining the node
3557
    self.offline_or_drain = (self.op.offline == True or
3558
                             self.op.drained == True)
3559
    self.deoffline_or_drain = (self.op.offline == False or
3560
                               self.op.drained == False)
3561
    self.might_demote = (self.op.master_candidate == False or
3562
                         self.offline_or_drain)
3563

    
3564
    self.lock_all = self.op.auto_promote and self.might_demote
3565

    
3566

    
3567
  def ExpandNames(self):
3568
    if self.lock_all:
3569
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3570
    else:
3571
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3572

    
3573
  def BuildHooksEnv(self):
3574
    """Build hooks env.
3575

3576
    This runs on the master node.
3577

3578
    """
3579
    env = {
3580
      "OP_TARGET": self.op.node_name,
3581
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3582
      "OFFLINE": str(self.op.offline),
3583
      "DRAINED": str(self.op.drained),
3584
      }
3585
    nl = [self.cfg.GetMasterNode(),
3586
          self.op.node_name]
3587
    return env, nl, nl
3588

    
3589
  def CheckPrereq(self):
3590
    """Check prerequisites.
3591

3592
    This only checks the instance list against the existing names.
3593

3594
    """
3595
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3596

    
3597
    if (self.op.master_candidate is not None or
3598
        self.op.drained is not None or
3599
        self.op.offline is not None):
3600
      # we can't change the master's node flags
3601
      if self.op.node_name == self.cfg.GetMasterNode():
3602
        raise errors.OpPrereqError("The master role can be changed"
3603
                                   " only via masterfailover",
3604
                                   errors.ECODE_INVAL)
3605

    
3606

    
3607
    if node.master_candidate and self.might_demote and not self.lock_all:
3608
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
3609
      # check if after removing the current node, we're missing master
3610
      # candidates
3611
      (mc_remaining, mc_should, _) = \
3612
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3613
      if mc_remaining < mc_should:
3614
        raise errors.OpPrereqError("Not enough master candidates, please"
3615
                                   " pass auto_promote to allow promotion",
3616
                                   errors.ECODE_INVAL)
3617

    
3618
    if (self.op.master_candidate == True and
3619
        ((node.offline and not self.op.offline == False) or
3620
         (node.drained and not self.op.drained == False))):
3621
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3622
                                 " to master_candidate" % node.name,
3623
                                 errors.ECODE_INVAL)
3624

    
3625
    # If we're being deofflined/drained, we'll MC ourself if needed
3626
    if (self.deoffline_or_drain and not self.offline_or_drain and not
3627
        self.op.master_candidate == True and not node.master_candidate):
3628
      self.op.master_candidate = _DecideSelfPromotion(self)
3629
      if self.op.master_candidate:
3630
        self.LogInfo("Autopromoting node to master candidate")
3631

    
3632
    return
3633

    
3634
  def Exec(self, feedback_fn):
3635
    """Modifies a node.
3636

3637
    """
3638
    node = self.node
3639

    
3640
    result = []
3641
    changed_mc = False
3642

    
3643
    if self.op.offline is not None:
3644
      node.offline = self.op.offline
3645
      result.append(("offline", str(self.op.offline)))
3646
      if self.op.offline == True:
3647
        if node.master_candidate:
3648
          node.master_candidate = False
3649
          changed_mc = True
3650
          result.append(("master_candidate", "auto-demotion due to offline"))
3651
        if node.drained:
3652
          node.drained = False
3653
          result.append(("drained", "clear drained status due to offline"))
3654

    
3655
    if self.op.master_candidate is not None:
3656
      node.master_candidate = self.op.master_candidate
3657
      changed_mc = True
3658
      result.append(("master_candidate", str(self.op.master_candidate)))
3659
      if self.op.master_candidate == False:
3660
        rrc = self.rpc.call_node_demote_from_mc(node.name)
3661
        msg = rrc.fail_msg
3662
        if msg:
3663
          self.LogWarning("Node failed to demote itself: %s" % msg)
3664

    
3665
    if self.op.drained is not None:
3666
      node.drained = self.op.drained
3667
      result.append(("drained", str(self.op.drained)))
3668
      if self.op.drained == True:
3669
        if node.master_candidate:
3670
          node.master_candidate = False
3671
          changed_mc = True
3672
          result.append(("master_candidate", "auto-demotion due to drain"))
3673
          rrc = self.rpc.call_node_demote_from_mc(node.name)
3674
          msg = rrc.fail_msg
3675
          if msg:
3676
            self.LogWarning("Node failed to demote itself: %s" % msg)
3677
        if node.offline:
3678
          node.offline = False
3679
          result.append(("offline", "clear offline status due to drain"))
3680

    
3681
    # we locked all nodes, we adjust the CP before updating this node
3682
    if self.lock_all:
3683
      _AdjustCandidatePool(self, [node.name])
3684

    
3685
    # this will trigger configuration file update, if needed
3686
    self.cfg.Update(node, feedback_fn)
3687

    
3688
    # this will trigger job queue propagation or cleanup
3689
    if changed_mc:
3690
      self.context.ReaddNode(node)
3691

    
3692
    return result
3693

    
3694

    
3695
class LUPowercycleNode(NoHooksLU):
3696
  """Powercycles a node.
3697

3698
  """
3699
  _OP_REQP = ["node_name", "force"]
3700
  REQ_BGL = False
3701

    
3702
  def CheckArguments(self):
3703
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3704
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3705
      raise errors.OpPrereqError("The node is the master and the force"
3706
                                 " parameter was not set",
3707
                                 errors.ECODE_INVAL)
3708

    
3709
  def ExpandNames(self):
3710
    """Locking for PowercycleNode.
3711

3712
    This is a last-resort option and shouldn't block on other
3713
    jobs. Therefore, we grab no locks.
3714

3715
    """
3716
    self.needed_locks = {}
3717

    
3718
  def CheckPrereq(self):
3719
    """Check prerequisites.
3720

3721
    This LU has no prereqs.
3722

3723
    """
3724
    pass
3725

    
3726
  def Exec(self, feedback_fn):
3727
    """Reboots a node.
3728

3729
    """
3730
    result = self.rpc.call_node_powercycle(self.op.node_name,
3731
                                           self.cfg.GetHypervisorType())
3732
    result.Raise("Failed to schedule the reboot")
3733
    return result.payload
3734

    
3735

    
3736
class LUQueryClusterInfo(NoHooksLU):
3737
  """Query cluster configuration.
3738

3739
  """
3740
  _OP_REQP = []
3741
  REQ_BGL = False
3742

    
3743
  def ExpandNames(self):
3744
    self.needed_locks = {}
3745

    
3746
  def CheckPrereq(self):
3747
    """No prerequsites needed for this LU.
3748

3749
    """
3750
    pass
3751

    
3752
  def Exec(self, feedback_fn):
3753
    """Return cluster config.
3754

3755
    """
3756
    cluster = self.cfg.GetClusterInfo()
3757
    os_hvp = {}
3758

    
3759
    # Filter just for enabled hypervisors
3760
    for os_name, hv_dict in cluster.os_hvp.items():
3761
      os_hvp[os_name] = {}
3762
      for hv_name, hv_params in hv_dict.items():
3763
        if hv_name in cluster.enabled_hypervisors:
3764
          os_hvp[os_name][hv_name] = hv_params
3765

    
3766
    result = {
3767
      "software_version": constants.RELEASE_VERSION,
3768
      "protocol_version": constants.PROTOCOL_VERSION,
3769
      "config_version": constants.CONFIG_VERSION,
3770
      "os_api_version": max(constants.OS_API_VERSIONS),
3771
      "export_version": constants.EXPORT_VERSION,
3772
      "architecture": (platform.architecture()[0], platform.machine()),
3773
      "name": cluster.cluster_name,
3774
      "master": cluster.master_node,
3775
      "default_hypervisor": cluster.enabled_hypervisors[0],
3776
      "enabled_hypervisors": cluster.enabled_hypervisors,
3777
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3778
                        for hypervisor_name in cluster.enabled_hypervisors]),
3779
      "os_hvp": os_hvp,
3780
      "beparams": cluster.beparams,
3781
      "osparams": cluster.osparams,
3782
      "nicparams": cluster.nicparams,
3783
      "candidate_pool_size": cluster.candidate_pool_size,
3784
      "master_netdev": cluster.master_netdev,
3785
      "volume_group_name": cluster.volume_group_name,
3786
      "file_storage_dir": cluster.file_storage_dir,
3787
      "maintain_node_health": cluster.maintain_node_health,
3788
      "ctime": cluster.ctime,
3789
      "mtime": cluster.mtime,
3790
      "uuid": cluster.uuid,
3791
      "tags": list(cluster.GetTags()),
3792
      "uid_pool": cluster.uid_pool,
3793
      }
3794

    
3795
    return result
3796

    
3797

    
3798
class LUQueryConfigValues(NoHooksLU):
3799
  """Return configuration values.
3800

3801
  """
3802
  _OP_REQP = []
3803
  REQ_BGL = False
3804
  _FIELDS_DYNAMIC = utils.FieldSet()
3805
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3806
                                  "watcher_pause")
3807

    
3808
  def ExpandNames(self):
3809
    self.needed_locks = {}
3810

    
3811
    _CheckOutputFields(static=self._FIELDS_STATIC,
3812
                       dynamic=self._FIELDS_DYNAMIC,
3813
                       selected=self.op.output_fields)
3814

    
3815
  def CheckPrereq(self):
3816
    """No prerequisites.
3817

3818
    """
3819
    pass
3820

    
3821
  def Exec(self, feedback_fn):
3822
    """Dump a representation of the cluster config to the standard output.
3823

3824
    """
3825
    values = []
3826
    for field in self.op.output_fields:
3827
      if field == "cluster_name":
3828
        entry = self.cfg.GetClusterName()
3829
      elif field == "master_node":
3830
        entry = self.cfg.GetMasterNode()
3831
      elif field == "drain_flag":
3832
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3833
      elif field == "watcher_pause":
3834
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3835
      else:
3836
        raise errors.ParameterError(field)
3837
      values.append(entry)
3838
    return values
3839

    
3840

    
3841
class LUActivateInstanceDisks(NoHooksLU):
3842
  """Bring up an instance's disks.
3843

3844
  """
3845
  _OP_REQP = ["instance_name"]
3846
  REQ_BGL = False
3847

    
3848
  def ExpandNames(self):
3849
    self._ExpandAndLockInstance()
3850
    self.needed_locks[locking.LEVEL_NODE] = []
3851
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3852

    
3853
  def DeclareLocks(self, level):
3854
    if level == locking.LEVEL_NODE:
3855
      self._LockInstancesNodes()
3856

    
3857
  def CheckPrereq(self):
3858
    """Check prerequisites.
3859

3860
    This checks that the instance is in the cluster.
3861

3862
    """
3863
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3864
    assert self.instance is not None, \
3865
      "Cannot retrieve locked instance %s" % self.op.instance_name
3866
    _CheckNodeOnline(self, self.instance.primary_node)
3867
    if not hasattr(self.op, "ignore_size"):
3868
      self.op.ignore_size = False
3869

    
3870
  def Exec(self, feedback_fn):
3871
    """Activate the disks.
3872

3873
    """
3874
    disks_ok, disks_info = \
3875
              _AssembleInstanceDisks(self, self.instance,
3876
                                     ignore_size=self.op.ignore_size)
3877
    if not disks_ok:
3878
      raise errors.OpExecError("Cannot activate block devices")
3879

    
3880
    return disks_info
3881

    
3882

    
3883
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
3884
                           ignore_size=False):
3885
  """Prepare the block devices for an instance.
3886

3887
  This sets up the block devices on all nodes.
3888

3889
  @type lu: L{LogicalUnit}
3890
  @param lu: the logical unit on whose behalf we execute
3891
  @type instance: L{objects.Instance}
3892
  @param instance: the instance for whose disks we assemble
3893
  @type disks: list of L{objects.Disk} or None
3894
  @param disks: which disks to assemble (or all, if None)
3895
  @type ignore_secondaries: boolean
3896
  @param ignore_secondaries: if true, errors on secondary nodes
3897
      won't result in an error return from the function
3898
  @type ignore_size: boolean
3899
  @param ignore_size: if true, the current known size of the disk
3900
      will not be used during the disk activation, useful for cases
3901
      when the size is wrong
3902
  @return: False if the operation failed, otherwise a list of
3903
      (host, instance_visible_name, node_visible_name)
3904
      with the mapping from node devices to instance devices
3905

3906
  """
3907
  device_info = []
3908
  disks_ok = True
3909
  iname = instance.name
3910
  disks = _ExpandCheckDisks(instance, disks)
3911

    
3912
  # With the two passes mechanism we try to reduce the window of
3913
  # opportunity for the race condition of switching DRBD to primary
3914
  # before handshaking occured, but we do not eliminate it
3915

    
3916
  # The proper fix would be to wait (with some limits) until the
3917
  # connection has been made and drbd transitions from WFConnection
3918
  # into any other network-connected state (Connected, SyncTarget,
3919
  # SyncSource, etc.)
3920

    
3921
  # 1st pass, assemble on all nodes in secondary mode
3922
  for inst_disk in disks:
3923
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3924
      if ignore_size:
3925
        node_disk = node_disk.Copy()
3926
        node_disk.UnsetSize()
3927
      lu.cfg.SetDiskID(node_disk, node)
3928
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3929
      msg = result.fail_msg
3930
      if msg:
3931
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3932
                           " (is_primary=False, pass=1): %s",
3933
                           inst_disk.iv_name, node, msg)
3934
        if not ignore_secondaries:
3935
          disks_ok = False
3936

    
3937
  # FIXME: race condition on drbd migration to primary
3938

    
3939
  # 2nd pass, do only the primary node
3940
  for inst_disk in disks:
3941
    dev_path = None
3942

    
3943
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3944
      if node != instance.primary_node:
3945
        continue
3946
      if ignore_size:
3947
        node_disk = node_disk.Copy()
3948
        node_disk.UnsetSize()
3949
      lu.cfg.SetDiskID(node_disk, node)
3950
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3951
      msg = result.fail_msg
3952
      if msg:
3953
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3954
                           " (is_primary=True, pass=2): %s",
3955
                           inst_disk.iv_name, node, msg)
3956
        disks_ok = False
3957
      else:
3958
        dev_path = result.payload
3959

    
3960
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3961

    
3962
  # leave the disks configured for the primary node
3963
  # this is a workaround that would be fixed better by
3964
  # improving the logical/physical id handling
3965
  for disk in disks:
3966
    lu.cfg.SetDiskID(disk, instance.primary_node)
3967

    
3968
  return disks_ok, device_info
3969

    
3970

    
3971
def _StartInstanceDisks(lu, instance, force):
3972
  """Start the disks of an instance.
3973

3974
  """
3975
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3976
                                           ignore_secondaries=force)
3977
  if not disks_ok:
3978
    _ShutdownInstanceDisks(lu, instance)
3979
    if force is not None and not force:
3980
      lu.proc.LogWarning("", hint="If the message above refers to a"
3981
                         " secondary node,"
3982
                         " you can retry the operation using '--force'.")
3983
    raise errors.OpExecError("Disk consistency error")
3984

    
3985

    
3986
class LUDeactivateInstanceDisks(NoHooksLU):
3987
  """Shutdown an instance's disks.
3988

3989
  """
3990
  _OP_REQP = ["instance_name"]
3991
  REQ_BGL = False
3992

    
3993
  def ExpandNames(self):
3994
    self._ExpandAndLockInstance()
3995
    self.needed_locks[locking.LEVEL_NODE] = []
3996
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3997

    
3998
  def DeclareLocks(self, level):
3999
    if level == locking.LEVEL_NODE:
4000
      self._LockInstancesNodes()
4001

    
4002
  def CheckPrereq(self):
4003
    """Check prerequisites.
4004

4005
    This checks that the instance is in the cluster.
4006

4007
    """
4008
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4009
    assert self.instance is not None, \
4010
      "Cannot retrieve locked instance %s" % self.op.instance_name
4011

    
4012
  def Exec(self, feedback_fn):
4013
    """Deactivate the disks
4014

4015
    """
4016
    instance = self.instance
4017
    _SafeShutdownInstanceDisks(self, instance)
4018

    
4019

    
4020
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4021
  """Shutdown block devices of an instance.
4022

4023
  This function checks if an instance is running, before calling
4024
  _ShutdownInstanceDisks.
4025

4026
  """
4027
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4028
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4029

    
4030

    
4031
def _ExpandCheckDisks(instance, disks):
4032
  """Return the instance disks selected by the disks list
4033

4034
  @type disks: list of L{objects.Disk} or None
4035
  @param disks: selected disks
4036
  @rtype: list of L{objects.Disk}
4037
  @return: selected instance disks to act on
4038

4039
  """
4040
  if disks is None:
4041
    return instance.disks
4042
  else:
4043
    if not set(disks).issubset(instance.disks):
4044
      raise errors.ProgrammerError("Can only act on disks belonging to the"
4045
                                   " target instance")
4046
    return disks
4047

    
4048

    
4049
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4050
  """Shutdown block devices of an instance.
4051

4052
  This does the shutdown on all nodes of the instance.
4053

4054
  If the ignore_primary is false, errors on the primary node are
4055
  ignored.
4056

4057
  """
4058
  all_result = True
4059
  disks = _ExpandCheckDisks(instance, disks)
4060

    
4061
  for disk in disks:
4062
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4063
      lu.cfg.SetDiskID(top_disk, node)
4064
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4065
      msg = result.fail_msg
4066
      if msg:
4067
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4068
                      disk.iv_name, node, msg)
4069
        if not ignore_primary or node != instance.primary_node:
4070
          all_result = False
4071
  return all_result
4072

    
4073

    
4074
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4075
  """Checks if a node has enough free memory.
4076

4077
  This function check if a given node has the needed amount of free
4078
  memory. In case the node has less memory or we cannot get the
4079
  information from the node, this function raise an OpPrereqError
4080
  exception.
4081

4082
  @type lu: C{LogicalUnit}
4083
  @param lu: a logical unit from which we get configuration data
4084
  @type node: C{str}
4085
  @param node: the node to check
4086
  @type reason: C{str}
4087
  @param reason: string to use in the error message
4088
  @type requested: C{int}
4089
  @param requested: the amount of memory in MiB to check for
4090
  @type hypervisor_name: C{str}
4091
  @param hypervisor_name: the hypervisor to ask for memory stats
4092
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4093
      we cannot check the node
4094

4095
  """
4096
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4097
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4098
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4099
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4100
  if not isinstance(free_mem, int):
4101
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4102
                               " was '%s'" % (node, free_mem),
4103
                               errors.ECODE_ENVIRON)
4104
  if requested > free_mem:
4105
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4106
                               " needed %s MiB, available %s MiB" %
4107
                               (node, reason, requested, free_mem),
4108
                               errors.ECODE_NORES)
4109

    
4110

    
4111
def _CheckNodesFreeDisk(lu, nodenames, requested):
4112
  """Checks if nodes have enough free disk space in the default VG.
4113

4114
  This function check if all given nodes have the needed amount of
4115
  free disk. In case any node has less disk or we cannot get the
4116
  information from the node, this function raise an OpPrereqError
4117
  exception.
4118

4119
  @type lu: C{LogicalUnit}
4120
  @param lu: a logical unit from which we get configuration data
4121
  @type nodenames: C{list}
4122
  @param nodenames: the list of node names to check
4123
  @type requested: C{int}
4124
  @param requested: the amount of disk in MiB to check for
4125
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4126
      we cannot check the node
4127

4128
  """
4129
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4130
                                   lu.cfg.GetHypervisorType())
4131
  for node in nodenames:
4132
    info = nodeinfo[node]
4133
    info.Raise("Cannot get current information from node %s" % node,
4134
               prereq=True, ecode=errors.ECODE_ENVIRON)
4135
    vg_free = info.payload.get("vg_free", None)
4136
    if not isinstance(vg_free, int):
4137
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4138
                                 " result was '%s'" % (node, vg_free),
4139
                                 errors.ECODE_ENVIRON)
4140
    if requested > vg_free:
4141
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
4142
                                 " required %d MiB, available %d MiB" %
4143
                                 (node, requested, vg_free),
4144
                                 errors.ECODE_NORES)
4145

    
4146

    
4147
class LUStartupInstance(LogicalUnit):
4148
  """Starts an instance.
4149

4150
  """
4151
  HPATH = "instance-start"
4152
  HTYPE = constants.HTYPE_INSTANCE
4153
  _OP_REQP = ["instance_name", "force"]
4154
  REQ_BGL = False
4155

    
4156
  def ExpandNames(self):
4157
    self._ExpandAndLockInstance()
4158

    
4159
  def BuildHooksEnv(self):
4160
    """Build hooks env.
4161

4162
    This runs on master, primary and secondary nodes of the instance.
4163

4164
    """
4165
    env = {
4166
      "FORCE": self.op.force,
4167
      }
4168
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4169
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4170
    return env, nl, nl
4171

    
4172
  def CheckPrereq(self):
4173
    """Check prerequisites.
4174

4175
    This checks that the instance is in the cluster.
4176

4177
    """
4178
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4179
    assert self.instance is not None, \
4180
      "Cannot retrieve locked instance %s" % self.op.instance_name
4181

    
4182
    # extra beparams
4183
    self.beparams = getattr(self.op, "beparams", {})
4184
    if self.beparams:
4185
      if not isinstance(self.beparams, dict):
4186
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4187
                                   " dict" % (type(self.beparams), ),
4188
                                   errors.ECODE_INVAL)
4189
      # fill the beparams dict
4190
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4191
      self.op.beparams = self.beparams
4192

    
4193
    # extra hvparams
4194
    self.hvparams = getattr(self.op, "hvparams", {})
4195
    if self.hvparams:
4196
      if not isinstance(self.hvparams, dict):
4197
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4198
                                   " dict" % (type(self.hvparams), ),
4199
                                   errors.ECODE_INVAL)
4200

    
4201
      # check hypervisor parameter syntax (locally)
4202
      cluster = self.cfg.GetClusterInfo()
4203
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4204
      filled_hvp = cluster.FillHV(instance)
4205
      filled_hvp.update(self.hvparams)
4206
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4207
      hv_type.CheckParameterSyntax(filled_hvp)
4208
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4209
      self.op.hvparams = self.hvparams
4210

    
4211
    _CheckNodeOnline(self, instance.primary_node)
4212

    
4213
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4214
    # check bridges existence
4215
    _CheckInstanceBridgesExist(self, instance)
4216

    
4217
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4218
                                              instance.name,
4219
                                              instance.hypervisor)
4220
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4221
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4222
    if not remote_info.payload: # not running already
4223
      _CheckNodeFreeMemory(self, instance.primary_node,
4224
                           "starting instance %s" % instance.name,
4225
                           bep[constants.BE_MEMORY], instance.hypervisor)
4226

    
4227
  def Exec(self, feedback_fn):
4228
    """Start the instance.
4229

4230
    """
4231
    instance = self.instance
4232
    force = self.op.force
4233

    
4234
    self.cfg.MarkInstanceUp(instance.name)
4235

    
4236
    node_current = instance.primary_node
4237

    
4238
    _StartInstanceDisks(self, instance, force)
4239

    
4240
    result = self.rpc.call_instance_start(node_current, instance,
4241
                                          self.hvparams, self.beparams)
4242
    msg = result.fail_msg
4243
    if msg:
4244
      _ShutdownInstanceDisks(self, instance)
4245
      raise errors.OpExecError("Could not start instance: %s" % msg)
4246

    
4247

    
4248
class LURebootInstance(LogicalUnit):
4249
  """Reboot an instance.
4250

4251
  """
4252
  HPATH = "instance-reboot"
4253
  HTYPE = constants.HTYPE_INSTANCE
4254
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4255
  REQ_BGL = False
4256

    
4257
  def CheckArguments(self):
4258
    """Check the arguments.
4259

4260
    """
4261
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4262
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4263

    
4264
  def ExpandNames(self):
4265
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4266
                                   constants.INSTANCE_REBOOT_HARD,
4267
                                   constants.INSTANCE_REBOOT_FULL]:
4268
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4269
                                  (constants.INSTANCE_REBOOT_SOFT,
4270
                                   constants.INSTANCE_REBOOT_HARD,
4271
                                   constants.INSTANCE_REBOOT_FULL))
4272
    self._ExpandAndLockInstance()
4273

    
4274
  def BuildHooksEnv(self):
4275
    """Build hooks env.
4276

4277
    This runs on master, primary and secondary nodes of the instance.
4278

4279
    """
4280
    env = {
4281
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4282
      "REBOOT_TYPE": self.op.reboot_type,
4283
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4284
      }
4285
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4286
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4287
    return env, nl, nl
4288

    
4289
  def CheckPrereq(self):
4290
    """Check prerequisites.
4291

4292
    This checks that the instance is in the cluster.
4293

4294
    """
4295
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4296
    assert self.instance is not None, \
4297
      "Cannot retrieve locked instance %s" % self.op.instance_name
4298

    
4299
    _CheckNodeOnline(self, instance.primary_node)
4300

    
4301
    # check bridges existence
4302
    _CheckInstanceBridgesExist(self, instance)
4303

    
4304
  def Exec(self, feedback_fn):
4305
    """Reboot the instance.
4306

4307
    """
4308
    instance = self.instance
4309
    ignore_secondaries = self.op.ignore_secondaries
4310
    reboot_type = self.op.reboot_type
4311

    
4312
    node_current = instance.primary_node
4313

    
4314
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4315
                       constants.INSTANCE_REBOOT_HARD]:
4316
      for disk in instance.disks:
4317
        self.cfg.SetDiskID(disk, node_current)
4318
      result = self.rpc.call_instance_reboot(node_current, instance,
4319
                                             reboot_type,
4320
                                             self.shutdown_timeout)
4321
      result.Raise("Could not reboot instance")
4322
    else:
4323
      result = self.rpc.call_instance_shutdown(node_current, instance,
4324
                                               self.shutdown_timeout)
4325
      result.Raise("Could not shutdown instance for full reboot")
4326
      _ShutdownInstanceDisks(self, instance)
4327
      _StartInstanceDisks(self, instance, ignore_secondaries)
4328
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4329
      msg = result.fail_msg
4330
      if msg:
4331
        _ShutdownInstanceDisks(self, instance)
4332
        raise errors.OpExecError("Could not start instance for"
4333
                                 " full reboot: %s" % msg)
4334

    
4335
    self.cfg.MarkInstanceUp(instance.name)
4336

    
4337

    
4338
class LUShutdownInstance(LogicalUnit):
4339
  """Shutdown an instance.
4340

4341
  """
4342
  HPATH = "instance-stop"
4343
  HTYPE = constants.HTYPE_INSTANCE
4344
  _OP_REQP = ["instance_name"]
4345
  REQ_BGL = False
4346

    
4347
  def CheckArguments(self):
4348
    """Check the arguments.
4349

4350
    """
4351
    self.timeout = getattr(self.op, "timeout",
4352
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)
4353

    
4354
  def ExpandNames(self):
4355
    self._ExpandAndLockInstance()
4356

    
4357
  def BuildHooksEnv(self):
4358
    """Build hooks env.
4359

4360
    This runs on master, primary and secondary nodes of the instance.
4361

4362
    """
4363
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4364
    env["TIMEOUT"] = self.timeout
4365
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4366
    return env, nl, nl
4367

    
4368
  def CheckPrereq(self):
4369
    """Check prerequisites.
4370

4371
    This checks that the instance is in the cluster.
4372

4373
    """
4374
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4375
    assert self.instance is not None, \
4376
      "Cannot retrieve locked instance %s" % self.op.instance_name
4377
    _CheckNodeOnline(self, self.instance.primary_node)
4378

    
4379
  def Exec(self, feedback_fn):
4380
    """Shutdown the instance.
4381

4382
    """
4383
    instance = self.instance
4384
    node_current = instance.primary_node
4385
    timeout = self.timeout
4386
    self.cfg.MarkInstanceDown(instance.name)
4387
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4388
    msg = result.fail_msg
4389
    if msg:
4390
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4391

    
4392
    _ShutdownInstanceDisks(self, instance)
4393

    
4394

    
4395
class LUReinstallInstance(LogicalUnit):
4396
  """Reinstall an instance.
4397

4398
  """
4399
  HPATH = "instance-reinstall"
4400
  HTYPE = constants.HTYPE_INSTANCE
4401
  _OP_REQP = ["instance_name"]
4402
  REQ_BGL = False
4403

    
4404
  def ExpandNames(self):
4405
    self._ExpandAndLockInstance()
4406

    
4407
  def BuildHooksEnv(self):
4408
    """Build hooks env.
4409

4410
    This runs on master, primary and secondary nodes of the instance.
4411

4412
    """
4413
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4414
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4415
    return env, nl, nl
4416

    
4417
  def CheckPrereq(self):
4418
    """Check prerequisites.
4419

4420
    This checks that the instance is in the cluster and is not running.
4421

4422
    """
4423
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4424
    assert instance is not None, \
4425
      "Cannot retrieve locked instance %s" % self.op.instance_name
4426
    _CheckNodeOnline(self, instance.primary_node)
4427

    
4428
    if instance.disk_template == constants.DT_DISKLESS:
4429
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4430
                                 self.op.instance_name,
4431
                                 errors.ECODE_INVAL)
4432
    _CheckInstanceDown(self, instance, "cannot reinstall")
4433

    
4434
    self.op.os_type = getattr(self.op, "os_type", None)
4435
    self.op.force_variant = getattr(self.op, "force_variant", False)
4436
    if self.op.os_type is not None:
4437
      # OS verification
4438
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4439
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4440

    
4441
    self.instance = instance
4442

    
4443
  def Exec(self, feedback_fn):
4444
    """Reinstall the instance.
4445

4446
    """
4447
    inst = self.instance
4448

    
4449
    if self.op.os_type is not None:
4450
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4451
      inst.os = self.op.os_type
4452
      self.cfg.Update(inst, feedback_fn)
4453

    
4454
    _StartInstanceDisks(self, inst, None)
4455
    try:
4456
      feedback_fn("Running the instance OS create scripts...")
4457
      # FIXME: pass debug option from opcode to backend
4458
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4459
                                             self.op.debug_level)
4460
      result.Raise("Could not install OS for instance %s on node %s" %
4461
                   (inst.name, inst.primary_node))
4462
    finally:
4463
      _ShutdownInstanceDisks(self, inst)
4464

    
4465

    
4466
class LURecreateInstanceDisks(LogicalUnit):
4467
  """Recreate an instance's missing disks.
4468

4469
  """
4470
  HPATH = "instance-recreate-disks"
4471
  HTYPE = constants.HTYPE_INSTANCE
4472
  _OP_REQP = ["instance_name", "disks"]
4473
  REQ_BGL = False
4474

    
4475
  def CheckArguments(self):
4476
    """Check the arguments.
4477

4478
    """
4479
    if not isinstance(self.op.disks, list):
4480
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4481
    for item in self.op.disks:
4482
      if (not isinstance(item, int) or
4483
          item < 0):
4484
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
4485
                                   str(item), errors.ECODE_INVAL)
4486

    
4487
  def ExpandNames(self):
4488
    self._ExpandAndLockInstance()
4489

    
4490
  def BuildHooksEnv(self):
4491
    """Build hooks env.
4492

4493
    This runs on master, primary and secondary nodes of the instance.
4494

4495
    """
4496
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4497
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4498
    return env, nl, nl
4499

    
4500
  def CheckPrereq(self):
4501
    """Check prerequisites.
4502

4503
    This checks that the instance is in the cluster and is not running.
4504

4505
    """
4506
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4507
    assert instance is not None, \
4508
      "Cannot retrieve locked instance %s" % self.op.instance_name
4509
    _CheckNodeOnline(self, instance.primary_node)
4510

    
4511
    if instance.disk_template == constants.DT_DISKLESS:
4512
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4513
                                 self.op.instance_name, errors.ECODE_INVAL)
4514
    _CheckInstanceDown(self, instance, "cannot recreate disks")
4515

    
4516
    if not self.op.disks:
4517
      self.op.disks = range(len(instance.disks))
4518
    else:
4519
      for idx in self.op.disks:
4520
        if idx >= len(instance.disks):
4521
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4522
                                     errors.ECODE_INVAL)
4523

    
4524
    self.instance = instance
4525

    
4526
  def Exec(self, feedback_fn):
4527
    """Recreate the disks.
4528

4529
    """
4530
    to_skip = []
4531
    for idx, _ in enumerate(self.instance.disks):
4532
      if idx not in self.op.disks: # disk idx has not been passed in
4533
        to_skip.append(idx)
4534
        continue
4535

    
4536
    _CreateDisks(self, self.instance, to_skip=to_skip)
4537

    
4538

    
4539
class LURenameInstance(LogicalUnit):
4540
  """Rename an instance.
4541

4542
  """
4543
  HPATH = "instance-rename"
4544
  HTYPE = constants.HTYPE_INSTANCE
4545
  _OP_REQP = ["instance_name", "new_name"]
4546

    
4547
  def BuildHooksEnv(self):
4548
    """Build hooks env.
4549

4550
    This runs on master, primary and secondary nodes of the instance.
4551

4552
    """
4553
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4554
    env["INSTANCE_NEW_NAME"] = self.op.new_name
4555
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4556
    return env, nl, nl
4557

    
4558
  def CheckPrereq(self):
4559
    """Check prerequisites.
4560

4561
    This checks that the instance is in the cluster and is not running.
4562

4563
    """
4564
    self.op.instance_name = _ExpandInstanceName(self.cfg,
4565
                                                self.op.instance_name)
4566
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4567
    assert instance is not None
4568
    _CheckNodeOnline(self, instance.primary_node)
4569
    _CheckInstanceDown(self, instance, "cannot rename")
4570
    self.instance = instance
4571

    
4572
    # new name verification
4573
    name_info = utils.GetHostInfo(self.op.new_name)
4574

    
4575
    self.op.new_name = new_name = name_info.name
4576
    instance_list = self.cfg.GetInstanceList()
4577
    if new_name in instance_list:
4578
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4579
                                 new_name, errors.ECODE_EXISTS)
4580

    
4581
    if not getattr(self.op, "ignore_ip", False):
4582
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4583
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4584
                                   (name_info.ip, new_name),
4585
                                   errors.ECODE_NOTUNIQUE)
4586

    
4587

    
4588
  def Exec(self, feedback_fn):
4589
    """Reinstall the instance.
4590

4591
    """
4592
    inst = self.instance
4593
    old_name = inst.name
4594

    
4595
    if inst.disk_template == constants.DT_FILE:
4596
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4597

    
4598
    self.cfg.RenameInstance(inst.name, self.op.new_name)
4599
    # Change the instance lock. This is definitely safe while we hold the BGL
4600
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4601
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4602

    
4603
    # re-read the instance from the configuration after rename
4604
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
4605

    
4606
    if inst.disk_template == constants.DT_FILE:
4607
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4608
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4609
                                                     old_file_storage_dir,
4610
                                                     new_file_storage_dir)
4611
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
4612
                   " (but the instance has been renamed in Ganeti)" %
4613
                   (inst.primary_node, old_file_storage_dir,
4614
                    new_file_storage_dir))
4615

    
4616
    _StartInstanceDisks(self, inst, None)
4617
    try:
4618
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4619
                                                 old_name, self.op.debug_level)
4620
      msg = result.fail_msg
4621
      if msg:
4622
        msg = ("Could not run OS rename script for instance %s on node %s"
4623
               " (but the instance has been renamed in Ganeti): %s" %
4624
               (inst.name, inst.primary_node, msg))
4625
        self.proc.LogWarning(msg)
4626
    finally:
4627
      _ShutdownInstanceDisks(self, inst)
4628

    
4629

    
4630
class LURemoveInstance(LogicalUnit):
4631
  """Remove an instance.
4632

4633
  """
4634
  HPATH = "instance-remove"
4635
  HTYPE = constants.HTYPE_INSTANCE
4636
  _OP_REQP = ["instance_name", "ignore_failures"]
4637
  REQ_BGL = False
4638

    
4639
  def CheckArguments(self):
4640
    """Check the arguments.
4641

4642
    """
4643
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4644
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4645

    
4646
  def ExpandNames(self):
4647
    self._ExpandAndLockInstance()
4648
    self.needed_locks[locking.LEVEL_NODE] = []
4649
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4650

    
4651
  def DeclareLocks(self, level):
4652
    if level == locking.LEVEL_NODE:
4653
      self._LockInstancesNodes()
4654

    
4655
  def BuildHooksEnv(self):
4656
    """Build hooks env.
4657

4658
    This runs on master, primary and secondary nodes of the instance.
4659

4660
    """
4661
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4662
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4663
    nl = [self.cfg.GetMasterNode()]
4664
    nl_post = list(self.instance.all_nodes) + nl
4665
    return env, nl, nl_post
4666

    
4667
  def CheckPrereq(self):
4668
    """Check prerequisites.
4669

4670
    This checks that the instance is in the cluster.
4671

4672
    """
4673
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4674
    assert self.instance is not None, \
4675
      "Cannot retrieve locked instance %s" % self.op.instance_name
4676

    
4677
  def Exec(self, feedback_fn):
4678
    """Remove the instance.
4679

4680
    """
4681
    instance = self.instance
4682
    logging.info("Shutting down instance %s on node %s",
4683
                 instance.name, instance.primary_node)
4684

    
4685
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4686
                                             self.shutdown_timeout)
4687
    msg = result.fail_msg
4688
    if msg:
4689
      if self.op.ignore_failures:
4690
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
4691
      else:
4692
        raise errors.OpExecError("Could not shutdown instance %s on"
4693
                                 " node %s: %s" %
4694
                                 (instance.name, instance.primary_node, msg))
4695

    
4696
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4697

    
4698

    
4699
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4700
  """Utility function to remove an instance.
4701

4702
  """
4703
  logging.info("Removing block devices for instance %s", instance.name)
4704

    
4705
  if not _RemoveDisks(lu, instance):
4706
    if not ignore_failures:
4707
      raise errors.OpExecError("Can't remove instance's disks")
4708
    feedback_fn("Warning: can't remove instance's disks")
4709

    
4710
  logging.info("Removing instance %s out of cluster config", instance.name)
4711

    
4712
  lu.cfg.RemoveInstance(instance.name)
4713

    
4714
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4715
    "Instance lock removal conflict"
4716

    
4717
  # Remove lock for the instance
4718
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4719

    
4720

    
4721
class LUQueryInstances(NoHooksLU):
4722
  """Logical unit for querying instances.
4723

4724
  """
4725
  # pylint: disable-msg=W0142
4726
  _OP_REQP = ["output_fields", "names", "use_locking"]
4727
  REQ_BGL = False
4728
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4729
                    "serial_no", "ctime", "mtime", "uuid"]
4730
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4731
                                    "admin_state",
4732
                                    "disk_template", "ip", "mac", "bridge",
4733
                                    "nic_mode", "nic_link",
4734
                                    "sda_size", "sdb_size", "vcpus", "tags",
4735
                                    "network_port", "beparams",
4736
                                    r"(disk)\.(size)/([0-9]+)",
4737
                                    r"(disk)\.(sizes)", "disk_usage",
4738
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4739
                                    r"(nic)\.(bridge)/([0-9]+)",
4740
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
4741
                                    r"(disk|nic)\.(count)",
4742
                                    "hvparams",
4743
                                    ] + _SIMPLE_FIELDS +
4744
                                  ["hv/%s" % name
4745
                                   for name in constants.HVS_PARAMETERS
4746
                                   if name not in constants.HVC_GLOBALS] +
4747
                                  ["be/%s" % name
4748
                                   for name in constants.BES_PARAMETERS])
4749
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
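  # Examples of fields matched by the patterns in _FIELDS_STATIC above:
  # "disk.size/0" (size of the first disk), "disk.sizes", "disk.count",
  # "nic.mac/1", "nic.bridges", plus the per-parameter "hv/<name>" and
  # "be/<name>" fields.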
4750

    
4751

    
4752
  def ExpandNames(self):
4753
    _CheckOutputFields(static=self._FIELDS_STATIC,
4754
                       dynamic=self._FIELDS_DYNAMIC,
4755
                       selected=self.op.output_fields)
4756

    
4757
    self.needed_locks = {}
4758
    self.share_locks[locking.LEVEL_INSTANCE] = 1
4759
    self.share_locks[locking.LEVEL_NODE] = 1
4760

    
4761
    if self.op.names:
4762
      self.wanted = _GetWantedInstances(self, self.op.names)
4763
    else:
4764
      self.wanted = locking.ALL_SET
4765

    
4766
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4767
    self.do_locking = self.do_node_query and self.op.use_locking
4768
    if self.do_locking:
4769
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4770
      self.needed_locks[locking.LEVEL_NODE] = []
4771
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4772

    
4773
  def DeclareLocks(self, level):
4774
    if level == locking.LEVEL_NODE and self.do_locking:
4775
      self._LockInstancesNodes()
4776

    
4777
  def CheckPrereq(self):
4778
    """Check prerequisites.
4779

4780
    """
4781
    pass
4782

    
4783
  def Exec(self, feedback_fn):
4784
    """Computes the list of nodes and their attributes.
4785

4786
    """
4787
    # pylint: disable-msg=R0912
4788
    # way too many branches here
4789
    all_info = self.cfg.GetAllInstancesInfo()
4790
    if self.wanted == locking.ALL_SET:
4791
      # caller didn't specify instance names, so ordering is not important
4792
      if self.do_locking:
4793
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4794
      else:
4795
        instance_names = all_info.keys()
4796
      instance_names = utils.NiceSort(instance_names)
4797
    else:
4798
      # caller did specify names, so we must keep the ordering
4799
      if self.do_locking:
4800
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4801
      else:
4802
        tgt_set = all_info.keys()
4803
      missing = set(self.wanted).difference(tgt_set)
4804
      if missing:
4805
        raise errors.OpExecError("Some instances were removed before"
4806
                                 " retrieving their data: %s" % missing)
4807
      instance_names = self.wanted
4808

    
4809
    instance_list = [all_info[iname] for iname in instance_names]
4810

    
4811
    # begin data gathering
4812

    
4813
    nodes = frozenset([inst.primary_node for inst in instance_list])
4814
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4815

    
4816
    bad_nodes = []
4817
    off_nodes = []
4818
    if self.do_node_query:
4819
      live_data = {}
4820
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4821
      for name in nodes:
4822
        result = node_data[name]
4823
        if result.offline:
4824
          # offline nodes will be in both lists
4825
          off_nodes.append(name)
4826
        if result.fail_msg:
4827
          bad_nodes.append(name)
4828
        else:
4829
          if result.payload:
4830
            live_data.update(result.payload)
4831
          # else no instance is alive
4832
    else:
4833
      live_data = dict([(name, {}) for name in instance_names])
4834

    
4835
    # end data gathering
4836

    
4837
    HVPREFIX = "hv/"
4838
    BEPREFIX = "be/"
4839
    output = []
4840
    cluster = self.cfg.GetClusterInfo()
4841
    for instance in instance_list:
4842
      iout = []
4843
      i_hv = cluster.FillHV(instance, skip_globals=True)
4844
      i_be = cluster.FillBE(instance)
4845
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
4846
      for field in self.op.output_fields:
4847
        st_match = self._FIELDS_STATIC.Matches(field)
4848
        if field in self._SIMPLE_FIELDS:
4849
          val = getattr(instance, field)
4850
        elif field == "pnode":
4851
          val = instance.primary_node
4852
        elif field == "snodes":
4853
          val = list(instance.secondary_nodes)
4854
        elif field == "admin_state":
4855
          val = instance.admin_up
4856
        elif field == "oper_state":
4857
          if instance.primary_node in bad_nodes:
4858
            val = None
4859
          else:
4860
            val = bool(live_data.get(instance.name))
4861
        elif field == "status":
4862
          if instance.primary_node in off_nodes:
4863
            val = "ERROR_nodeoffline"
4864
          elif instance.primary_node in bad_nodes:
4865
            val = "ERROR_nodedown"
4866
          else:
4867
            running = bool(live_data.get(instance.name))
4868
            if running:
4869
              if instance.admin_up:
4870
                val = "running"
4871
              else:
4872
                val = "ERROR_up"
4873
            else:
4874
              if instance.admin_up:
4875
                val = "ERROR_down"
4876
              else:
4877
                val = "ADMIN_down"
4878
        elif field == "oper_ram":
4879
          if instance.primary_node in bad_nodes:
4880
            val = None
4881
          elif instance.name in live_data:
4882
            val = live_data[instance.name].get("memory", "?")
4883
          else:
4884
            val = "-"
4885
        elif field == "vcpus":
4886
          val = i_be[constants.BE_VCPUS]
4887
        elif field == "disk_template":
4888
          val = instance.disk_template
4889
        elif field == "ip":
4890
          if instance.nics:
4891
            val = instance.nics[0].ip
4892
          else:
4893
            val = None
4894
        elif field == "nic_mode":
4895
          if instance.nics:
4896
            val = i_nicp[0][constants.NIC_MODE]
4897
          else:
4898
            val = None
4899
        elif field == "nic_link":
4900
          if instance.nics:
4901
            val = i_nicp[0][constants.NIC_LINK]
4902
          else:
4903
            val = None
4904
        elif field == "bridge":
4905
          if (instance.nics and
4906
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4907
            val = i_nicp[0][constants.NIC_LINK]
4908
          else:
4909
            val = None
4910
        elif field == "mac":
4911
          if instance.nics:
4912
            val = instance.nics[0].mac
4913
          else:
4914
            val = None
4915
        elif field == "sda_size" or field == "sdb_size":
4916
          idx = ord(field[2]) - ord('a')
4917
          try:
4918
            val = instance.FindDisk(idx).size
4919
          except errors.OpPrereqError:
4920
            val = None
4921
        elif field == "disk_usage": # total disk usage per node
4922
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4923
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4924
        elif field == "tags":
4925
          val = list(instance.GetTags())
4926
        elif field == "hvparams":
4927
          val = i_hv
4928
        elif (field.startswith(HVPREFIX) and
4929
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4930
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4931
          val = i_hv.get(field[len(HVPREFIX):], None)
4932
        elif field == "beparams":
4933
          val = i_be
4934
        elif (field.startswith(BEPREFIX) and
4935
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4936
          val = i_be.get(field[len(BEPREFIX):], None)
4937
        elif st_match and st_match.groups():
4938
          # matches a variable list
4939
          st_groups = st_match.groups()
4940
          if st_groups and st_groups[0] == "disk":
4941
            if st_groups[1] == "count":
4942
              val = len(instance.disks)
4943
            elif st_groups[1] == "sizes":
4944
              val = [disk.size for disk in instance.disks]
4945
            elif st_groups[1] == "size":
4946
              try:
4947
                val = instance.FindDisk(st_groups[2]).size
4948
              except errors.OpPrereqError:
4949
                val = None
4950
            else:
4951
              assert False, "Unhandled disk parameter"
4952
          elif st_groups[0] == "nic":
4953
            if st_groups[1] == "count":
4954
              val = len(instance.nics)
4955
            elif st_groups[1] == "macs":
4956
              val = [nic.mac for nic in instance.nics]
4957
            elif st_groups[1] == "ips":
4958
              val = [nic.ip for nic in instance.nics]
4959
            elif st_groups[1] == "modes":
4960
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4961
            elif st_groups[1] == "links":
4962
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4963
            elif st_groups[1] == "bridges":
4964
              val = []
4965
              for nicp in i_nicp:
4966
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4967
                  val.append(nicp[constants.NIC_LINK])
4968
                else:
4969
                  val.append(None)
4970
            else:
4971
              # index-based item
4972
              nic_idx = int(st_groups[2])
4973
              if nic_idx >= len(instance.nics):
4974
                val = None
4975
              else:
4976
                if st_groups[1] == "mac":
4977
                  val = instance.nics[nic_idx].mac
4978
                elif st_groups[1] == "ip":
4979
                  val = instance.nics[nic_idx].ip
4980
                elif st_groups[1] == "mode":
4981
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4982
                elif st_groups[1] == "link":
4983
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4984
                elif st_groups[1] == "bridge":
4985
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4986
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4987
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4988
                  else:
4989
                    val = None
4990
                else:
4991
                  assert False, "Unhandled NIC parameter"
4992
          else:
4993
            assert False, ("Declared but unhandled variable parameter '%s'" %
4994
                           field)
4995
        else:
4996
          assert False, "Declared but unhandled parameter '%s'" % field
4997
        iout.append(val)
4998
      output.append(iout)
4999

    
5000
    return output
5001

    
5002

    
5003
class LUFailoverInstance(LogicalUnit):
5004
  """Failover an instance.
5005

5006
  """
5007
  HPATH = "instance-failover"
5008
  HTYPE = constants.HTYPE_INSTANCE
5009
  _OP_REQP = ["instance_name", "ignore_consistency"]
5010
  REQ_BGL = False
5011

    
5012
  def CheckArguments(self):
5013
    """Check the arguments.
5014

5015
    """
5016
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5017
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
5018

    
5019
  def ExpandNames(self):
5020
    self._ExpandAndLockInstance()
5021
    self.needed_locks[locking.LEVEL_NODE] = []
5022
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5023

    
5024
  def DeclareLocks(self, level):
5025
    if level == locking.LEVEL_NODE:
5026
      self._LockInstancesNodes()
5027

    
5028
  def BuildHooksEnv(self):
5029
    """Build hooks env.
5030

5031
    This runs on master, primary and secondary nodes of the instance.
5032

5033
    """
5034
    instance = self.instance
5035
    source_node = instance.primary_node
5036
    target_node = instance.secondary_nodes[0]
5037
    env = {
5038
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5039
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5040
      "OLD_PRIMARY": source_node,
5041
      "OLD_SECONDARY": target_node,
5042
      "NEW_PRIMARY": target_node,
5043
      "NEW_SECONDARY": source_node,
5044
      }
5045
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5046
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5047
    nl_post = list(nl)
5048
    nl_post.append(source_node)
5049
    return env, nl, nl_post
5050

    
5051
  def CheckPrereq(self):
5052
    """Check prerequisites.
5053

5054
    This checks that the instance is in the cluster.
5055

5056
    """
5057
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5058
    assert self.instance is not None, \
5059
      "Cannot retrieve locked instance %s" % self.op.instance_name
5060

    
5061
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5062
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5063
      raise errors.OpPrereqError("Instance's disk layout is not"
5064
                                 " network mirrored, cannot failover.",
5065
                                 errors.ECODE_STATE)
5066

    
5067
    secondary_nodes = instance.secondary_nodes
5068
    if not secondary_nodes:
5069
      raise errors.ProgrammerError("no secondary node but using "
5070
                                   "a mirrored disk template")
5071

    
5072
    target_node = secondary_nodes[0]
5073
    _CheckNodeOnline(self, target_node)
5074
    _CheckNodeNotDrained(self, target_node)
5075
    if instance.admin_up:
5076
      # check memory requirements on the secondary node
5077
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5078
                           instance.name, bep[constants.BE_MEMORY],
5079
                           instance.hypervisor)
5080
    else:
5081
      self.LogInfo("Not checking memory on the secondary node as"
5082
                   " instance will not be started")
5083

    
5084
    # check bridge existence
5085
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5086

    
5087
  def Exec(self, feedback_fn):
5088
    """Failover an instance.
5089

5090
    The failover is done by shutting it down on its present node and
5091
    starting it on the secondary.
5092

5093
    """
5094
    instance = self.instance
5095

    
5096
    source_node = instance.primary_node
5097
    target_node = instance.secondary_nodes[0]
5098

    
5099
    if instance.admin_up:
5100
      feedback_fn("* checking disk consistency between source and target")
5101
      for dev in instance.disks:
5102
        # for drbd, these are drbd over lvm
5103
        if not _CheckDiskConsistency(self, dev, target_node, False):
5104
          if not self.op.ignore_consistency:
5105
            raise errors.OpExecError("Disk %s is degraded on target node,"
5106
                                     " aborting failover." % dev.iv_name)
5107
    else:
5108
      feedback_fn("* not checking disk consistency as instance is not running")
5109

    
5110
    feedback_fn("* shutting down instance on source node")
5111
    logging.info("Shutting down instance %s on node %s",
5112
                 instance.name, source_node)
5113

    
5114
    result = self.rpc.call_instance_shutdown(source_node, instance,
5115
                                             self.shutdown_timeout)
5116
    msg = result.fail_msg
5117
    if msg:
5118
      if self.op.ignore_consistency:
5119
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5120
                             " Proceeding anyway. Please make sure node"
5121
                             " %s is down. Error details: %s",
5122
                             instance.name, source_node, source_node, msg)
5123
      else:
5124
        raise errors.OpExecError("Could not shutdown instance %s on"
5125
                                 " node %s: %s" %
5126
                                 (instance.name, source_node, msg))
5127

    
5128
    feedback_fn("* deactivating the instance's disks on source node")
5129
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5130
      raise errors.OpExecError("Can't shut down the instance's disks.")
5131

    
5132
    instance.primary_node = target_node
5133
    # distribute new instance config to the other nodes
5134
    self.cfg.Update(instance, feedback_fn)
5135

    
5136
    # Only start the instance if it's marked as up
5137
    if instance.admin_up:
5138
      feedback_fn("* activating the instance's disks on target node")
5139
      logging.info("Starting instance %s on node %s",
5140
                   instance.name, target_node)
5141

    
5142
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5143
                                           ignore_secondaries=True)
5144
      if not disks_ok:
5145
        _ShutdownInstanceDisks(self, instance)
5146
        raise errors.OpExecError("Can't activate the instance's disks")
5147

    
5148
      feedback_fn("* starting the instance on the target node")
5149
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5150
      msg = result.fail_msg
5151
      if msg:
5152
        _ShutdownInstanceDisks(self, instance)
5153
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5154
                                 (instance.name, target_node, msg))
5155

    
5156

    
5157
class LUMigrateInstance(LogicalUnit):
5158
  """Migrate an instance.
5159

5160
  This is migration without shutting down, compared to the failover,
5161
  which is done with shutdown.
5162

5163
  """
5164
  HPATH = "instance-migrate"
5165
  HTYPE = constants.HTYPE_INSTANCE
5166
  _OP_REQP = ["instance_name", "live", "cleanup"]
5167

    
5168
  REQ_BGL = False
5169

    
5170
  def ExpandNames(self):
5171
    self._ExpandAndLockInstance()
5172

    
5173
    self.needed_locks[locking.LEVEL_NODE] = []
5174
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5175

    
5176
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5177
                                       self.op.live, self.op.cleanup)
5178
    self.tasklets = [self._migrater]
5179

    
5180
  def DeclareLocks(self, level):
5181
    if level == locking.LEVEL_NODE:
5182
      self._LockInstancesNodes()
5183

    
5184
  def BuildHooksEnv(self):
5185
    """Build hooks env.
5186

5187
    This runs on master, primary and secondary nodes of the instance.
5188

5189
    """
5190
    instance = self._migrater.instance
5191
    source_node = instance.primary_node
5192
    target_node = instance.secondary_nodes[0]
5193
    env = _BuildInstanceHookEnvByObject(self, instance)
5194
    env["MIGRATE_LIVE"] = self.op.live
5195
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5196
    env.update({
5197
        "OLD_PRIMARY": source_node,
5198
        "OLD_SECONDARY": target_node,
5199
        "NEW_PRIMARY": target_node,
5200
        "NEW_SECONDARY": source_node,
5201
        })
5202
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5203
    nl_post = list(nl)
5204
    nl_post.append(source_node)
5205
    return env, nl, nl_post
5206

    
5207

    
5208
class LUMoveInstance(LogicalUnit):
5209
  """Move an instance by data-copying.
5210

5211
  """
5212
  HPATH = "instance-move"
5213
  HTYPE = constants.HTYPE_INSTANCE
5214
  _OP_REQP = ["instance_name", "target_node"]
5215
  REQ_BGL = False
5216

    
5217
  def CheckArguments(self):
5218
    """Check the arguments.
5219

5220
    """
5221
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5222
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
5223

    
5224
  def ExpandNames(self):
5225
    self._ExpandAndLockInstance()
5226
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5227
    self.op.target_node = target_node
5228
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5229
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5230

    
5231
  def DeclareLocks(self, level):
5232
    if level == locking.LEVEL_NODE:
5233
      self._LockInstancesNodes(primary_only=True)
5234

    
5235
  def BuildHooksEnv(self):
5236
    """Build hooks env.
5237

5238
    This runs on master, primary and secondary nodes of the instance.
5239

5240
    """
5241
    env = {
5242
      "TARGET_NODE": self.op.target_node,
5243
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5244
      }
5245
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5246
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5247
                                       self.op.target_node]
5248
    return env, nl, nl
5249

    
5250
  def CheckPrereq(self):
5251
    """Check prerequisites.
5252

5253
    This checks that the instance is in the cluster.
5254

5255
    """
5256
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5257
    assert self.instance is not None, \
5258
      "Cannot retrieve locked instance %s" % self.op.instance_name
5259

    
5260
    node = self.cfg.GetNodeInfo(self.op.target_node)
5261
    assert node is not None, \
5262
      "Cannot retrieve locked node %s" % self.op.target_node
5263

    
5264
    self.target_node = target_node = node.name
5265

    
5266
    if target_node == instance.primary_node:
5267
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5268
                                 (instance.name, target_node),
5269
                                 errors.ECODE_STATE)
5270

    
5271
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5272

    
5273
    for idx, dsk in enumerate(instance.disks):
5274
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5275
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5276
                                   " cannot copy" % idx, errors.ECODE_STATE)
5277

    
5278
    _CheckNodeOnline(self, target_node)
5279
    _CheckNodeNotDrained(self, target_node)
5280

    
5281
    if instance.admin_up:
5282
      # check memory requirements on the secondary node
5283
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5284
                           instance.name, bep[constants.BE_MEMORY],
5285
                           instance.hypervisor)
5286
    else:
5287
      self.LogInfo("Not checking memory on the secondary node as"
5288
                   " instance will not be started")
5289

    
5290
    # check bridge existence
5291
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5292

    
5293
  def Exec(self, feedback_fn):
5294
    """Move an instance.
5295

5296
    The move is done by shutting it down on its present node, copying
5297
    the data over (slow) and starting it on the new node.
5298

5299
    """
5300
    instance = self.instance
5301

    
5302
    source_node = instance.primary_node
5303
    target_node = self.target_node
5304

    
5305
    self.LogInfo("Shutting down instance %s on source node %s",
5306
                 instance.name, source_node)
5307

    
5308
    result = self.rpc.call_instance_shutdown(source_node, instance,
5309
                                             self.shutdown_timeout)
5310
    msg = result.fail_msg
5311
    if msg:
5312
      if self.op.ignore_consistency:
5313
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5314
                             " Proceeding anyway. Please make sure node"
5315
                             " %s is down. Error details: %s",
5316
                             instance.name, source_node, source_node, msg)
5317
      else:
5318
        raise errors.OpExecError("Could not shutdown instance %s on"
5319
                                 " node %s: %s" %
5320
                                 (instance.name, source_node, msg))
5321

    
5322
    # create the target disks
5323
    try:
5324
      _CreateDisks(self, instance, target_node=target_node)
5325
    except errors.OpExecError:
5326
      self.LogWarning("Device creation failed, reverting...")
5327
      try:
5328
        _RemoveDisks(self, instance, target_node=target_node)
5329
      finally:
5330
        self.cfg.ReleaseDRBDMinors(instance.name)
5331
        raise
5332

    
5333
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5334

    
5335
    errs = []
5336
    # activate, get path, copy the data over
5337
    for idx, disk in enumerate(instance.disks):
5338
      self.LogInfo("Copying data for disk %d", idx)
5339
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5340
                                               instance.name, True)
5341
      if result.fail_msg:
5342
        self.LogWarning("Can't assemble newly created disk %d: %s",
5343
                        idx, result.fail_msg)
5344
        errs.append(result.fail_msg)
5345
        break
5346
      dev_path = result.payload
5347
      result = self.rpc.call_blockdev_export(source_node, disk,
5348
                                             target_node, dev_path,
5349
                                             cluster_name)
5350
      if result.fail_msg:
5351
        self.LogWarning("Can't copy data over for disk %d: %s",
5352
                        idx, result.fail_msg)
5353
        errs.append(result.fail_msg)
5354
        break
5355

    
5356
    if errs:
5357
      self.LogWarning("Some disks failed to copy, aborting")
5358
      try:
5359
        _RemoveDisks(self, instance, target_node=target_node)
5360
      finally:
5361
        self.cfg.ReleaseDRBDMinors(instance.name)
5362
        raise errors.OpExecError("Errors during disk copy: %s" %
5363
                                 (",".join(errs),))
5364

    
5365
    instance.primary_node = target_node
5366
    self.cfg.Update(instance, feedback_fn)
5367

    
5368
    self.LogInfo("Removing the disks on the original node")
5369
    _RemoveDisks(self, instance, target_node=source_node)
5370

    
5371
    # Only start the instance if it's marked as up
5372
    if instance.admin_up:
5373
      self.LogInfo("Starting instance %s on node %s",
5374
                   instance.name, target_node)
5375

    
5376
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5377
                                           ignore_secondaries=True)
5378
      if not disks_ok:
5379
        _ShutdownInstanceDisks(self, instance)
5380
        raise errors.OpExecError("Can't activate the instance's disks")
5381

    
5382
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5383
      msg = result.fail_msg
5384
      if msg:
5385
        _ShutdownInstanceDisks(self, instance)
5386
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5387
                                 (instance.name, target_node, msg))
5388

    
5389

    
5390
class LUMigrateNode(LogicalUnit):
5391
  """Migrate all instances from a node.
5392

5393
  """
5394
  HPATH = "node-migrate"
5395
  HTYPE = constants.HTYPE_NODE
5396
  _OP_REQP = ["node_name", "live"]
5397
  REQ_BGL = False
5398

    
5399
  def ExpandNames(self):
5400
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5401

    
5402
    self.needed_locks = {
5403
      locking.LEVEL_NODE: [self.op.node_name],
5404
      }
5405

    
5406
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5407

    
5408
    # Create tasklets for migrating all primary instances on this node
5409
    names = []
5410
    tasklets = []
5411

    
5412
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5413
      logging.debug("Migrating instance %s", inst.name)
5414
      names.append(inst.name)
5415

    
5416
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5417

    
5418
    self.tasklets = tasklets
5419

    
5420
    # Declare instance locks
5421
    self.needed_locks[locking.LEVEL_INSTANCE] = names
5422

    
5423
  def DeclareLocks(self, level):
5424
    if level == locking.LEVEL_NODE:
5425
      self._LockInstancesNodes()
5426

    
5427
  def BuildHooksEnv(self):
5428
    """Build hooks env.
5429

5430
    This runs on the master, the primary and all the secondaries.
5431

5432
    """
5433
    env = {
5434
      "NODE_NAME": self.op.node_name,
5435
      }
5436

    
5437
    nl = [self.cfg.GetMasterNode()]
5438

    
5439
    return (env, nl, nl)
5440

    
5441

    
5442
class TLMigrateInstance(Tasklet):
5443
  def __init__(self, lu, instance_name, live, cleanup):
5444
    """Initializes this class.
5445

5446
    """
5447
    Tasklet.__init__(self, lu)
5448

    
5449
    # Parameters
5450
    self.instance_name = instance_name
5451
    self.live = live
5452
    self.cleanup = cleanup
5453

    
5454
  def CheckPrereq(self):
5455
    """Check prerequisites.
5456

5457
    This checks that the instance is in the cluster.
5458

5459
    """
5460
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5461
    instance = self.cfg.GetInstanceInfo(instance_name)
5462
    assert instance is not None
5463

    
5464
    if instance.disk_template != constants.DT_DRBD8:
5465
      raise errors.OpPrereqError("Instance's disk layout is not"
5466
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5467

    
5468
    secondary_nodes = instance.secondary_nodes
5469
    if not secondary_nodes:
5470
      raise errors.ConfigurationError("No secondary node but using"
5471
                                      " drbd8 disk template")
5472

    
5473
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5474

    
5475
    target_node = secondary_nodes[0]
5476
    # check memory requirements on the secondary node
5477
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5478
                         instance.name, i_be[constants.BE_MEMORY],
5479
                         instance.hypervisor)
5480

    
5481
    # check bridge existence
5482
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5483

    
5484
    if not self.cleanup:
5485
      _CheckNodeNotDrained(self.lu, target_node)
5486
      result = self.rpc.call_instance_migratable(instance.primary_node,
5487
                                                 instance)
5488
      result.Raise("Can't migrate, please use failover",
5489
                   prereq=True, ecode=errors.ECODE_STATE)
5490

    
5491
    self.instance = instance
5492

    
5493
  def _WaitUntilSync(self):
5494
    """Poll with custom rpc for disk sync.
5495

5496
    This uses our own step-based rpc call.
5497

5498
    """
5499
    self.feedback_fn("* wait until resync is done")
5500
    all_done = False
5501
    while not all_done:
5502
      all_done = True
5503
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5504
                                            self.nodes_ip,
5505
                                            self.instance.disks)
5506
      min_percent = 100
5507
      for node, nres in result.items():
5508
        nres.Raise("Cannot resync disks on node %s" % node)
5509
        node_done, node_percent = nres.payload
5510
        all_done = all_done and node_done
5511
        if node_percent is not None:
5512
          min_percent = min(min_percent, node_percent)
5513
      if not all_done:
5514
        if min_percent < 100:
5515
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5516
        time.sleep(2)
5517

    
5518
  def _EnsureSecondary(self, node):
5519
    """Demote a node to secondary.
5520

5521
    """
5522
    self.feedback_fn("* switching node %s to secondary mode" % node)
5523

    
5524
    for dev in self.instance.disks:
5525
      self.cfg.SetDiskID(dev, node)
5526

    
5527
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5528
                                          self.instance.disks)
5529
    result.Raise("Cannot change disk to secondary on node %s" % node)
5530

    
5531
  def _GoStandalone(self):
5532
    """Disconnect from the network.
5533

5534
    """
5535
    self.feedback_fn("* changing into standalone mode")
5536
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5537
                                               self.instance.disks)
5538
    for node, nres in result.items():
5539
      nres.Raise("Cannot disconnect disks node %s" % node)
5540

    
5541
  def _GoReconnect(self, multimaster):
5542
    """Reconnect to the network.
5543

5544
    """
5545
    if multimaster:
5546
      msg = "dual-master"
5547
    else:
5548
      msg = "single-master"
5549
    self.feedback_fn("* changing disks into %s mode" % msg)
5550
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5551
                                           self.instance.disks,
5552
                                           self.instance.name, multimaster)
5553
    for node, nres in result.items():
5554
      nres.Raise("Cannot change disks config on node %s" % node)
5555

    
5556
  def _ExecCleanup(self):
5557
    """Try to cleanup after a failed migration.
5558

5559
    The cleanup is done by:
5560
      - check that the instance is running only on one node
5561
        (and update the config if needed)
5562
      - change disks on its secondary node to secondary
5563
      - wait until disks are fully synchronized
5564
      - disconnect from the network
5565
      - change disks into single-master mode
5566
      - wait again until disks are fully synchronized
5567

5568
    """
5569
    instance = self.instance
5570
    target_node = self.target_node
5571
    source_node = self.source_node
5572

    
5573
    # check running on only one node
5574
    self.feedback_fn("* checking where the instance actually runs"
5575
                     " (if this hangs, the hypervisor might be in"
5576
                     " a bad state)")
5577
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5578
    for node, result in ins_l.items():
5579
      result.Raise("Can't contact node %s" % node)
5580

    
5581
    runningon_source = instance.name in ins_l[source_node].payload
5582
    runningon_target = instance.name in ins_l[target_node].payload
5583

    
5584
    if runningon_source and runningon_target:
5585
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5586
                               " or the hypervisor is confused. You will have"
5587
                               " to ensure manually that it runs only on one"
5588
                               " and restart this operation.")
5589

    
5590
    if not (runningon_source or runningon_target):
5591
      raise errors.OpExecError("Instance does not seem to be running at all."
5592
                               " In this case, it's safer to repair by"
5593
                               " running 'gnt-instance stop' to ensure disk"
5594
                               " shutdown, and then restarting it.")
5595

    
5596
    if runningon_target:
5597
      # the migration has actually succeeded, we need to update the config
5598
      self.feedback_fn("* instance running on secondary node (%s),"
5599
                       " updating config" % target_node)
5600
      instance.primary_node = target_node
5601
      self.cfg.Update(instance, self.feedback_fn)
5602
      demoted_node = source_node
5603
    else:
5604
      self.feedback_fn("* instance confirmed to be running on its"
5605
                       " primary node (%s)" % source_node)
5606
      demoted_node = target_node
5607

    
5608
    self._EnsureSecondary(demoted_node)
5609
    try:
5610
      self._WaitUntilSync()
5611
    except errors.OpExecError:
5612
      # we ignore errors here, since if the device is standalone, it
5613
      # won't be able to sync
5614
      pass
5615
    self._GoStandalone()
5616
    self._GoReconnect(False)
5617
    self._WaitUntilSync()
5618

    
5619
    self.feedback_fn("* done")
5620

    
5621
  def _RevertDiskStatus(self):
5622
    """Try to revert the disk status after a failed migration.
5623

5624
    """
5625
    target_node = self.target_node
5626
    try:
5627
      self._EnsureSecondary(target_node)
5628
      self._GoStandalone()
5629
      self._GoReconnect(False)
5630
      self._WaitUntilSync()
5631
    except errors.OpExecError, err:
5632
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5633
                         " drives: error '%s'\n"
5634
                         "Please look and recover the instance status" %
5635
                         str(err))
5636

    
5637
  def _AbortMigration(self):
5638
    """Call the hypervisor code to abort a started migration.
5639

5640
    """
5641
    instance = self.instance
5642
    target_node = self.target_node
5643
    migration_info = self.migration_info
5644

    
5645
    abort_result = self.rpc.call_finalize_migration(target_node,
5646
                                                    instance,
5647
                                                    migration_info,
5648
                                                    False)
5649
    abort_msg = abort_result.fail_msg
5650
    if abort_msg:
5651
      logging.error("Aborting migration failed on target node %s: %s",
5652
                    target_node, abort_msg)
5653
      # Don't raise an exception here, as we still have to try to revert the
5654
      # disk status, even if this step failed.
5655

    
5656
  def _ExecMigration(self):
5657
    """Migrate an instance.
5658

5659
    The migrate is done by:
5660
      - change the disks into dual-master mode
5661
      - wait until disks are fully synchronized again
5662
      - migrate the instance
5663
      - change disks on the new secondary node (the old primary) to secondary
5664
      - wait until disks are fully synchronized
5665
      - change disks into single-master mode
5666

5667
    """
5668
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has the
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate a logical volume name for each of the given
  extensions.

  """
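  # Illustrative example: with exts=[".disk0", ".disk1"] this returns names
  # of the form "<unique-id>.disk0" and "<unique-id>.disk1", with a freshly
  # generated unique id per extension.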
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
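  # The returned device is a single DRBD8 disk whose two children are plain
  # LVs: a data volume of the requested size and a fixed 128 MB volume that
  # holds the DRBD metadata.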
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
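  # Overview of the handled templates (descriptive only):
  #   - diskless: no disks at all
  #   - plain:    one LV per requested disk
  #   - drbd8:    one DRBD8 device per disk, with minors allocated on both
  #               the primary and the single secondary node
  #   - file:     one file-backed disk per entry under file_storage_dir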
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5955
  """Create all disks for an instance.
5956

5957
  This abstracts away some work from AddInstance.
5958

5959
  @type lu: L{LogicalUnit}
5960
  @param lu: the logical unit on whose behalf we execute
5961
  @type instance: L{objects.Instance}
5962
  @param instance: the instance whose disks we should create
5963
  @type to_skip: list
5964
  @param to_skip: list of indices to skip
5965
  @type target_node: string
5966
  @param target_node: if passed, overrides the target node for creation
5967
  @rtype: boolean
5968
  @return: the success of the creation
5969

5970
  """
5971
  info = _GetInstanceInfoText(instance)
5972
  if target_node is None:
5973
    pnode = instance.primary_node
5974
    all_nodes = instance.all_nodes
5975
  else:
5976
    pnode = target_node
5977
    all_nodes = [pnode]
5978

    
5979
  if instance.disk_template == constants.DT_FILE:
5980
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5981
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5982

    
5983
    result.Raise("Failed to create directory '%s' on"
5984
                 " node %s" % (file_storage_dir, pnode))
5985

    
5986
  # Note: this needs to be kept in sync with adding of disks in
5987
  # LUSetInstanceParams
5988
  for idx, device in enumerate(instance.disks):
5989
    if to_skip and idx in to_skip:
5990
      continue
5991
    logging.info("Creating volume %s for instance %s",
5992
                 device.iv_name, instance.name)
5993
    #HARDCODE
5994
    for node in all_nodes:
5995
      f_create = node == pnode
5996
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5997

    
5998

    
5999
def _RemoveDisks(lu, instance, target_node=None):
6000
  """Remove all disks for an instance.
6001

6002
  This abstracts away some work from `AddInstance()` and
6003
  `RemoveInstance()`. Note that in case some of the devices couldn't
6004
  be removed, the removal will continue with the other ones (compare
6005
  with `_CreateDisks()`).
6006

6007
  @type lu: L{LogicalUnit}
6008
  @param lu: the logical unit on whose behalf we execute
6009
  @type instance: L{objects.Instance}
6010
  @param instance: the instance whose disks we should remove
6011
  @type target_node: string
6012
  @param target_node: used to override the node on which to remove the disks
6013
  @rtype: boolean
6014
  @return: the success of the removal
6015

6016
  """
6017
  logging.info("Removing block devices for instance %s", instance.name)
6018

    
6019
  all_result = True
6020
  for device in instance.disks:
6021
    if target_node:
6022
      edata = [(target_node, device)]
6023
    else:
6024
      edata = device.ComputeNodeTree(instance.primary_node)
6025
    for node, disk in edata:
6026
      lu.cfg.SetDiskID(disk, node)
6027
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6028
      if msg:
6029
        lu.LogWarning("Could not remove block device %s on node %s,"
6030
                      " continuing anyway: %s", device.iv_name, node, msg)
6031
        all_result = False
6032

    
6033
  if instance.disk_template == constants.DT_FILE:
6034
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6035
    if target_node:
6036
      tgt = target_node
6037
    else:
6038
      tgt = instance.primary_node
6039
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6040
    if result.fail_msg:
6041
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6042
                    file_storage_dir, instance.primary_node, result.fail_msg)
6043
      all_result = False
6044

    
6045
  return all_result
6046

    
6047

    
6048
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group.

  """
  # Required free disk space as a function of the disk template and disk sizes
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }
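  # Worked example: for DT_DRBD8 with two disks of 1024 MB and 512 MB the
  # result is (1024 + 128) + (512 + 128) = 1792 MB of required VG space.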

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
6069
  """Hypervisor parameter validation.
6070

6071
  This function abstracts the hypervisor parameter validation to be
6072
  used in both instance create and instance modify.
6073

6074
  @type lu: L{LogicalUnit}
6075
  @param lu: the logical unit for which we check
6076
  @type nodenames: list
6077
  @param nodenames: the list of nodes on which we should check
6078
  @type hvname: string
6079
  @param hvname: the name of the hypervisor we should use
6080
  @type hvparams: dict
6081
  @param hvparams: the parameters which we need to check
6082
  @raise errors.OpPrereqError: if the parameters are not valid
6083

6084
  """
6085
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6086
                                                  hvname,
6087
                                                  hvparams)
6088
  for node in nodenames:
6089
    info = hvinfo[node]
6090
    if info.offline:
6091
      continue
6092
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6093

    
6094

    
6095
def _CheckOSParams(lu, required, nodenames, osname, osparams):
6096
  """OS parameters validation.
6097

6098
  @type lu: L{LogicalUnit}
6099
  @param lu: the logical unit for which we check
6100
  @type required: boolean
6101
  @param required: whether the validation should fail if the OS is not
6102
      found
6103
  @type nodenames: list
6104
  @param nodenames: the list of nodes on which we should check
6105
  @type osname: string
6106
  @param osname: the name of the OS we should use
6107
  @type osparams: dict
6108
  @param osparams: the parameters which we need to check
6109
  @raise errors.OpPrereqError: if the parameters are not valid
6110

6111
  """
6112
  result = lu.rpc.call_os_validate(required, nodenames, osname,
6113
                                   [constants.OS_VALIDATE_PARAMETERS],
6114
                                   osparams)
6115
  for node, nres in result.items():
6116
    # we don't check for offline cases since this should be run only
6117
    # against the master node and/or an instance's nodes
6118
    nres.Raise("OS Parameters validation failed on node %s" % node)
6119
    if not nres.payload:
6120
      lu.LogInfo("OS %s not found on node %s, validation skipped",
6121
                 osname, node)
6122

    
6123

    
6124
class LUCreateInstance(LogicalUnit):
6125
  """Create an instance.
6126

6127
  """
6128
  HPATH = "instance-add"
6129
  HTYPE = constants.HTYPE_INSTANCE
6130
  _OP_REQP = ["instance_name", "disks",
6131
              "mode", "start",
6132
              "wait_for_sync", "ip_check", "nics",
6133
              "hvparams", "beparams"]
6134
  REQ_BGL = False
6135

    
6136
  def CheckArguments(self):
6137
    """Check arguments.
6138

6139
    """
6140
    # set optional parameters to none if they don't exist
6141
    for attr in ["pnode", "snode", "iallocator", "hypervisor",
6142
                 "disk_template", "identify_defaults"]:
6143
      if not hasattr(self.op, attr):
6144
        setattr(self.op, attr, None)
6145

    
6146
    # do not require name_check to ease forward/backward compatibility
6147
    # for tools
6148
    if not hasattr(self.op, "name_check"):
6149
      self.op.name_check = True
6150
    if not hasattr(self.op, "no_install"):
6151
      self.op.no_install = False
6152
    if self.op.no_install and self.op.start:
6153
      self.LogInfo("No-installation mode selected, disabling startup")
6154
      self.op.start = False
6155
    # validate/normalize the instance name
6156
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6157
    if self.op.ip_check and not self.op.name_check:
6158
      # TODO: make the ip check more flexible and not depend on the name check
6159
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
6160
                                 errors.ECODE_INVAL)
6161

    
6162
    # check nics' parameter names
6163
    for nic in self.op.nics:
6164
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6165

    
6166
    # check disks. parameter names and consistent adopt/no-adopt strategy
6167
    has_adopt = has_no_adopt = False
6168
    for disk in self.op.disks:
6169
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6170
      if "adopt" in disk:
6171
        has_adopt = True
6172
      else:
6173
        has_no_adopt = True
6174
    if has_adopt and has_no_adopt:
6175
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6176
                                 errors.ECODE_INVAL)
6177
    if has_adopt:
6178
      if self.op.disk_template != constants.DT_PLAIN:
6179
        raise errors.OpPrereqError("Disk adoption is only supported for the"
6180
                                   " 'plain' disk template",
6181
                                   errors.ECODE_INVAL)
6182
      if self.op.iallocator is not None:
6183
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6184
                                   " iallocator script", errors.ECODE_INVAL)
6185
      if self.op.mode == constants.INSTANCE_IMPORT:
6186
        raise errors.OpPrereqError("Disk adoption not allowed for"
6187
                                   " instance import", errors.ECODE_INVAL)
6188

    
6189
    self.adopt_disks = has_adopt
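    # Descriptive note: "adoption" means reusing logical volumes that already
    # exist on the node instead of creating fresh ones, which is why the
    # checks above restrict it to the plain disk template and disallow
    # combining it with an iallocator or with instance imports.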
6190

    
6191
    # verify creation mode
6192
    if self.op.mode not in constants.INSTANCE_CREATE_MODES:
6193
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6194
                                 self.op.mode, errors.ECODE_INVAL)
6195

    
6196
    # instance name verification
6197
    if self.op.name_check:
6198
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6199
      self.op.instance_name = self.hostname1.name
6200
      # used in CheckPrereq for ip ping check
6201
      self.check_ip = self.hostname1.ip
6202
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6203
      raise errors.OpPrereqError("Remote imports require names to be checked" %
6204
                                 errors.ECODE_INVAL)
6205
    else:
6206
      self.check_ip = None
6207

    
6208
    # file storage checks
6209
    if (self.op.file_driver and
6210
        not self.op.file_driver in constants.FILE_DRIVER):
6211
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6212
                                 self.op.file_driver, errors.ECODE_INVAL)
6213

    
6214
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6215
      raise errors.OpPrereqError("File storage directory path not absolute",
6216
                                 errors.ECODE_INVAL)
6217

    
6218
    ### Node/iallocator related checks
6219
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
6220
      raise errors.OpPrereqError("One and only one of iallocator and primary"
6221
                                 " node must be given",
6222
                                 errors.ECODE_INVAL)
6223

    
6224
    self._cds = _GetClusterDomainSecret()
6225

    
6226
    if self.op.mode == constants.INSTANCE_IMPORT:
6227
      # On import force_variant must be True, because if we forced it at
6228
      # initial install, our only chance when importing it back is that it
6229
      # works again!
6230
      self.op.force_variant = True
6231

    
6232
      if self.op.no_install:
6233
        self.LogInfo("No-installation mode has no effect during import")
6234

    
6235
    elif self.op.mode == constants.INSTANCE_CREATE:
6236
      if getattr(self.op, "os_type", None) is None:
6237
        raise errors.OpPrereqError("No guest OS specified",
6238
                                   errors.ECODE_INVAL)
6239
      self.op.force_variant = getattr(self.op, "force_variant", False)
6240
      if self.op.disk_template is None:
6241
        raise errors.OpPrereqError("No disk template specified",
6242
                                   errors.ECODE_INVAL)
6243

    
6244
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6245
      # Check handshake to ensure both clusters have the same domain secret
6246
      src_handshake = getattr(self.op, "source_handshake", None)
6247
      if not src_handshake:
6248
        raise errors.OpPrereqError("Missing source handshake",
6249
                                   errors.ECODE_INVAL)
6250

    
6251
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6252
                                                           src_handshake)
6253
      if errmsg:
6254
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6255
                                   errors.ECODE_INVAL)
6256

    
6257
      # Load and check source CA
6258
      self.source_x509_ca_pem = getattr(self.op, "source_x509_ca", None)
6259
      if not self.source_x509_ca_pem:
6260
        raise errors.OpPrereqError("Missing source X509 CA",
6261
                                   errors.ECODE_INVAL)
6262

    
6263
      try:
6264
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6265
                                                    self._cds)
6266
      except OpenSSL.crypto.Error, err:
6267
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6268
                                   (err, ), errors.ECODE_INVAL)
6269

    
6270
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6271
      if errcode is not None:
6272
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6273
                                   errors.ECODE_INVAL)
6274

    
6275
      self.source_x509_ca = cert
6276

    
6277
      src_instance_name = getattr(self.op, "source_instance_name", None)
6278
      if not src_instance_name:
6279
        raise errors.OpPrereqError("Missing source instance name",
6280
                                   errors.ECODE_INVAL)
6281

    
6282
      self.source_instance_name = \
6283
        utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6284

    
6285
    else:
6286
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6287
                                 self.op.mode, errors.ECODE_INVAL)
6288

    
6289
  def ExpandNames(self):
6290
    """ExpandNames for CreateInstance.
6291

6292
    Figure out the right locks for instance creation.
6293

6294
    """
6295
    self.needed_locks = {}
6296

    
6297
    instance_name = self.op.instance_name
6298
    # this is just a preventive check, but someone might still add this
6299
    # instance in the meantime, and creation will fail at lock-add time
6300
    if instance_name in self.cfg.GetInstanceList():
6301
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6302
                                 instance_name, errors.ECODE_EXISTS)
6303

    
6304
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6305

    
6306
    if self.op.iallocator:
6307
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6308
    else:
6309
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6310
      nodelist = [self.op.pnode]
6311
      if self.op.snode is not None:
6312
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6313
        nodelist.append(self.op.snode)
6314
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6315

    
6316
    # in case of import lock the source node too
6317
    if self.op.mode == constants.INSTANCE_IMPORT:
6318
      src_node = getattr(self.op, "src_node", None)
6319
      src_path = getattr(self.op, "src_path", None)
6320

    
6321
      if src_path is None:
6322
        self.op.src_path = src_path = self.op.instance_name
6323

    
6324
      if src_node is None:
6325
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6326
        self.op.src_node = None
6327
        if os.path.isabs(src_path):
6328
          raise errors.OpPrereqError("Importing an instance from an absolute"
6329
                                     " path requires a source node option.",
6330
                                     errors.ECODE_INVAL)
6331
      else:
6332
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6333
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6334
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6335
        if not os.path.isabs(src_path):
6336
          self.op.src_path = src_path = \
6337
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6338

    
6339
  def _RunAllocator(self):
6340
    """Run the allocator based on input opcode.
6341

6342
    """
6343
    nics = [n.ToDict() for n in self.nics]
6344
    ial = IAllocator(self.cfg, self.rpc,
6345
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6346
                     name=self.op.instance_name,
6347
                     disk_template=self.op.disk_template,
6348
                     tags=[],
6349
                     os=self.op.os_type,
6350
                     vcpus=self.be_full[constants.BE_VCPUS],
6351
                     mem_size=self.be_full[constants.BE_MEMORY],
6352
                     disks=self.disks,
6353
                     nics=nics,
6354
                     hypervisor=self.op.hypervisor,
6355
                     )
6356

    
6357
    ial.Run(self.op.iallocator)
6358

    
6359
    if not ial.success:
6360
      raise errors.OpPrereqError("Can't compute nodes using"
6361
                                 " iallocator '%s': %s" %
6362
                                 (self.op.iallocator, ial.info),
6363
                                 errors.ECODE_NORES)
6364
    if len(ial.result) != ial.required_nodes:
6365
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6366
                                 " of nodes (%s), required %s" %
6367
                                 (self.op.iallocator, len(ial.result),
6368
                                  ial.required_nodes), errors.ECODE_FAULT)
6369
    self.op.pnode = ial.result[0]
6370
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6371
                 self.op.instance_name, self.op.iallocator,
6372
                 utils.CommaJoin(ial.result))
6373
    if ial.required_nodes == 2:
6374
      self.op.snode = ial.result[1]
6375

    
6376
  def BuildHooksEnv(self):
6377
    """Build hooks env.
6378

6379
    This runs on master, primary and secondary nodes of the instance.
6380

6381
    """
6382
    env = {
6383
      "ADD_MODE": self.op.mode,
6384
      }
6385
    if self.op.mode == constants.INSTANCE_IMPORT:
6386
      env["SRC_NODE"] = self.op.src_node
6387
      env["SRC_PATH"] = self.op.src_path
6388
      env["SRC_IMAGES"] = self.src_images
6389

    
6390
    env.update(_BuildInstanceHookEnv(
6391
      name=self.op.instance_name,
6392
      primary_node=self.op.pnode,
6393
      secondary_nodes=self.secondaries,
6394
      status=self.op.start,
6395
      os_type=self.op.os_type,
6396
      memory=self.be_full[constants.BE_MEMORY],
6397
      vcpus=self.be_full[constants.BE_VCPUS],
6398
      nics=_NICListToTuple(self, self.nics),
6399
      disk_template=self.op.disk_template,
6400
      disks=[(d["size"], d["mode"]) for d in self.disks],
6401
      bep=self.be_full,
6402
      hvp=self.hv_full,
6403
      hypervisor_name=self.op.hypervisor,
6404
    ))
6405

    
6406
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6407
          self.secondaries)
6408
    return env, nl, nl
6409

    
6410
  def _ReadExportInfo(self):
6411
    """Reads the export information from disk.
6412

6413
    It will override the opcode source node and path with the actual
6414
    information, if these two were not specified before.
6415

6416
    @return: the export information
6417

6418
    """
6419
    assert self.op.mode == constants.INSTANCE_IMPORT
6420

    
6421
    src_node = self.op.src_node
6422
    src_path = self.op.src_path
6423

    
6424
    if src_node is None:
6425
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6426
      exp_list = self.rpc.call_export_list(locked_nodes)
6427
      found = False
6428
      for node in exp_list:
6429
        if exp_list[node].fail_msg:
6430
          continue
6431
        if src_path in exp_list[node].payload:
6432
          found = True
6433
          self.op.src_node = src_node = node
6434
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6435
                                                       src_path)
6436
          break
6437
      if not found:
6438
        raise errors.OpPrereqError("No export found for relative path %s" %
6439
                                    src_path, errors.ECODE_INVAL)
6440

    
6441
    _CheckNodeOnline(self, src_node)
6442
    result = self.rpc.call_export_info(src_node, src_path)
6443
    result.Raise("No export or invalid export found in dir %s" % src_path)
6444

    
6445
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6446
    if not export_info.has_section(constants.INISECT_EXP):
6447
      raise errors.ProgrammerError("Corrupted export config",
6448
                                   errors.ECODE_ENVIRON)
6449

    
6450
    ei_version = export_info.get(constants.INISECT_EXP, "version")
6451
    if (int(ei_version) != constants.EXPORT_VERSION):
6452
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6453
                                 (ei_version, constants.EXPORT_VERSION),
6454
                                 errors.ECODE_ENVIRON)
6455
    return export_info
6456

    
6457
  def _ReadExportParams(self, einfo):
6458
    """Use export parameters as defaults.
6459

6460
    In case the opcode doesn't specify (as in override) some instance
6461
    parameters, then try to use them from the export information, if
6462
    that declares them.
6463

6464
    """
6465
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6466

    
6467
    if self.op.disk_template is None:
6468
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
6469
        self.op.disk_template = einfo.get(constants.INISECT_INS,
6470
                                          "disk_template")
6471
      else:
6472
        raise errors.OpPrereqError("No disk template specified and the export"
6473
                                   " is missing the disk_template information",
6474
                                   errors.ECODE_INVAL)
6475

    
6476
    if not self.op.disks:
6477
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
6478
        disks = []
6479
        # TODO: import the disk iv_name too
6480
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6481
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6482
          disks.append({"size": disk_sz})
6483
        self.op.disks = disks
6484
      else:
6485
        raise errors.OpPrereqError("No disk info specified and the export"
6486
                                   " is missing the disk information",
6487
                                   errors.ECODE_INVAL)
6488

    
6489
    if (not self.op.nics and
6490
        einfo.has_option(constants.INISECT_INS, "nic_count")):
6491
      nics = []
6492
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6493
        ndict = {}
6494
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6495
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6496
          ndict[name] = v
6497
        nics.append(ndict)
6498
      self.op.nics = nics
6499

    
6500
    if (self.op.hypervisor is None and
6501
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
6502
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6503
    if einfo.has_section(constants.INISECT_HYP):
6504
      # use the export parameters but do not override the ones
6505
      # specified by the user
6506
      for name, value in einfo.items(constants.INISECT_HYP):
6507
        if name not in self.op.hvparams:
6508
          self.op.hvparams[name] = value
6509

    
6510
    if einfo.has_section(constants.INISECT_BEP):
6511
      # use the parameters, without overriding
6512
      for name, value in einfo.items(constants.INISECT_BEP):
6513
        if name not in self.op.beparams:
6514
          self.op.beparams[name] = value
6515
    else:
6516
      # try to read the parameters old style, from the main section
6517
      for name in constants.BES_PARAMETERS:
6518
        if (name not in self.op.beparams and
6519
            einfo.has_option(constants.INISECT_INS, name)):
6520
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6521

    
6522
  def _RevertToDefaults(self, cluster):
6523
    """Revert the instance parameters to the default values.
6524

6525
    """
6526
    # hvparams
6527
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6528
    for name in self.op.hvparams.keys():
6529
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6530
        del self.op.hvparams[name]
6531
    # beparams
6532
    be_defs = cluster.SimpleFillBE({})
6533
    for name in self.op.beparams.keys():
6534
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
6535
        del self.op.beparams[name]
6536
    # nic params
6537
    nic_defs = cluster.SimpleFillNIC({})
6538
    for nic in self.op.nics:
6539
      for name in constants.NICS_PARAMETERS:
6540
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6541
          del nic[name]
6542

    
6543
  def CheckPrereq(self):
6544
    """Check prerequisites.
6545

6546
    """
6547
    if self.op.mode == constants.INSTANCE_IMPORT:
6548
      export_info = self._ReadExportInfo()
6549
      self._ReadExportParams(export_info)
6550

    
6551
    _CheckDiskTemplate(self.op.disk_template)
6552

    
6553
    if (not self.cfg.GetVGName() and
6554
        self.op.disk_template not in constants.DTS_NOT_LVM):
6555
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6556
                                 " instances", errors.ECODE_STATE)
6557

    
6558
    if self.op.hypervisor is None:
6559
      self.op.hypervisor = self.cfg.GetHypervisorType()
6560

    
6561
    cluster = self.cfg.GetClusterInfo()
6562
    enabled_hvs = cluster.enabled_hypervisors
6563
    if self.op.hypervisor not in enabled_hvs:
6564
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6565
                                 " cluster (%s)" % (self.op.hypervisor,
6566
                                  ",".join(enabled_hvs)),
6567
                                 errors.ECODE_STATE)
6568

    
6569
    # check hypervisor parameter syntax (locally)
6570
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6571
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6572
                                      self.op.hvparams)
6573
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6574
    hv_type.CheckParameterSyntax(filled_hvp)
6575
    self.hv_full = filled_hvp
6576
    # check that we don't specify global parameters on an instance
6577
    _CheckGlobalHvParams(self.op.hvparams)
6578

    
6579
    # fill and remember the beparams dict
6580
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6581
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
6582

    
6583
    # now that hvp/bep are in final format, let's reset to defaults,
6584
    # if told to do so
6585
    if self.op.identify_defaults:
6586
      self._RevertToDefaults(cluster)
6587

    
6588
    # NIC buildup
6589
    self.nics = []
6590
    for idx, nic in enumerate(self.op.nics):
6591
      nic_mode_req = nic.get("mode", None)
6592
      nic_mode = nic_mode_req
6593
      if nic_mode is None:
6594
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6595

    
6596
      # in routed mode, for the first nic, the default ip is 'auto'
6597
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6598
        default_ip_mode = constants.VALUE_AUTO
6599
      else:
6600
        default_ip_mode = constants.VALUE_NONE
6601

    
6602
      # ip validity checks
6603
      ip = nic.get("ip", default_ip_mode)
6604
      if ip is None or ip.lower() == constants.VALUE_NONE:
6605
        nic_ip = None
6606
      elif ip.lower() == constants.VALUE_AUTO:
6607
        if not self.op.name_check:
6608
          raise errors.OpPrereqError("IP address set to auto but name checks"
6609
                                     " have been skipped. Aborting.",
6610
                                     errors.ECODE_INVAL)
6611
        nic_ip = self.hostname1.ip
6612
      else:
6613
        if not utils.IsValidIP(ip):
6614
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6615
                                     " like a valid IP" % ip,
6616
                                     errors.ECODE_INVAL)
6617
        nic_ip = ip
6618

    
6619
      # TODO: check the ip address for uniqueness
6620
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6621
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
6622
                                   errors.ECODE_INVAL)
6623

    
6624
      # MAC address verification
6625
      mac = nic.get("mac", constants.VALUE_AUTO)
6626
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6627
        mac = utils.NormalizeAndValidateMac(mac)
6628

    
6629
        try:
6630
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
6631
        except errors.ReservationError:
6632
          raise errors.OpPrereqError("MAC address %s already in use"
6633
                                     " in cluster" % mac,
6634
                                     errors.ECODE_NOTUNIQUE)
6635

    
6636
      # bridge verification
6637
      bridge = nic.get("bridge", None)
6638
      link = nic.get("link", None)
6639
      if bridge and link:
6640
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6641
                                   " at the same time", errors.ECODE_INVAL)
6642
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6643
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6644
                                   errors.ECODE_INVAL)
6645
      elif bridge:
6646
        link = bridge
6647

    
6648
      nicparams = {}
6649
      if nic_mode_req:
6650
        nicparams[constants.NIC_MODE] = nic_mode_req
6651
      if link:
6652
        nicparams[constants.NIC_LINK] = link
6653

    
6654
      check_params = cluster.SimpleFillNIC(nicparams)
6655
      objects.NIC.CheckParameterSyntax(check_params)
6656
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6657

    
6658
    # disk checks/pre-build
6659
    self.disks = []
6660
    for disk in self.op.disks:
6661
      mode = disk.get("mode", constants.DISK_RDWR)
6662
      if mode not in constants.DISK_ACCESS_SET:
6663
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6664
                                   mode, errors.ECODE_INVAL)
6665
      size = disk.get("size", None)
6666
      if size is None:
6667
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6668
      try:
6669
        size = int(size)
6670
      except (TypeError, ValueError):
6671
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6672
                                   errors.ECODE_INVAL)
6673
      new_disk = {"size": size, "mode": mode}
6674
      if "adopt" in disk:
6675
        new_disk["adopt"] = disk["adopt"]
6676
      self.disks.append(new_disk)
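    # At this point every entry of self.disks is a normalized dict containing
    # at least "size" (an integer) and "mode", plus "adopt" when an existing
    # volume is being adopted.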
6677

    
6678
    if self.op.mode == constants.INSTANCE_IMPORT:
6679

    
6680
      # Check that the new instance doesn't have less disks than the export
6681
      instance_disks = len(self.disks)
6682
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6683
      if instance_disks < export_disks:
6684
        raise errors.OpPrereqError("Not enough disks to import."
6685
                                   " (instance: %d, export: %d)" %
6686
                                   (instance_disks, export_disks),
6687
                                   errors.ECODE_INVAL)
6688

    
6689
      disk_images = []
6690
      for idx in range(export_disks):
6691
        option = 'disk%d_dump' % idx
6692
        if export_info.has_option(constants.INISECT_INS, option):
6693
          # FIXME: are the old os-es, disk sizes, etc. useful?
6694
          export_name = export_info.get(constants.INISECT_INS, option)
6695
          image = utils.PathJoin(self.op.src_path, export_name)
6696
          disk_images.append(image)
6697
        else:
6698
          disk_images.append(False)
6699

    
6700
      self.src_images = disk_images
6701

    
6702
      old_name = export_info.get(constants.INISECT_INS, 'name')
6703
      try:
6704
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6705
      except (TypeError, ValueError), err:
6706
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
6707
                                   " an integer: %s" % str(err),
6708
                                   errors.ECODE_STATE)
6709
      if self.op.instance_name == old_name:
6710
        for idx, nic in enumerate(self.nics):
6711
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6712
            nic_mac_ini = 'nic%d_mac' % idx
6713
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6714

    
6715
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6716

    
6717
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
6718
    if self.op.ip_check:
6719
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6720
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6721
                                   (self.check_ip, self.op.instance_name),
6722
                                   errors.ECODE_NOTUNIQUE)
6723

    
6724
    #### mac address generation
6725
    # By generating here the mac address both the allocator and the hooks get
6726
    # the real final mac address rather than the 'auto' or 'generate' value.
6727
    # There is a race condition between the generation and the instance object
6728
    # creation, which means that we know the mac is valid now, but we're not
6729
    # sure it will be when we actually add the instance. If things go bad
6730
    # adding the instance will abort because of a duplicate mac, and the
6731
    # creation job will fail.
6732
    for nic in self.nics:
6733
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6734
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6735

    
6736
    #### allocator run
6737

    
6738
    if self.op.iallocator is not None:
6739
      self._RunAllocator()
6740

    
6741
    #### node related checks
6742

    
6743
    # check primary node
6744
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6745
    assert self.pnode is not None, \
6746
      "Cannot retrieve locked node %s" % self.op.pnode
6747
    if pnode.offline:
6748
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6749
                                 pnode.name, errors.ECODE_STATE)
6750
    if pnode.drained:
6751
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6752
                                 pnode.name, errors.ECODE_STATE)
6753

    
6754
    self.secondaries = []
6755

    
6756
    # mirror node verification
6757
    if self.op.disk_template in constants.DTS_NET_MIRROR:
6758
      if self.op.snode is None:
6759
        raise errors.OpPrereqError("The networked disk templates need"
6760
                                   " a mirror node", errors.ECODE_INVAL)
6761
      if self.op.snode == pnode.name:
6762
        raise errors.OpPrereqError("The secondary node cannot be the"
6763
                                   " primary node.", errors.ECODE_INVAL)
6764
      _CheckNodeOnline(self, self.op.snode)
6765
      _CheckNodeNotDrained(self, self.op.snode)
6766
      self.secondaries.append(self.op.snode)
6767

    
6768
    nodenames = [pnode.name] + self.secondaries
6769

    
6770
    req_size = _ComputeDiskSize(self.op.disk_template,
6771
                                self.disks)
6772

    
6773
    # Check lv size requirements, if not adopting
6774
    if req_size is not None and not self.adopt_disks:
6775
      _CheckNodesFreeDisk(self, nodenames, req_size)
6776

    
6777
    if self.adopt_disks: # instead, we must check the adoption data
6778
      all_lvs = set([i["adopt"] for i in self.disks])
6779
      if len(all_lvs) != len(self.disks):
6780
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
6781
                                   errors.ECODE_INVAL)
6782
      for lv_name in all_lvs:
6783
        try:
6784
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6785
        except errors.ReservationError:
6786
          raise errors.OpPrereqError("LV named %s used by another instance" %
6787
                                     lv_name, errors.ECODE_NOTUNIQUE)
6788

    
6789
      node_lvs = self.rpc.call_lv_list([pnode.name],
6790
                                       self.cfg.GetVGName())[pnode.name]
6791
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6792
      node_lvs = node_lvs.payload
6793
      delta = all_lvs.difference(node_lvs.keys())
6794
      if delta:
6795
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
6796
                                   utils.CommaJoin(delta),
6797
                                   errors.ECODE_INVAL)
6798
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6799
      if online_lvs:
6800
        raise errors.OpPrereqError("Online logical volumes found, cannot"
6801
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
6802
                                   errors.ECODE_STATE)
6803
      # update the size of disk based on what is found
6804
      for dsk in self.disks:
6805
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6806

    
6807
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6808

    
6809
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6810

    
6811
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6812

    
6813
    # memory check on primary node
6814
    if self.op.start:
6815
      _CheckNodeFreeMemory(self, self.pnode.name,
6816
                           "creating instance %s" % self.op.instance_name,
6817
                           self.be_full[constants.BE_MEMORY],
6818
                           self.op.hypervisor)
6819

    
6820
    self.dry_run_result = list(nodenames)
6821

    
6822
  def Exec(self, feedback_fn):
6823
    """Create and add the instance to the cluster.
6824

6825
    """
6826
    instance = self.op.instance_name
6827
    pnode_name = self.pnode.name
6828

    
6829
    ht_kind = self.op.hypervisor
6830
    if ht_kind in constants.HTS_REQ_PORT:
6831
      network_port = self.cfg.AllocatePort()
6832
    else:
6833
      network_port = None
6834

    
6835
    if constants.ENABLE_FILE_STORAGE:
6836
      # this is needed because os.path.join does not accept None arguments
6837
      if self.op.file_storage_dir is None:
6838
        string_file_storage_dir = ""
6839
      else:
6840
        string_file_storage_dir = self.op.file_storage_dir
6841

    
6842
      # build the full file storage dir path
6843
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6844
                                        string_file_storage_dir, instance)
6845
    else:
6846
      file_storage_dir = ""
6847

    
6848
    disks = _GenerateDiskTemplate(self,
6849
                                  self.op.disk_template,
6850
                                  instance, pnode_name,
6851
                                  self.secondaries,
6852
                                  self.disks,
6853
                                  file_storage_dir,
6854
                                  self.op.file_driver,
6855
                                  0)
6856

    
6857
    iobj = objects.Instance(name=instance, os=self.op.os_type,
6858
                            primary_node=pnode_name,
6859
                            nics=self.nics, disks=disks,
6860
                            disk_template=self.op.disk_template,
6861
                            admin_up=False,
6862
                            network_port=network_port,
6863
                            beparams=self.op.beparams,
6864
                            hvparams=self.op.hvparams,
6865
                            hypervisor=self.op.hypervisor,
6866
                            )
6867

    
6868
    if self.adopt_disks:
6869
      # rename LVs to the newly-generated names; we need to construct
6870
      # 'fake' LV disks with the old data, plus the new unique_id
6871
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6872
      rename_to = []
6873
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
6874
        rename_to.append(t_dsk.logical_id)
6875
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6876
        self.cfg.SetDiskID(t_dsk, pnode_name)
6877
      result = self.rpc.call_blockdev_rename(pnode_name,
6878
                                             zip(tmp_disks, rename_to))
6879
      result.Raise("Failed to rename adoped LVs")
6880
    else:
6881
      feedback_fn("* creating instance disks...")
6882
      try:
6883
        _CreateDisks(self, iobj)
6884
      except errors.OpExecError:
6885
        self.LogWarning("Device creation failed, reverting...")
6886
        try:
6887
          _RemoveDisks(self, iobj)
6888
        finally:
6889
          self.cfg.ReleaseDRBDMinors(instance)
6890
          raise
6891

    
6892
    feedback_fn("adding instance %s to cluster config" % instance)
6893

    
6894
    self.cfg.AddInstance(iobj, self.proc.GetECId())
6895

    
6896
    # Declare that we don't want to remove the instance lock anymore, as we've
6897
    # added the instance to the config
6898
    del self.remove_locks[locking.LEVEL_INSTANCE]
6899
    # Unlock all the nodes
6900
    if self.op.mode == constants.INSTANCE_IMPORT:
6901
      nodes_keep = [self.op.src_node]
6902
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6903
                       if node != self.op.src_node]
6904
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6905
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6906
    else:
6907
      self.context.glm.release(locking.LEVEL_NODE)
6908
      del self.acquired_locks[locking.LEVEL_NODE]
6909

    
6910
    if self.op.wait_for_sync:
6911
      disk_abort = not _WaitForSync(self, iobj)
6912
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
6913
      # make sure the disks are not degraded (still sync-ing is ok)
6914
      time.sleep(15)
6915
      feedback_fn("* checking mirrors status")
6916
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6917
    else:
6918
      disk_abort = False
6919

    
6920
    if disk_abort:
6921
      _RemoveDisks(self, iobj)
6922
      self.cfg.RemoveInstance(iobj.name)
6923
      # Make sure the instance lock gets removed
6924
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6925
      raise errors.OpExecError("There are some degraded disks for"
6926
                               " this instance")
6927

    
6928
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6929
      if self.op.mode == constants.INSTANCE_CREATE:
6930
        if not self.op.no_install:
6931
          feedback_fn("* running the instance OS create scripts...")
6932
          # FIXME: pass debug option from opcode to backend
6933
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6934
                                                 self.op.debug_level)
6935
          result.Raise("Could not add os for instance %s"
6936
                       " on node %s" % (instance, pnode_name))
6937

    
6938
      elif self.op.mode == constants.INSTANCE_IMPORT:
6939
        feedback_fn("* running the instance OS import scripts...")
6940

    
6941
        transfers = []
6942

    
6943
        for idx, image in enumerate(self.src_images):
6944
          if not image:
6945
            continue
6946

    
6947
          # FIXME: pass debug option from opcode to backend
6948
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
6949
                                             constants.IEIO_FILE, (image, ),
6950
                                             constants.IEIO_SCRIPT,
6951
                                             (iobj.disks[idx], idx),
6952
                                             None)
6953
          transfers.append(dt)
6954

    
6955
        import_result = \
6956
          masterd.instance.TransferInstanceData(self, feedback_fn,
6957
                                                self.op.src_node, pnode_name,
6958
                                                self.pnode.secondary_ip,
6959
                                                iobj, transfers)
6960
        if not compat.all(import_result):
6961
          self.LogWarning("Some disks for instance %s on node %s were not"
6962
                          " imported successfully" % (instance, pnode_name))
6963

    
6964
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6965
        feedback_fn("* preparing remote import...")
6966
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
6967
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
6968

    
6969
        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
6970
                                                     self.source_x509_ca,
6971
                                                     self._cds, timeouts)
6972
        if not compat.all(disk_results):
6973
          # TODO: Should the instance still be started, even if some disks
6974
          # failed to import (valid for local imports, too)?
6975
          self.LogWarning("Some disks for instance %s on node %s were not"
6976
                          " imported successfully" % (instance, pnode_name))
6977

    
6978
        # Run rename script on newly imported instance
6979
        assert iobj.name == instance
6980
        feedback_fn("Running rename script for %s" % instance)
6981
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
6982
                                                   self.source_instance_name,
6983
                                                   self.op.debug_level)
6984
        if result.fail_msg:
6985
          self.LogWarning("Failed to run rename script for %s on node"
6986
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
6987

    
6988
      else:
6989
        # also checked in the prereq part
6990
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6991
                                     % self.op.mode)
6992

    
6993
    if self.op.start:
6994
      iobj.admin_up = True
6995
      self.cfg.Update(iobj, feedback_fn)
6996
      logging.info("Starting instance %s on node %s", instance, pnode_name)
6997
      feedback_fn("* starting instance...")
6998
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6999
      result.Raise("Could not start instance")
7000

    
7001
    return list(iobj.all_nodes)
7002

    
7003

    
7004
class LUConnectConsole(NoHooksLU):
7005
  """Connect to an instance's console.
7006

7007
  This is somewhat special in that it returns the command line that
7008
  you need to run on the master node in order to connect to the
7009
  console.
7010

7011
  """
7012
  _OP_REQP = ["instance_name"]
7013
  REQ_BGL = False
7014

    
7015
  def ExpandNames(self):
7016
    self._ExpandAndLockInstance()
7017

    
7018
  def CheckPrereq(self):
7019
    """Check prerequisites.
7020

7021
    This checks that the instance is in the cluster.
7022

7023
    """
7024
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7025
    assert self.instance is not None, \
7026
      "Cannot retrieve locked instance %s" % self.op.instance_name
7027
    _CheckNodeOnline(self, self.instance.primary_node)
7028

    
7029
  def Exec(self, feedback_fn):
7030
    """Connect to the console of an instance
7031

7032
    """
7033
    instance = self.instance
7034
    node = instance.primary_node
7035

    
7036
    node_insts = self.rpc.call_instance_list([node],
7037
                                             [instance.hypervisor])[node]
7038
    node_insts.Raise("Can't get node information from %s" % node)
7039

    
7040
    if instance.name not in node_insts.payload:
7041
      raise errors.OpExecError("Instance %s is not running." % instance.name)
7042

    
7043
    logging.debug("Connecting to console of %s on %s", instance.name, node)
7044

    
7045
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
7046
    cluster = self.cfg.GetClusterInfo()
7047
    # beparams and hvparams are passed separately, to avoid editing the
7048
    # instance and then saving the defaults in the instance itself.
7049
    hvparams = cluster.FillHV(instance)
7050
    beparams = cluster.FillBE(instance)
7051
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7052

    
7053
    # build ssh cmdline
7054
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7055

    
7056
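# Note (illustrative, not part of the original code): the list returned by
# LUConnectConsole.Exec is an argv vector meant to be run on the master node
# by the calling client, along these (hypothetical) lines:
#
#   argv = lu.Exec(feedback_fn)
#   os.execvp(argv[0], argv)    # hands the terminal over to the console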

    
7057
class LUReplaceDisks(LogicalUnit):
7058
  """Replace the disks of an instance.
7059

7060
  """
7061
  HPATH = "mirrors-replace"
7062
  HTYPE = constants.HTYPE_INSTANCE
7063
  _OP_REQP = ["instance_name", "mode", "disks"]
7064
  REQ_BGL = False
7065

    
7066
  def CheckArguments(self):
7067
    if not hasattr(self.op, "remote_node"):
7068
      self.op.remote_node = None
7069
    if not hasattr(self.op, "iallocator"):
7070
      self.op.iallocator = None
7071
    if not hasattr(self.op, "early_release"):
7072
      self.op.early_release = False
7073

    
7074
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7075
                                  self.op.iallocator)
7076

    
7077
  def ExpandNames(self):
7078
    self._ExpandAndLockInstance()
7079

    
7080
    if self.op.iallocator is not None:
7081
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7082

    
7083
    elif self.op.remote_node is not None:
7084
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7085
      self.op.remote_node = remote_node
7086

    
7087
      # Warning: do not remove the locking of the new secondary here
7088
      # unless DRBD8.AddChildren is changed to work in parallel;
7089
      # currently it doesn't since parallel invocations of
7090
      # FindUnusedMinor will conflict
7091
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7092
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7093

    
7094
    else:
7095
      self.needed_locks[locking.LEVEL_NODE] = []
7096
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7097

    
7098
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7099
                                   self.op.iallocator, self.op.remote_node,
7100
                                   self.op.disks, False, self.op.early_release)
7101

    
7102
    self.tasklets = [self.replacer]
7103

    
7104
  def DeclareLocks(self, level):
7105
    # If we're not already locking all nodes in the set we have to declare the
7106
    # instance's primary/secondary nodes.
7107
    if (level == locking.LEVEL_NODE and
7108
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7109
      self._LockInstancesNodes()
7110

    
7111
  def BuildHooksEnv(self):
7112
    """Build hooks env.
7113

7114
    This runs on the master, the primary and all the secondaries.
7115

7116
    """
7117
    instance = self.replacer.instance
7118
    env = {
7119
      "MODE": self.op.mode,
7120
      "NEW_SECONDARY": self.op.remote_node,
7121
      "OLD_SECONDARY": instance.secondary_nodes[0],
7122
      }
7123
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7124
    nl = [
7125
      self.cfg.GetMasterNode(),
7126
      instance.primary_node,
7127
      ]
7128
    if self.op.remote_node is not None:
7129
      nl.append(self.op.remote_node)
7130
    return env, nl, nl
7131

    
7132

    
7133
class LUEvacuateNode(LogicalUnit):
7134
  """Relocate the secondary instances from a node.
7135

7136
  """
7137
  HPATH = "node-evacuate"
7138
  HTYPE = constants.HTYPE_NODE
7139
  _OP_REQP = ["node_name"]
7140
  REQ_BGL = False
7141

    
7142
  def CheckArguments(self):
7143
    if not hasattr(self.op, "remote_node"):
7144
      self.op.remote_node = None
7145
    if not hasattr(self.op, "iallocator"):
7146
      self.op.iallocator = None
7147
    if not hasattr(self.op, "early_release"):
7148
      self.op.early_release = False
7149

    
7150
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
7151
                                  self.op.remote_node,
7152
                                  self.op.iallocator)
7153

    
7154
  def ExpandNames(self):
7155
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7156

    
7157
    self.needed_locks = {}
7158

    
7159
    # Declare node locks
7160
    if self.op.iallocator is not None:
7161
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7162

    
7163
    elif self.op.remote_node is not None:
7164
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7165

    
7166
      # Warning: do not remove the locking of the new secondary here
7167
      # unless DRBD8.AddChildren is changed to work in parallel;
7168
      # currently it doesn't since parallel invocations of
7169
      # FindUnusedMinor will conflict
7170
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7171
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7172

    
7173
    else:
7174
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7175

    
7176
    # Create tasklets for replacing disks for all secondary instances on this
7177
    # node
7178
    names = []
7179
    tasklets = []
7180

    
7181
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7182
      logging.debug("Replacing disks for instance %s", inst.name)
7183
      names.append(inst.name)
7184

    
7185
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7186
                                self.op.iallocator, self.op.remote_node, [],
7187
                                True, self.op.early_release)
7188
      tasklets.append(replacer)
7189

    
7190
    self.tasklets = tasklets
7191
    self.instance_names = names
7192

    
7193
    # Declare instance locks
7194
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7195

    
7196
  def DeclareLocks(self, level):
7197
    # If we're not already locking all nodes in the set we have to declare the
7198
    # instance's primary/secondary nodes.
7199
    if (level == locking.LEVEL_NODE and
7200
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7201
      self._LockInstancesNodes()
7202

    
7203
  def BuildHooksEnv(self):
7204
    """Build hooks env.
7205

7206
    This runs on the master, the primary and all the secondaries.
7207

7208
    """
7209
    env = {
7210
      "NODE_NAME": self.op.node_name,
7211
      }
7212

    
7213
    nl = [self.cfg.GetMasterNode()]
7214

    
7215
    if self.op.remote_node is not None:
7216
      env["NEW_SECONDARY"] = self.op.remote_node
7217
      nl.append(self.op.remote_node)
7218

    
7219
    return (env, nl, nl)
7220

    
7221

    
7222
class TLReplaceDisks(Tasklet):
7223
  """Replaces disks for an instance.
7224

7225
  Note: Locking is not within the scope of this class.
7226

7227
  """
7228
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7229
               disks, delay_iallocator, early_release):
7230
    """Initializes this class.
7231

7232
    """
7233
    Tasklet.__init__(self, lu)
7234

    
7235
    # Parameters
7236
    self.instance_name = instance_name
7237
    self.mode = mode
7238
    self.iallocator_name = iallocator_name
7239
    self.remote_node = remote_node
7240
    self.disks = disks
7241
    self.delay_iallocator = delay_iallocator
7242
    self.early_release = early_release
7243

    
7244
    # Runtime data
7245
    self.instance = None
7246
    self.new_node = None
7247
    self.target_node = None
7248
    self.other_node = None
7249
    self.remote_node_info = None
7250
    self.node_secondary_ip = None
7251

    
7252
  @staticmethod
7253
  def CheckArguments(mode, remote_node, iallocator):
7254
    """Helper function for users of this class.
7255

7256
    """
7257
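    # Accepted combinations (anything else raises OpPrereqError below):
    #   mode == REPLACE_DISK_CHG: exactly one of remote_node or iallocator set
    #   any other mode:           neither remote_node nor iallocator set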
    # check for valid parameter combination
7258
    if mode == constants.REPLACE_DISK_CHG:
7259
      if remote_node is None and iallocator is None:
7260
        raise errors.OpPrereqError("When changing the secondary either an"
7261
                                   " iallocator script must be used or the"
7262
                                   " new node given", errors.ECODE_INVAL)
7263

    
7264
      if remote_node is not None and iallocator is not None:
7265
        raise errors.OpPrereqError("Give either the iallocator or the new"
7266
                                   " secondary, not both", errors.ECODE_INVAL)
7267

    
7268
    elif remote_node is not None or iallocator is not None:
7269
      # Not replacing the secondary
7270
      raise errors.OpPrereqError("The iallocator and new node options can"
7271
                                 " only be used when changing the"
7272
                                 " secondary node", errors.ECODE_INVAL)
7273

    
7274
  @staticmethod
7275
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7276
    """Compute a new secondary node using an IAllocator.
7277

7278
    """
7279
    ial = IAllocator(lu.cfg, lu.rpc,
7280
                     mode=constants.IALLOCATOR_MODE_RELOC,
7281
                     name=instance_name,
7282
                     relocate_from=relocate_from)
7283

    
7284
    ial.Run(iallocator_name)
7285

    
7286
    if not ial.success:
7287
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7288
                                 " %s" % (iallocator_name, ial.info),
7289
                                 errors.ECODE_NORES)
7290

    
7291
    if len(ial.result) != ial.required_nodes:
7292
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7293
                                 " of nodes (%s), required %s" %
7294
                                 (iallocator_name,
7295
                                  len(ial.result), ial.required_nodes),
7296
                                 errors.ECODE_FAULT)
7297

    
7298
    remote_node_name = ial.result[0]
7299

    
7300
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7301
               instance_name, remote_node_name)
7302

    
7303
    return remote_node_name
7304

    
7305
  def _FindFaultyDisks(self, node_name):
7306
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7307
                                    node_name, True)
7308

    
7309
  def CheckPrereq(self):
7310
    """Check prerequisites.
7311

7312
    This checks that the instance is in the cluster.
7313

7314
    """
7315
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7316
    assert instance is not None, \
7317
      "Cannot retrieve locked instance %s" % self.instance_name
7318

    
7319
    if instance.disk_template != constants.DT_DRBD8:
7320
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7321
                                 " instances", errors.ECODE_INVAL)
7322

    
7323
    if len(instance.secondary_nodes) != 1:
7324
      raise errors.OpPrereqError("The instance has a strange layout,"
7325
                                 " expected one secondary but found %d" %
7326
                                 len(instance.secondary_nodes),
7327
                                 errors.ECODE_FAULT)
7328

    
7329
    if not self.delay_iallocator:
7330
      self._CheckPrereq2()
7331

    
7332
  def _CheckPrereq2(self):
7333
    """Check prerequisites, second part.
7334

7335
    This function should always be part of CheckPrereq. It was separated and is
7336
    now called from Exec because, during node evacuation, the iallocator would
7337
    otherwise only be run against an unmodified cluster model, without taking
7338
    the planned changes into account.
7339

7340
    """
7341
    instance = self.instance
7342
    secondary_node = instance.secondary_nodes[0]
7343

    
7344
    if self.iallocator_name is None:
7345
      remote_node = self.remote_node
7346
    else:
7347
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7348
                                       instance.name, instance.secondary_nodes)
7349

    
7350
    if remote_node is not None:
7351
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7352
      assert self.remote_node_info is not None, \
7353
        "Cannot retrieve locked node %s" % remote_node
7354
    else:
7355
      self.remote_node_info = None
7356

    
7357
    if remote_node == self.instance.primary_node:
7358
      raise errors.OpPrereqError("The specified node is the primary node of"
7359
                                 " the instance.", errors.ECODE_INVAL)
7360

    
7361
    if remote_node == secondary_node:
7362
      raise errors.OpPrereqError("The specified node is already the"
7363
                                 " secondary node of the instance.",
7364
                                 errors.ECODE_INVAL)
7365

    
7366
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7367
                                    constants.REPLACE_DISK_CHG):
7368
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7369
                                 errors.ECODE_INVAL)
7370

    
7371
    if self.mode == constants.REPLACE_DISK_AUTO:
7372
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7373
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7374

    
7375
      if faulty_primary and faulty_secondary:
7376
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7377
                                   " one node and can not be repaired"
7378
                                   " automatically" % self.instance_name,
7379
                                   errors.ECODE_STATE)
7380

    
7381
      if faulty_primary:
7382
        self.disks = faulty_primary
7383
        self.target_node = instance.primary_node
7384
        self.other_node = secondary_node
7385
        check_nodes = [self.target_node, self.other_node]
7386
      elif faulty_secondary:
7387
        self.disks = faulty_secondary
7388
        self.target_node = secondary_node
7389
        self.other_node = instance.primary_node
7390
        check_nodes = [self.target_node, self.other_node]
7391
      else:
7392
        self.disks = []
7393
        check_nodes = []
7394

    
7395
    else:
7396
      # Non-automatic modes
7397
      if self.mode == constants.REPLACE_DISK_PRI:
7398
        self.target_node = instance.primary_node
7399
        self.other_node = secondary_node
7400
        check_nodes = [self.target_node, self.other_node]
7401

    
7402
      elif self.mode == constants.REPLACE_DISK_SEC:
7403
        self.target_node = secondary_node
7404
        self.other_node = instance.primary_node
7405
        check_nodes = [self.target_node, self.other_node]
7406

    
7407
      elif self.mode == constants.REPLACE_DISK_CHG:
7408
        self.new_node = remote_node
7409
        self.other_node = instance.primary_node
7410
        self.target_node = secondary_node
7411
        check_nodes = [self.new_node, self.other_node]
7412

    
7413
        _CheckNodeNotDrained(self.lu, remote_node)
7414

    
7415
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7416
        assert old_node_info is not None
7417
        if old_node_info.offline and not self.early_release:
7418
          # doesn't make sense to delay the release
7419
          self.early_release = True
7420
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7421
                          " early-release mode", secondary_node)
7422

    
7423
      else:
7424
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7425
                                     self.mode)
7426

    
7427
      # If not specified all disks should be replaced
7428
      if not self.disks:
7429
        self.disks = range(len(self.instance.disks))
7430

    
7431
    for node in check_nodes:
7432
      _CheckNodeOnline(self.lu, node)
7433

    
7434
    # Check whether disks are valid
7435
    for disk_idx in self.disks:
7436
      instance.FindDisk(disk_idx)
7437

    
7438
    # Get secondary node IP addresses
7439
    node_2nd_ip = {}
7440

    
7441
    for node_name in [self.target_node, self.other_node, self.new_node]:
7442
      if node_name is not None:
7443
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7444

    
7445
    self.node_secondary_ip = node_2nd_ip
7446

    
7447
  def Exec(self, feedback_fn):
7448
    """Execute disk replacement.
7449

7450
    This dispatches the disk replacement to the appropriate handler.
7451

7452
    """
7453
    if self.delay_iallocator:
7454
      self._CheckPrereq2()
7455

    
7456
    if not self.disks:
7457
      feedback_fn("No disks need replacement")
7458
      return
7459

    
7460
    feedback_fn("Replacing disk(s) %s for %s" %
7461
                (utils.CommaJoin(self.disks), self.instance.name))
7462

    
7463
    activate_disks = (not self.instance.admin_up)
7464

    
7465
    # Activate the instance disks if we're replacing them on a down instance
7466
    if activate_disks:
7467
      _StartInstanceDisks(self.lu, self.instance, True)
7468

    
7469
    try:
7470
      # Should we replace the secondary node?
7471
      if self.new_node is not None:
7472
        fn = self._ExecDrbd8Secondary
7473
      else:
7474
        fn = self._ExecDrbd8DiskOnly
7475

    
7476
      return fn(feedback_fn)
7477

    
7478
    finally:
7479
      # Deactivate the instance disks if we're replacing them on a
7480
      # down instance
7481
      if activate_disks:
7482
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7483

    
7484
  def _CheckVolumeGroup(self, nodes):
7485
    self.lu.LogInfo("Checking volume groups")
7486

    
7487
    vgname = self.cfg.GetVGName()
7488

    
7489
    # Make sure volume group exists on all involved nodes
7490
    results = self.rpc.call_vg_list(nodes)
7491
    if not results:
7492
      raise errors.OpExecError("Can't list volume groups on the nodes")
7493

    
7494
    for node in nodes:
7495
      res = results[node]
7496
      res.Raise("Error checking node %s" % node)
7497
      if vgname not in res.payload:
7498
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7499
                                 (vgname, node))
7500

    
7501
  def _CheckDisksExistence(self, nodes):
7502
    # Check disk existence
7503
    for idx, dev in enumerate(self.instance.disks):
7504
      if idx not in self.disks:
7505
        continue
7506

    
7507
      for node in nodes:
7508
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7509
        self.cfg.SetDiskID(dev, node)
7510

    
7511
        result = self.rpc.call_blockdev_find(node, dev)
7512

    
7513
        msg = result.fail_msg
7514
        if msg or not result.payload:
7515
          if not msg:
7516
            msg = "disk not found"
7517
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7518
                                   (idx, node, msg))
7519

    
7520
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7521
    for idx, dev in enumerate(self.instance.disks):
7522
      if idx not in self.disks:
7523
        continue
7524

    
7525
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7526
                      (idx, node_name))
7527

    
7528
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7529
                                   ldisk=ldisk):
7530
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7531
                                 " replace disks for instance %s" %
7532
                                 (node_name, self.instance.name))
7533

    
7534
  def _CreateNewStorage(self, node_name):
7535
    vgname = self.cfg.GetVGName()
7536
    iv_names = {}
7537

    
7538
    for idx, dev in enumerate(self.instance.disks):
7539
      if idx not in self.disks:
7540
        continue
7541

    
7542
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7543

    
7544
      self.cfg.SetDiskID(dev, node_name)
7545

    
7546
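      # each DRBD disk is backed by a data LV and a small metadata LV; create
      # a fresh, uniquely named pair of them on the target node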
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7547
      names = _GenerateUniqueNames(self.lu, lv_names)
7548

    
7549
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7550
                             logical_id=(vgname, names[0]))
7551
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7552
                             logical_id=(vgname, names[1]))
7553

    
7554
      new_lvs = [lv_data, lv_meta]
7555
      old_lvs = dev.children
7556
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7557

    
7558
      # we pass force_create=True to force the LVM creation
7559
      for new_lv in new_lvs:
7560
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7561
                        _GetInstanceInfoText(self.instance), False)
7562

    
7563
    return iv_names
7564

    
7565
  def _CheckDevices(self, node_name, iv_names):
7566
    for name, (dev, _, _) in iv_names.iteritems():
7567
      self.cfg.SetDiskID(dev, node_name)
7568

    
7569
      result = self.rpc.call_blockdev_find(node_name, dev)
7570

    
7571
      msg = result.fail_msg
7572
      if msg or not result.payload:
7573
        if not msg:
7574
          msg = "disk not found"
7575
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
7576
                                 (name, msg))
7577

    
7578
      if result.payload.is_degraded:
7579
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
7580

    
7581
  def _RemoveOldStorage(self, node_name, iv_names):
7582
    for name, (_, old_lvs, _) in iv_names.iteritems():
7583
      self.lu.LogInfo("Remove logical volumes for %s" % name)
7584

    
7585
      for lv in old_lvs:
7586
        self.cfg.SetDiskID(lv, node_name)
7587

    
7588
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7589
        if msg:
7590
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
7591
                             hint="remove unused LVs manually")
7592

    
7593
  def _ReleaseNodeLock(self, node_name):
7594
    """Releases the lock for a given node."""
7595
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7596

    
7597
  def _ExecDrbd8DiskOnly(self, feedback_fn):
7598
    """Replace a disk on the primary or secondary for DRBD 8.
7599

7600
    The algorithm for replace is quite complicated:
7601

7602
      1. for each disk to be replaced:
7603

7604
        1. create new LVs on the target node with unique names
7605
        1. detach old LVs from the drbd device
7606
        1. rename old LVs to name_replaced.<time_t>
7607
        1. rename new LVs to old LVs
7608
        1. attach the new LVs (with the old names now) to the drbd device
7609

7610
      1. wait for sync across all devices
7611

7612
      1. for each modified disk:
7613

7614
        1. remove old LVs (which have the name name_replaced.<time_t>)
7615

7616
    Failures are not very well handled.
7617

7618
    """
7619
    steps_total = 6
7620

    
7621
    # Step: check device activation
7622
    self.lu.LogStep(1, steps_total, "Check device existence")
7623
    self._CheckDisksExistence([self.other_node, self.target_node])
7624
    self._CheckVolumeGroup([self.target_node, self.other_node])
7625

    
7626
    # Step: check other node consistency
7627
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7628
    self._CheckDisksConsistency(self.other_node,
7629
                                self.other_node == self.instance.primary_node,
7630
                                False)
7631

    
7632
    # Step: create new storage
7633
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7634
    iv_names = self._CreateNewStorage(self.target_node)
7635

    
7636
    # Step: for each lv, detach+rename*2+attach
7637
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7638
    for dev, old_lvs, new_lvs in iv_names.itervalues():
7639
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7640

    
7641
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7642
                                                     old_lvs)
7643
      result.Raise("Can't detach drbd from local storage on node"
7644
                   " %s for device %s" % (self.target_node, dev.iv_name))
7645
      #dev.children = []
7646
      #cfg.Update(instance)
7647

    
7648
      # ok, we created the new LVs, so now we know we have the needed
7649
      # storage; as such, we proceed on the target node to rename
7650
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7651
      # using the assumption that logical_id == physical_id (which in
7652
      # turn is the unique_id on that node)
7653

    
7654
      # FIXME(iustin): use a better name for the replaced LVs
7655
      temp_suffix = int(time.time())
7656
      ren_fn = lambda d, suff: (d.physical_id[0],
7657
                                d.physical_id[1] + "_replaced-%s" % suff)
7658

    
7659
      # Build the rename list based on what LVs exist on the node
7660
      rename_old_to_new = []
7661
      for to_ren in old_lvs:
7662
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7663
        if not result.fail_msg and result.payload:
7664
          # device exists
7665
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7666

    
7667
      self.lu.LogInfo("Renaming the old LVs on the target node")
7668
      result = self.rpc.call_blockdev_rename(self.target_node,
7669
                                             rename_old_to_new)
7670
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
7671

    
7672
      # Now we rename the new LVs to the old LVs
7673
      self.lu.LogInfo("Renaming the new LVs on the target node")
7674
      rename_new_to_old = [(new, old.physical_id)
7675
                           for old, new in zip(old_lvs, new_lvs)]
7676
      result = self.rpc.call_blockdev_rename(self.target_node,
7677
                                             rename_new_to_old)
7678
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
7679

    
7680
      for old, new in zip(old_lvs, new_lvs):
7681
        new.logical_id = old.logical_id
7682
        self.cfg.SetDiskID(new, self.target_node)
7683

    
7684
      for disk in old_lvs:
7685
        disk.logical_id = ren_fn(disk, temp_suffix)
7686
        self.cfg.SetDiskID(disk, self.target_node)
7687

    
7688
      # Now that the new lvs have the old name, we can add them to the device
7689
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7690
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7691
                                                  new_lvs)
7692
      msg = result.fail_msg
7693
      if msg:
7694
        for new_lv in new_lvs:
7695
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
7696
                                               new_lv).fail_msg
7697
          if msg2:
7698
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7699
                               hint=("cleanup manually the unused logical"
7700
                                     "volumes"))
7701
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7702

    
7703
      dev.children = new_lvs
7704

    
7705
      self.cfg.Update(self.instance, feedback_fn)
7706

    
7707
    cstep = 5
7708
    if self.early_release:
7709
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7710
      cstep += 1
7711
      self._RemoveOldStorage(self.target_node, iv_names)
7712
      # WARNING: we release both node locks here, do not do other RPCs
7713
      # than WaitForSync to the primary node
7714
      self._ReleaseNodeLock([self.target_node, self.other_node])
7715

    
7716
    # Wait for sync
7717
    # This can fail as the old devices are degraded and _WaitForSync
7718
    # does a combined result over all disks, so we don't check its return value
7719
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7720
    cstep += 1
7721
    _WaitForSync(self.lu, self.instance)
7722

    
7723
    # Check all devices manually
7724
    self._CheckDevices(self.instance.primary_node, iv_names)
7725

    
7726
    # Step: remove old storage
7727
    if not self.early_release:
7728
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7729
      cstep += 1
7730
      self._RemoveOldStorage(self.target_node, iv_names)
7731

    
7732
  def _ExecDrbd8Secondary(self, feedback_fn):
7733
    """Replace the secondary node for DRBD 8.
7734

7735
    The algorithm for replace is quite complicated:
7736
      - for all disks of the instance:
7737
        - create new LVs on the new node with same names
7738
        - shutdown the drbd device on the old secondary
7739
        - disconnect the drbd network on the primary
7740
        - create the drbd device on the new secondary
7741
        - network attach the drbd on the primary, using an artifice:
7742
          the drbd code for Attach() will connect to the network if it
7743
          finds a device which is connected to the good local disks but
7744
          not network enabled
7745
      - wait for sync across all devices
7746
      - remove all disks from the old secondary
7747

7748
    Failures are not very well handled.
7749

7750
    """
7751
    steps_total = 6
7752

    
7753
    # Step: check device activation
7754
    self.lu.LogStep(1, steps_total, "Check device existence")
7755
    self._CheckDisksExistence([self.instance.primary_node])
7756
    self._CheckVolumeGroup([self.instance.primary_node])
7757

    
7758
    # Step: check other node consistency
7759
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7760
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
7761

    
7762
    # Step: create new storage
7763
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7764
    for idx, dev in enumerate(self.instance.disks):
7765
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7766
                      (self.new_node, idx))
7767
      # we pass force_create=True to force LVM creation
7768
      for new_lv in dev.children:
7769
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7770
                        _GetInstanceInfoText(self.instance), False)
7771

    
7772
    # Step 4: drbd minors and drbd setup changes
7773
    # after this, we must manually remove the drbd minors on both the
7774
    # error and the success paths
7775
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7776
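    # reserve one DRBD minor on the new secondary node for every disk of the
    # instance; ReleaseDRBDMinors undoes the reservation on the error paths
    # below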
    minors = self.cfg.AllocateDRBDMinor([self.new_node
7777
                                         for dev in self.instance.disks],
7778
                                        self.instance.name)
7779
    logging.debug("Allocated minors %r", minors)
7780

    
7781
    iv_names = {}
7782
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7783
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7784
                      (self.new_node, idx))
7785
      # create new devices on new_node; note that we create two IDs:
7786
      # one without port, so the drbd will be activated without
7787
      # networking information on the new node at this stage, and one
7788
      # with network, for the latter activation in step 4
7789
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7790
      if self.instance.primary_node == o_node1:
7791
        p_minor = o_minor1
7792
      else:
7793
        assert self.instance.primary_node == o_node2, "Three-node instance?"
7794
        p_minor = o_minor2
7795

    
7796
      new_alone_id = (self.instance.primary_node, self.new_node, None,
7797
                      p_minor, new_minor, o_secret)
7798
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
7799
                    p_minor, new_minor, o_secret)
7800

    
7801
      iv_names[idx] = (dev, dev.children, new_net_id)
7802
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7803
                    new_net_id)
7804
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7805
                              logical_id=new_alone_id,
7806
                              children=dev.children,
7807
                              size=dev.size)
7808
      try:
7809
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7810
                              _GetInstanceInfoText(self.instance), False)
7811
      except errors.GenericError:
7812
        self.cfg.ReleaseDRBDMinors(self.instance.name)
7813
        raise
7814

    
7815
    # We have new devices, shutdown the drbd on the old secondary
7816
    for idx, dev in enumerate(self.instance.disks):
7817
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7818
      self.cfg.SetDiskID(dev, self.target_node)
7819
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7820
      if msg:
7821
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7822
                           "node: %s" % (idx, msg),
7823
                           hint=("Please cleanup this device manually as"
7824
                                 " soon as possible"))
7825

    
7826
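    # the primary must drop its network configuration first: only a
    # standalone DRBD device will accept the subsequent attach towards the
    # new secondary (see the docstring above for the Attach() artifice)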
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7827
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7828
                                               self.node_secondary_ip,
7829
                                               self.instance.disks)\
7830
                                              [self.instance.primary_node]
7831

    
7832
    msg = result.fail_msg
7833
    if msg:
7834
      # detaches didn't succeed (unlikely)
7835
      self.cfg.ReleaseDRBDMinors(self.instance.name)
7836
      raise errors.OpExecError("Can't detach the disks from the network on"
7837
                               " old node: %s" % (msg,))
7838

    
7839
    # if we managed to detach at least one, we update all the disks of
7840
    # the instance to point to the new secondary
7841
    self.lu.LogInfo("Updating instance configuration")
7842
    for dev, _, new_logical_id in iv_names.itervalues():
7843
      dev.logical_id = new_logical_id
7844
      self.cfg.SetDiskID(dev, self.instance.primary_node)
7845

    
7846
    self.cfg.Update(self.instance, feedback_fn)
7847

    
7848
    # and now perform the drbd attach
7849
    self.lu.LogInfo("Attaching primary drbds to new secondary"
7850
                    " (standalone => connected)")
7851
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7852
                                            self.new_node],
7853
                                           self.node_secondary_ip,
7854
                                           self.instance.disks,
7855
                                           self.instance.name,
7856
                                           False)
7857
    for to_node, to_result in result.items():
7858
      msg = to_result.fail_msg
7859
      if msg:
7860
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7861
                           to_node, msg,
7862
                           hint=("please do a gnt-instance info to see the"
7863
                                 " status of disks"))
7864
    cstep = 5
7865
    if self.early_release:
7866
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7867
      cstep += 1
7868
      self._RemoveOldStorage(self.target_node, iv_names)
7869
      # WARNING: we release all node locks here, do not do other RPCs
7870
      # than WaitForSync to the primary node
7871
      self._ReleaseNodeLock([self.instance.primary_node,
7872
                             self.target_node,
7873
                             self.new_node])
7874

    
7875
    # Wait for sync
7876
    # This can fail as the old devices are degraded and _WaitForSync
7877
    # does a combined result over all disks, so we don't check its return value
7878
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7879
    cstep += 1
7880
    _WaitForSync(self.lu, self.instance)
7881

    
7882
    # Check all devices manually
7883
    self._CheckDevices(self.instance.primary_node, iv_names)
7884

    
7885
    # Step: remove old storage
7886
    if not self.early_release:
7887
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7888
      self._RemoveOldStorage(self.target_node, iv_names)
7889

    
7890

    
7891
class LURepairNodeStorage(NoHooksLU):
7892
  """Repairs the volume group on a node.
7893

7894
  """
7895
  _OP_REQP = ["node_name"]
7896
  REQ_BGL = False
7897

    
7898
  def CheckArguments(self):
7899
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7900

    
7901
    _CheckStorageType(self.op.storage_type)
7902

    
7903
  def ExpandNames(self):
7904
    self.needed_locks = {
7905
      locking.LEVEL_NODE: [self.op.node_name],
7906
      }
7907

    
7908
  def _CheckFaultyDisks(self, instance, node_name):
7909
    """Ensure faulty disks abort the opcode or at least warn."""
7910
    try:
7911
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7912
                                  node_name, True):
7913
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7914
                                   " node '%s'" % (instance.name, node_name),
7915
                                   errors.ECODE_STATE)
7916
    except errors.OpPrereqError, err:
7917
      if self.op.ignore_consistency:
7918
        self.proc.LogWarning(str(err.args[0]))
7919
      else:
7920
        raise
7921

    
7922
  def CheckPrereq(self):
7923
    """Check prerequisites.
7924

7925
    """
7926
    storage_type = self.op.storage_type
7927

    
7928
    if (constants.SO_FIX_CONSISTENCY not in
7929
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7930
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
7931
                                 " repaired" % storage_type,
7932
                                 errors.ECODE_INVAL)
7933

    
7934
    # Check whether any instance on this node has faulty disks
7935
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7936
      if not inst.admin_up:
7937
        continue
7938
      check_nodes = set(inst.all_nodes)
7939
      check_nodes.discard(self.op.node_name)
7940
      for inst_node_name in check_nodes:
7941
        self._CheckFaultyDisks(inst, inst_node_name)
7942

    
7943
  def Exec(self, feedback_fn):
7944
    feedback_fn("Repairing storage unit '%s' on %s ..." %
7945
                (self.op.name, self.op.node_name))
7946

    
7947
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7948
    result = self.rpc.call_storage_execute(self.op.node_name,
7949
                                           self.op.storage_type, st_args,
7950
                                           self.op.name,
7951
                                           constants.SO_FIX_CONSISTENCY)
7952
    result.Raise("Failed to repair storage unit '%s' on %s" %
7953
                 (self.op.name, self.op.node_name))
7954

    
7955

    
7956
class LUNodeEvacuationStrategy(NoHooksLU):
7957
  """Computes the node evacuation strategy.
7958

7959
  """
7960
  _OP_REQP = ["nodes"]
7961
  REQ_BGL = False
7962

    
7963
  def CheckArguments(self):
7964
    if not hasattr(self.op, "remote_node"):
7965
      self.op.remote_node = None
7966
    if not hasattr(self.op, "iallocator"):
7967
      self.op.iallocator = None
7968
    if self.op.remote_node is not None and self.op.iallocator is not None:
7969
      raise errors.OpPrereqError("Give either the iallocator or the new"
7970
                                 " secondary, not both", errors.ECODE_INVAL)
7971

    
7972
  def ExpandNames(self):
7973
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7974
    self.needed_locks = locks = {}
7975
    if self.op.remote_node is None:
7976
      locks[locking.LEVEL_NODE] = locking.ALL_SET
7977
    else:
7978
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7979
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7980

    
7981
  def CheckPrereq(self):
7982
    pass
7983

    
7984
  def Exec(self, feedback_fn):
7985
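    # two strategies: with an explicit remote_node, every secondary instance
    # of the evacuated nodes is simply paired with that node; otherwise the
    # iallocator is asked for a multi-evacuation plan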
    if self.op.remote_node is not None:
7986
      instances = []
7987
      for node in self.op.nodes:
7988
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7989
      result = []
7990
      for i in instances:
7991
        if i.primary_node == self.op.remote_node:
7992
          raise errors.OpPrereqError("Node %s is the primary node of"
7993
                                     " instance %s, cannot use it as"
7994
                                     " secondary" %
7995
                                     (self.op.remote_node, i.name),
7996
                                     errors.ECODE_INVAL)
7997
        result.append([i.name, self.op.remote_node])
7998
    else:
7999
      ial = IAllocator(self.cfg, self.rpc,
8000
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8001
                       evac_nodes=self.op.nodes)
8002
      ial.Run(self.op.iallocator, validate=True)
8003
      if not ial.success:
8004
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8005
                                 errors.ECODE_NORES)
8006
      result = ial.result
8007
    return result
8008

    
8009

    
8010
class LUGrowDisk(LogicalUnit):
8011
  """Grow a disk of an instance.
8012

8013
  """
8014
  HPATH = "disk-grow"
8015
  HTYPE = constants.HTYPE_INSTANCE
8016
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
8017
  REQ_BGL = False
8018

    
8019
  def ExpandNames(self):
8020
    self._ExpandAndLockInstance()
8021
    self.needed_locks[locking.LEVEL_NODE] = []
8022
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8023

    
8024
  def DeclareLocks(self, level):
8025
    if level == locking.LEVEL_NODE:
8026
      self._LockInstancesNodes()
8027

    
8028
  def BuildHooksEnv(self):
8029
    """Build hooks env.
8030

8031
    This runs on the master, the primary and all the secondaries.
8032

8033
    """
8034
    env = {
8035
      "DISK": self.op.disk,
8036
      "AMOUNT": self.op.amount,
8037
      }
8038
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8039
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8040
    return env, nl, nl
8041

    
8042
  def CheckPrereq(self):
8043
    """Check prerequisites.
8044

8045
    This checks that the instance is in the cluster.
8046

8047
    """
8048
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8049
    assert instance is not None, \
8050
      "Cannot retrieve locked instance %s" % self.op.instance_name
8051
    nodenames = list(instance.all_nodes)
8052
    for node in nodenames:
8053
      _CheckNodeOnline(self, node)
8054

    
8055

    
8056
    self.instance = instance
8057

    
8058
    if instance.disk_template not in constants.DTS_GROWABLE:
8059
      raise errors.OpPrereqError("Instance's disk layout does not support"
8060
                                 " growing.", errors.ECODE_INVAL)
8061

    
8062
    self.disk = instance.FindDisk(self.op.disk)
8063

    
8064
    if instance.disk_template != constants.DT_FILE:
8065
      # TODO: check the free disk space for file, when that feature will be
8066
      # supported
8067
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8068

    
8069
  def Exec(self, feedback_fn):
8070
    """Execute disk grow.
8071

8072
    """
8073
    instance = self.instance
8074
    disk = self.disk
8075

    
8076
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8077
    if not disks_ok:
8078
      raise errors.OpExecError("Cannot activate block device to grow")
8079

    
8080
    for node in instance.all_nodes:
8081
      self.cfg.SetDiskID(disk, node)
8082
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8083
      result.Raise("Grow request failed to node %s" % node)
8084

    
8085
      # TODO: Rewrite code to work properly
8086
      # DRBD goes into sync mode for a short amount of time after executing the
8087
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8088
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8089
      # time is a work-around.
8090
      time.sleep(5)
8091

    
8092
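    # only record the new size in the configuration once every node has
    # accepted the grow request; a failure above leaves the config untouched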
    disk.RecordGrow(self.op.amount)
8093
    self.cfg.Update(instance, feedback_fn)
8094
    if self.op.wait_for_sync:
8095
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8096
      if disk_abort:
8097
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8098
                             " status.\nPlease check the instance.")
8099
      if not instance.admin_up:
8100
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8101
    elif not instance.admin_up:
8102
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8103
                           " not supposed to be running because no wait for"
8104
                           " sync mode was requested.")
8105

    
8106

    
8107
class LUQueryInstanceData(NoHooksLU):
8108
  """Query runtime instance data.
8109

8110
  """
8111
  _OP_REQP = ["instances", "static"]
8112
  REQ_BGL = False
8113

    
8114
  def ExpandNames(self):
8115
    self.needed_locks = {}
8116
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8117

    
8118
    if not isinstance(self.op.instances, list):
8119
      raise errors.OpPrereqError("Invalid argument type 'instances'",
8120
                                 errors.ECODE_INVAL)
8121

    
8122
    if self.op.instances:
8123
      self.wanted_names = []
8124
      for name in self.op.instances:
8125
        full_name = _ExpandInstanceName(self.cfg, name)
8126
        self.wanted_names.append(full_name)
8127
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8128
    else:
8129
      self.wanted_names = None
8130
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8131

    
8132
    self.needed_locks[locking.LEVEL_NODE] = []
8133
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8134

    
8135
  def DeclareLocks(self, level):
8136
    if level == locking.LEVEL_NODE:
8137
      self._LockInstancesNodes()
8138

    
8139
  def CheckPrereq(self):
8140
    """Check prerequisites.
8141

8142
    This only checks the optional instance list against the existing names.
8143

8144
    """
8145
    if self.wanted_names is None:
8146
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8147

    
8148
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8149
                             in self.wanted_names]
8150
    return
8151

    
8152
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8153
    """Returns the status of a block device
8154

8155
    """
8156
    if self.op.static or not node:
8157
      return None
8158

    
8159
    self.cfg.SetDiskID(dev, node)
8160

    
8161
    result = self.rpc.call_blockdev_find(node, dev)
8162
    if result.offline:
8163
      return None
8164

    
8165
    result.Raise("Can't compute disk status for %s" % instance_name)
8166

    
8167
    status = result.payload
8168
    if status is None:
8169
      return None
8170

    
8171
    return (status.dev_path, status.major, status.minor,
8172
            status.sync_percent, status.estimated_time,
8173
            status.is_degraded, status.ldisk_status)
8174

    
8175
  def _ComputeDiskStatus(self, instance, snode, dev):
8176
    """Compute block device status.
8177

8178
    """
8179
    if dev.dev_type in constants.LDS_DRBD:
8180
      # we change the snode then (otherwise we use the one passed in)
8181
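      # for DRBD the logical_id starts with the two node names, so the
      # secondary is simply whichever end is not the primary node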
      if dev.logical_id[0] == instance.primary_node:
8182
        snode = dev.logical_id[1]
8183
      else:
8184
        snode = dev.logical_id[0]
8185

    
8186
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8187
                                              instance.name, dev)
8188
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8189

    
8190
    if dev.children:
8191
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8192
                      for child in dev.children]
8193
    else:
8194
      dev_children = []
8195

    
8196
    data = {
8197
      "iv_name": dev.iv_name,
8198
      "dev_type": dev.dev_type,
8199
      "logical_id": dev.logical_id,
8200
      "physical_id": dev.physical_id,
8201
      "pstatus": dev_pstatus,
8202
      "sstatus": dev_sstatus,
8203
      "children": dev_children,
8204
      "mode": dev.mode,
8205
      "size": dev.size,
8206
      }
8207

    
8208
    return data
8209

    
8210
  def Exec(self, feedback_fn):
8211
    """Gather and return data"""
8212
    result = {}
8213

    
8214
    cluster = self.cfg.GetClusterInfo()
8215

    
8216
    for instance in self.wanted_instances:
8217
      if not self.op.static:
8218
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8219
                                                  instance.name,
8220
                                                  instance.hypervisor)
8221
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8222
        remote_info = remote_info.payload
8223
        if remote_info and "state" in remote_info:
8224
          remote_state = "up"
8225
        else:
8226
          remote_state = "down"
8227
      else:
8228
        remote_state = None
8229
      if instance.admin_up:
8230
        config_state = "up"
8231
      else:
8232
        config_state = "down"
8233

    
8234
      disks = [self._ComputeDiskStatus(instance, None, device)
8235
               for device in instance.disks]
8236

    
8237
      idict = {
8238
        "name": instance.name,
8239
        "config_state": config_state,
8240
        "run_state": remote_state,
8241
        "pnode": instance.primary_node,
8242
        "snodes": instance.secondary_nodes,
8243
        "os": instance.os,
8244
        # this happens to be the same format used for hooks
8245
        "nics": _NICListToTuple(self, instance.nics),
8246
        "disk_template": instance.disk_template,
8247
        "disks": disks,
8248
        "hypervisor": instance.hypervisor,
8249
        "network_port": instance.network_port,
8250
        "hv_instance": instance.hvparams,
8251
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8252
        "be_instance": instance.beparams,
8253
        "be_actual": cluster.FillBE(instance),
8254
        "os_instance": instance.osparams,
8255
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8256
        "serial_no": instance.serial_no,
8257
        "mtime": instance.mtime,
8258
        "ctime": instance.ctime,
8259
        "uuid": instance.uuid,
8260
        }
8261

    
8262
      result[instance.name] = idict
8263

    
8264
    return result
8265

    
8266

    
8267
class LUSetInstanceParams(LogicalUnit):
8268
  """Modifies an instances's parameters.
8269

8270
  """
8271
  HPATH = "instance-modify"
8272
  HTYPE = constants.HTYPE_INSTANCE
8273
  _OP_REQP = ["instance_name"]
8274
  REQ_BGL = False
8275

    
8276
  def CheckArguments(self):
8277
    if not hasattr(self.op, 'nics'):
8278
      self.op.nics = []
8279
    if not hasattr(self.op, 'disks'):
8280
      self.op.disks = []
8281
    if not hasattr(self.op, 'beparams'):
8282
      self.op.beparams = {}
8283
    if not hasattr(self.op, 'hvparams'):
8284
      self.op.hvparams = {}
8285
    if not hasattr(self.op, "disk_template"):
8286
      self.op.disk_template = None
8287
    if not hasattr(self.op, "remote_node"):
8288
      self.op.remote_node = None
8289
    if not hasattr(self.op, "os_name"):
8290
      self.op.os_name = None
8291
    if not hasattr(self.op, "force_variant"):
8292
      self.op.force_variant = False
8293
    self.op.force = getattr(self.op, "force", False)
8294
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8295
            self.op.hvparams or self.op.beparams or self.op.os_name):
8296
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8297

    
8298
    if self.op.hvparams:
8299
      _CheckGlobalHvParams(self.op.hvparams)
8300

    
8301
    # Disk validation
8302
    disk_addremove = 0
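    # Each entry in self.op.disks is a (disk_op, disk_dict) pair; disk_op is
    # either DDM_ADD, DDM_REMOVE or the index of an existing disk to modify.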
8303
    for disk_op, disk_dict in self.op.disks:
8304
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8305
      if disk_op == constants.DDM_REMOVE:
8306
        disk_addremove += 1
8307
        continue
8308
      elif disk_op == constants.DDM_ADD:
8309
        disk_addremove += 1
8310
      else:
8311
        if not isinstance(disk_op, int):
8312
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8313
        if not isinstance(disk_dict, dict):
8314
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8315
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8316

    
8317
      if disk_op == constants.DDM_ADD:
8318
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8319
        if mode not in constants.DISK_ACCESS_SET:
8320
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8321
                                     errors.ECODE_INVAL)
8322
        size = disk_dict.get('size', None)
8323
        if size is None:
8324
          raise errors.OpPrereqError("Required disk parameter size missing",
8325
                                     errors.ECODE_INVAL)
8326
        try:
8327
          size = int(size)
8328
        except (TypeError, ValueError), err:
8329
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8330
                                     str(err), errors.ECODE_INVAL)
8331
        disk_dict['size'] = size
8332
      else:
8333
        # modification of disk
8334
        if 'size' in disk_dict:
8335
          raise errors.OpPrereqError("Disk size change not possible, use"
8336
                                     " grow-disk", errors.ECODE_INVAL)
8337

    
8338
    if disk_addremove > 1:
8339
      raise errors.OpPrereqError("Only one disk add or remove operation"
8340
                                 " supported at a time", errors.ECODE_INVAL)
8341

    
8342
    if self.op.disks and self.op.disk_template is not None:
8343
      raise errors.OpPrereqError("Disk template conversion and other disk"
8344
                                 " changes not supported at the same time",
8345
                                 errors.ECODE_INVAL)
8346

    
8347
    if self.op.disk_template:
8348
      _CheckDiskTemplate(self.op.disk_template)
8349
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8350
          self.op.remote_node is None):
8351
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8352
                                   " one requires specifying a secondary node",
8353
                                   errors.ECODE_INVAL)
8354

    
8355
    # NIC validation
8356
    nic_addremove = 0
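    # Same convention as for disks: nic_op is DDM_ADD, DDM_REMOVE or the index
    # of an existing NIC.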
8357
    for nic_op, nic_dict in self.op.nics:
8358
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8359
      if nic_op == constants.DDM_REMOVE:
8360
        nic_addremove += 1
8361
        continue
8362
      elif nic_op == constants.DDM_ADD:
8363
        nic_addremove += 1
8364
      else:
8365
        if not isinstance(nic_op, int):
8366
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8367
        if not isinstance(nic_dict, dict):
8368
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8369
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8370

    
8371
      # nic_dict should be a dict
8372
      nic_ip = nic_dict.get('ip', None)
8373
      if nic_ip is not None:
8374
        if nic_ip.lower() == constants.VALUE_NONE:
8375
          nic_dict['ip'] = None
8376
        else:
8377
          if not utils.IsValidIP(nic_ip):
8378
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8379
                                       errors.ECODE_INVAL)
8380

    
8381
      nic_bridge = nic_dict.get('bridge', None)
8382
      nic_link = nic_dict.get('link', None)
8383
      if nic_bridge and nic_link:
8384
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8385
                                   " at the same time", errors.ECODE_INVAL)
8386
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8387
        nic_dict['bridge'] = None
8388
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8389
        nic_dict['link'] = None
8390

    
8391
      if nic_op == constants.DDM_ADD:
8392
        nic_mac = nic_dict.get('mac', None)
8393
        if nic_mac is None:
8394
          nic_dict['mac'] = constants.VALUE_AUTO
8395

    
8396
      if 'mac' in nic_dict:
8397
        nic_mac = nic_dict['mac']
8398
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8399
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8400

    
8401
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8402
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8403
                                     " modifying an existing nic",
8404
                                     errors.ECODE_INVAL)
8405

    
8406
    if nic_addremove > 1:
8407
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8408
                                 " supported at a time", errors.ECODE_INVAL)
8409

    
8410
  def ExpandNames(self):
8411
    self._ExpandAndLockInstance()
8412
    self.needed_locks[locking.LEVEL_NODE] = []
8413
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
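    # the node lock list is filled in by DeclareLocks via _LockInstancesNodes,
    # plus the new secondary node if a disk template conversion was requested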
8414

    
8415
  def DeclareLocks(self, level):
8416
    if level == locking.LEVEL_NODE:
8417
      self._LockInstancesNodes()
8418
      if self.op.disk_template and self.op.remote_node:
8419
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8420
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8421

    
8422
  def BuildHooksEnv(self):
8423
    """Build hooks env.
8424

8425
    This runs on the master, primary and secondaries.
8426

8427
    """
8428
    args = dict()
8429
    if constants.BE_MEMORY in self.be_new:
8430
      args['memory'] = self.be_new[constants.BE_MEMORY]
8431
    if constants.BE_VCPUS in self.be_new:
8432
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8433
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8434
    # information at all.
8435
    if self.op.nics:
8436
      args['nics'] = []
8437
      nic_override = dict(self.op.nics)
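      # self.op.nics is a list of (nic_op, nic_dict) pairs, so dict() gives us
      # quick lookup of the override by index (or DDM_ADD/DDM_REMOVE)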
8438
      for idx, nic in enumerate(self.instance.nics):
8439
        if idx in nic_override:
8440
          this_nic_override = nic_override[idx]
8441
        else:
8442
          this_nic_override = {}
8443
        if 'ip' in this_nic_override:
8444
          ip = this_nic_override['ip']
8445
        else:
8446
          ip = nic.ip
8447
        if 'mac' in this_nic_override:
8448
          mac = this_nic_override['mac']
8449
        else:
8450
          mac = nic.mac
8451
        if idx in self.nic_pnew:
8452
          nicparams = self.nic_pnew[idx]
8453
        else:
8454
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8455
        mode = nicparams[constants.NIC_MODE]
8456
        link = nicparams[constants.NIC_LINK]
8457
        args['nics'].append((ip, mac, mode, link))
8458
      if constants.DDM_ADD in nic_override:
8459
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8460
        mac = nic_override[constants.DDM_ADD]['mac']
8461
        nicparams = self.nic_pnew[constants.DDM_ADD]
8462
        mode = nicparams[constants.NIC_MODE]
8463
        link = nicparams[constants.NIC_LINK]
8464
        args['nics'].append((ip, mac, mode, link))
8465
      elif constants.DDM_REMOVE in nic_override:
8466
        del args['nics'][-1]
8467

    
8468
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8469
    if self.op.disk_template:
8470
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8471
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8472
    return env, nl, nl
8473

    
8474
  def CheckPrereq(self):
8475
    """Check prerequisites.
8476

8477
    This only checks the instance list against the existing names.
8478

8479
    """
8480
    self.force = self.op.force
8481

    
8482
    # checking the new params on the primary/secondary nodes
8483

    
8484
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8485
    cluster = self.cluster = self.cfg.GetClusterInfo()
8486
    assert self.instance is not None, \
8487
      "Cannot retrieve locked instance %s" % self.op.instance_name
8488
    pnode = instance.primary_node
8489
    nodelist = list(instance.all_nodes)
8490

    
8491
    if self.op.disk_template:
8492
      if instance.disk_template == self.op.disk_template:
8493
        raise errors.OpPrereqError("Instance already has disk template %s" %
8494
                                   instance.disk_template, errors.ECODE_INVAL)
8495

    
8496
      if (instance.disk_template,
8497
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8498
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8499
                                   " %s to %s" % (instance.disk_template,
8500
                                                  self.op.disk_template),
8501
                                   errors.ECODE_INVAL)
8502
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8503
        _CheckNodeOnline(self, self.op.remote_node)
8504
        _CheckNodeNotDrained(self, self.op.remote_node)
8505
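        # the new secondary must have enough free space for a copy of all the
        # instance's disks, and the instance must be down for the conversion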
        disks = [{"size": d.size} for d in instance.disks]
8506
        required = _ComputeDiskSize(self.op.disk_template, disks)
8507
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8508
        _CheckInstanceDown(self, instance, "cannot change disk template")
8509

    
8510
    # hvparams processing
8511
    if self.op.hvparams:
8512
      hv_type = instance.hypervisor
8513
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8514
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8515
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8516

    
8517
      # local check
8518
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8519
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8520
      self.hv_new = hv_new # the new actual values
8521
      self.hv_inst = i_hvdict # the new dict (without defaults)
8522
    else:
8523
      self.hv_new = self.hv_inst = {}
8524

    
8525
    # beparams processing
8526
    if self.op.beparams:
8527
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams)
8528
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8529
      be_new = cluster.SimpleFillBE(i_bedict)
8530
      self.be_new = be_new # the new actual values
8531
      self.be_inst = i_bedict # the new dict (without defaults)
8532
    else:
8533
      self.be_new = self.be_inst = {}
8534

    
8535
    self.warn = []
8536

    
8537
    if constants.BE_MEMORY in self.op.beparams and not self.force:
8538
      mem_check_list = [pnode]
8539
      if be_new[constants.BE_AUTO_BALANCE]:
8540
        # either we changed auto_balance to yes or it was from before
8541
        mem_check_list.extend(instance.secondary_nodes)
8542
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8543
                                                  instance.hypervisor)
8544
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8545
                                         instance.hypervisor)
8546
      pninfo = nodeinfo[pnode]
8547
      msg = pninfo.fail_msg
8548
      if msg:
8549
        # Assume the primary node is unreachable and go ahead
8550
        self.warn.append("Can't get info from primary node %s: %s" %
8551
                         (pnode,  msg))
8552
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8553
        self.warn.append("Node data from primary node %s doesn't contain"
8554
                         " free memory information" % pnode)
8555
      elif instance_info.fail_msg:
8556
        self.warn.append("Can't get instance runtime information: %s" %
8557
                        instance_info.fail_msg)
8558
      else:
8559
        if instance_info.payload:
8560
          current_mem = int(instance_info.payload['memory'])
8561
        else:
8562
          # Assume instance not running
8563
          # (there is a slight race condition here, but it's not very probable,
8564
          # and we have no other way to check)
8565
          current_mem = 0
8566
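        # the requested memory must fit into the node's free memory plus
        # whatever the instance is already using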
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8567
                    pninfo.payload['memory_free'])
8568
        if miss_mem > 0:
8569
          raise errors.OpPrereqError("This change will prevent the instance"
8570
                                     " from starting, due to %d MB of memory"
8571
                                     " missing on its primary node" % miss_mem,
8572
                                     errors.ECODE_NORES)
8573

    
8574
      if be_new[constants.BE_AUTO_BALANCE]:
8575
        for node, nres in nodeinfo.items():
8576
          if node not in instance.secondary_nodes:
8577
            continue
8578
          msg = nres.fail_msg
8579
          if msg:
8580
            self.warn.append("Can't get info from secondary node %s: %s" %
8581
                             (node, msg))
8582
          elif not isinstance(nres.payload.get('memory_free', None), int):
8583
            self.warn.append("Secondary node %s didn't return free"
8584
                             " memory information" % node)
8585
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8586
            self.warn.append("Not enough memory to failover instance to"
8587
                             " secondary node %s" % node)
8588

    
8589
    # NIC processing
8590
    self.nic_pnew = {}
8591
    self.nic_pinst = {}
8592
    for nic_op, nic_dict in self.op.nics:
8593
      if nic_op == constants.DDM_REMOVE:
8594
        if not instance.nics:
8595
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8596
                                     errors.ECODE_INVAL)
8597
        continue
8598
      if nic_op != constants.DDM_ADD:
8599
        # an existing nic
8600
        if not instance.nics:
8601
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8602
                                     " no NICs" % nic_op,
8603
                                     errors.ECODE_INVAL)
8604
        if nic_op < 0 or nic_op >= len(instance.nics):
8605
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8606
                                     " are 0 to %d" %
8607
                                     (nic_op, len(instance.nics) - 1),
8608
                                     errors.ECODE_INVAL)
8609
        old_nic_params = instance.nics[nic_op].nicparams
8610
        old_nic_ip = instance.nics[nic_op].ip
8611
      else:
8612
        old_nic_params = {}
8613
        old_nic_ip = None
8614

    
8615
      update_params_dict = dict([(key, nic_dict[key])
8616
                                 for key in constants.NICS_PARAMETERS
8617
                                 if key in nic_dict])
8618

    
8619
      if 'bridge' in nic_dict:
8620
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8621

    
8622
      new_nic_params = _GetUpdatedParams(old_nic_params,
8623
                                         update_params_dict)
8624
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8625
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8626
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8627
      self.nic_pinst[nic_op] = new_nic_params
8628
      self.nic_pnew[nic_op] = new_filled_nic_params
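      # nic_pinst keeps only the explicitly requested values (stored on the
      # instance), while nic_pnew is filled with cluster defaults and used for
      # validation and the hooks environment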
8629
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8630

    
8631
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8632
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8633
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8634
        if msg:
8635
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8636
          if self.force:
8637
            self.warn.append(msg)
8638
          else:
8639
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8640
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8641
        if 'ip' in nic_dict:
8642
          nic_ip = nic_dict['ip']
8643
        else:
8644
          nic_ip = old_nic_ip
8645
        if nic_ip is None:
8646
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8647
                                     ' on a routed nic', errors.ECODE_INVAL)
8648
      if 'mac' in nic_dict:
8649
        nic_mac = nic_dict['mac']
8650
        if nic_mac is None:
8651
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8652
                                     errors.ECODE_INVAL)
8653
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8654
          # otherwise generate the mac
8655
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8656
        else:
8657
          # or validate/reserve the current one
8658
          try:
8659
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8660
          except errors.ReservationError:
8661
            raise errors.OpPrereqError("MAC address %s already in use"
8662
                                       " in cluster" % nic_mac,
8663
                                       errors.ECODE_NOTUNIQUE)
8664

    
8665
    # DISK processing
8666
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8667
      raise errors.OpPrereqError("Disk operations not supported for"
8668
                                 " diskless instances",
8669
                                 errors.ECODE_INVAL)
8670
    for disk_op, _ in self.op.disks:
8671
      if disk_op == constants.DDM_REMOVE:
8672
        if len(instance.disks) == 1:
8673
          raise errors.OpPrereqError("Cannot remove the last disk of"
8674
                                     " an instance", errors.ECODE_INVAL)
8675
        _CheckInstanceDown(self, instance, "cannot remove disks")
8676

    
8677
      if (disk_op == constants.DDM_ADD and
8678
          len(instance.disks) >= constants.MAX_DISKS):
8679
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8680
                                   " add more" % constants.MAX_DISKS,
8681
                                   errors.ECODE_STATE)
8682
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8683
        # an existing disk
8684
        if disk_op < 0 or disk_op >= len(instance.disks):
8685
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
8686
                                     " are 0 to %d" %
8687
                                     (disk_op, len(instance.disks) - 1),
8688
                                     errors.ECODE_INVAL)
8689

    
8690
    # OS change
8691
    if self.op.os_name and not self.op.force:
8692
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8693
                      self.op.force_variant)
8694

    
8695
    return
8696

    
8697
  def _ConvertPlainToDrbd(self, feedback_fn):
8698
    """Converts an instance from plain to drbd.
8699

8700
    """
8701
    feedback_fn("Converting template to drbd")
8702
    instance = self.instance
8703
    pnode = instance.primary_node
8704
    snode = self.op.remote_node
8705

    
8706
    # create a fake disk info for _GenerateDiskTemplate
8707
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8708
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8709
                                      instance.name, pnode, [snode],
8710
                                      disk_info, None, None, 0)
8711
    info = _GetInstanceInfoText(instance)
8712
    feedback_fn("Creating aditional volumes...")
8713
    # first, create the missing data and meta devices
8714
    for disk in new_disks:
8715
      # unfortunately this is... not too nice
8716
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8717
                            info, True)
8718
      for child in disk.children:
8719
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
8720
    # at this stage, all new LVs have been created, we can rename the
8721
    # old ones
8722
    feedback_fn("Renaming original volumes...")
8723
    rename_list = [(o, n.children[0].logical_id)
8724
                   for (o, n) in zip(instance.disks, new_disks)]
8725
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
8726
    result.Raise("Failed to rename original LVs")
8727

    
8728
    feedback_fn("Initializing DRBD devices...")
8729
    # all child devices are in place, we can now create the DRBD devices
8730
    for disk in new_disks:
8731
      for node in [pnode, snode]:
8732
        f_create = node == pnode
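        # f_create is only set for the primary node, the same pattern
        # _CreateDisks uses at instance creation time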
8733
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8734

    
8735
    # at this point, the instance has been modified
8736
    instance.disk_template = constants.DT_DRBD8
8737
    instance.disks = new_disks
8738
    self.cfg.Update(instance, feedback_fn)
8739

    
8740
    # disks are created, waiting for sync
8741
    disk_abort = not _WaitForSync(self, instance)
8742
    if disk_abort:
8743
      raise errors.OpExecError("There are some degraded disks for"
8744
                               " this instance, please cleanup manually")
8745

    
8746
  def _ConvertDrbdToPlain(self, feedback_fn):
8747
    """Converts an instance from drbd to plain.
8748

8749
    """
8750
    instance = self.instance
8751
    assert len(instance.secondary_nodes) == 1
8752
    pnode = instance.primary_node
8753
    snode = instance.secondary_nodes[0]
8754
    feedback_fn("Converting template to plain")
8755

    
8756
    old_disks = instance.disks
8757
    new_disks = [d.children[0] for d in old_disks]
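    # each DRBD8 disk has two children: the data LV, which becomes the new
    # plain disk, and the metadata LV, which is removed below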
8758

    
8759
    # copy over size and mode
8760
    for parent, child in zip(old_disks, new_disks):
8761
      child.size = parent.size
8762
      child.mode = parent.mode
8763

    
8764
    # update instance structure
8765
    instance.disks = new_disks
8766
    instance.disk_template = constants.DT_PLAIN
8767
    self.cfg.Update(instance, feedback_fn)
8768

    
8769
    feedback_fn("Removing volumes on the secondary node...")
8770
    for disk in old_disks:
8771
      self.cfg.SetDiskID(disk, snode)
8772
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8773
      if msg:
8774
        self.LogWarning("Could not remove block device %s on node %s,"
8775
                        " continuing anyway: %s", disk.iv_name, snode, msg)
8776

    
8777
    feedback_fn("Removing unneeded volumes on the primary node...")
8778
    for idx, disk in enumerate(old_disks):
8779
      meta = disk.children[1]
8780
      self.cfg.SetDiskID(meta, pnode)
8781
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8782
      if msg:
8783
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
8784
                        " continuing anyway: %s", idx, pnode, msg)
8785

    
8786

    
8787
  def Exec(self, feedback_fn):
8788
    """Modifies an instance.
8789

8790
    All parameters take effect only at the next restart of the instance.
8791

8792
    """
8793
    # Process here the warnings from CheckPrereq, as we don't have a
8794
    # feedback_fn there.
8795
    for warn in self.warn:
8796
      feedback_fn("WARNING: %s" % warn)
8797

    
8798
    result = []
8799
    instance = self.instance
8800
    # disk changes
8801
    for disk_op, disk_dict in self.op.disks:
8802
      if disk_op == constants.DDM_REMOVE:
8803
        # remove the last disk
8804
        device = instance.disks.pop()
8805
        device_idx = len(instance.disks)
8806
        for node, disk in device.ComputeNodeTree(instance.primary_node):
8807
          self.cfg.SetDiskID(disk, node)
8808
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8809
          if msg:
8810
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
8811
                            " continuing anyway", device_idx, node, msg)
8812
        result.append(("disk/%d" % device_idx, "remove"))
8813
      elif disk_op == constants.DDM_ADD:
8814
        # add a new disk
8815
        if instance.disk_template == constants.DT_FILE:
8816
          file_driver, file_path = instance.disks[0].logical_id
8817
          file_path = os.path.dirname(file_path)
8818
        else:
8819
          file_driver = file_path = None
8820
        disk_idx_base = len(instance.disks)
8821
        new_disk = _GenerateDiskTemplate(self,
8822
                                         instance.disk_template,
8823
                                         instance.name, instance.primary_node,
8824
                                         instance.secondary_nodes,
8825
                                         [disk_dict],
8826
                                         file_path,
8827
                                         file_driver,
8828
                                         disk_idx_base)[0]
8829
        instance.disks.append(new_disk)
8830
        info = _GetInstanceInfoText(instance)
8831

    
8832
        logging.info("Creating volume %s for instance %s",
8833
                     new_disk.iv_name, instance.name)
8834
        # Note: this needs to be kept in sync with _CreateDisks
8835
        #HARDCODE
8836
        for node in instance.all_nodes:
8837
          f_create = node == instance.primary_node
8838
          try:
8839
            _CreateBlockDev(self, node, instance, new_disk,
8840
                            f_create, info, f_create)
8841
          except errors.OpExecError, err:
8842
            self.LogWarning("Failed to create volume %s (%s) on"
8843
                            " node %s: %s",
8844
                            new_disk.iv_name, new_disk, node, err)
8845
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8846
                       (new_disk.size, new_disk.mode)))
8847
      else:
8848
        # change a given disk
8849
        instance.disks[disk_op].mode = disk_dict['mode']
8850
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8851

    
8852
    if self.op.disk_template:
8853
      r_shut = _ShutdownInstanceDisks(self, instance)
8854
      if not r_shut:
8855
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8856
                                 " proceed with disk template conversion")
8857
      mode = (instance.disk_template, self.op.disk_template)
8858
      try:
8859
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
8860
      except:
8861
        self.cfg.ReleaseDRBDMinors(instance.name)
8862
        raise
8863
      result.append(("disk_template", self.op.disk_template))
8864

    
8865
    # NIC changes
8866
    for nic_op, nic_dict in self.op.nics:
8867
      if nic_op == constants.DDM_REMOVE:
8868
        # remove the last nic
8869
        del instance.nics[-1]
8870
        result.append(("nic.%d" % len(instance.nics), "remove"))
8871
      elif nic_op == constants.DDM_ADD:
8872
        # mac and bridge should be set, by now
8873
        mac = nic_dict['mac']
8874
        ip = nic_dict.get('ip', None)
8875
        nicparams = self.nic_pinst[constants.DDM_ADD]
8876
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8877
        instance.nics.append(new_nic)
8878
        result.append(("nic.%d" % (len(instance.nics) - 1),
8879
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
8880
                       (new_nic.mac, new_nic.ip,
8881
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8882
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8883
                       )))
8884
      else:
8885
        for key in 'mac', 'ip':
8886
          if key in nic_dict:
8887
            setattr(instance.nics[nic_op], key, nic_dict[key])
8888
        if nic_op in self.nic_pinst:
8889
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8890
        for key, val in nic_dict.iteritems():
8891
          result.append(("nic.%s/%d" % (key, nic_op), val))
8892

    
8893
    # hvparams changes
8894
    if self.op.hvparams:
8895
      instance.hvparams = self.hv_inst
8896
      for key, val in self.op.hvparams.iteritems():
8897
        result.append(("hv/%s" % key, val))
8898

    
8899
    # beparams changes
8900
    if self.op.beparams:
8901
      instance.beparams = self.be_inst
8902
      for key, val in self.op.beparams.iteritems():
8903
        result.append(("be/%s" % key, val))
8904

    
8905
    # OS change
8906
    if self.op.os_name:
8907
      instance.os = self.op.os_name
8908

    
8909
    self.cfg.Update(instance, feedback_fn)
8910

    
8911
    return result
8912

    
8913
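  # map of supported (old disk template, new disk template) pairs to the
  # methods implementing the conversion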
  _DISK_CONVERSIONS = {
8914
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8915
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8916
    }
8917

    
8918

    
8919
class LUQueryExports(NoHooksLU):
8920
  """Query the exports list
8921

8922
  """
8923
  _OP_REQP = ['nodes']
8924
  REQ_BGL = False
8925

    
8926
  def ExpandNames(self):
8927
    self.needed_locks = {}
8928
    self.share_locks[locking.LEVEL_NODE] = 1
8929
    if not self.op.nodes:
8930
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8931
    else:
8932
      self.needed_locks[locking.LEVEL_NODE] = \
8933
        _GetWantedNodes(self, self.op.nodes)
8934

    
8935
  def CheckPrereq(self):
8936
    """Check prerequisites.
8937

8938
    """
8939
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8940

    
8941
  def Exec(self, feedback_fn):
8942
    """Compute the list of all the exported system images.
8943

8944
    @rtype: dict
8945
    @return: a dictionary with the structure node->(export-list)
8946
        where export-list is a list of the instances exported on
8947
        that node.
8948

8949
    """
8950
    rpcresult = self.rpc.call_export_list(self.nodes)
8951
    result = {}
8952
    for node in rpcresult:
8953
      if rpcresult[node].fail_msg:
8954
        result[node] = False
8955
      else:
8956
        result[node] = rpcresult[node].payload
8957

    
8958
    return result
8959

    
8960

    
8961
class LUPrepareExport(NoHooksLU):
8962
  """Prepares an instance for an export and returns useful information.
8963

8964
  """
8965
  _OP_REQP = ["instance_name", "mode"]
8966
  REQ_BGL = False
8967

    
8968
  def CheckArguments(self):
8969
    """Check the arguments.
8970

8971
    """
8972
    if self.op.mode not in constants.EXPORT_MODES:
8973
      raise errors.OpPrereqError("Invalid export mode %r" % self.op.mode,
8974
                                 errors.ECODE_INVAL)
8975

    
8976
  def ExpandNames(self):
8977
    self._ExpandAndLockInstance()
8978

    
8979
  def CheckPrereq(self):
8980
    """Check prerequisites.
8981

8982
    """
8983
    instance_name = self.op.instance_name
8984

    
8985
    self.instance = self.cfg.GetInstanceInfo(instance_name)
8986
    assert self.instance is not None, \
8987
          "Cannot retrieve locked instance %s" % self.op.instance_name
8988
    _CheckNodeOnline(self, self.instance.primary_node)
8989

    
8990
    self._cds = _GetClusterDomainSecret()
8991

    
8992
  def Exec(self, feedback_fn):
8993
    """Prepares an instance for an export.
8994

8995
    """
8996
    instance = self.instance
8997

    
8998
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
8999
      salt = utils.GenerateSecret(8)
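      # the salt is used below to HMAC-sign the X509 key name and the CA with
      # the cluster domain secret, so they can later be verified against it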
9000

    
9001
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9002
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9003
                                              constants.RIE_CERT_VALIDITY)
9004
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9005

    
9006
      (name, cert_pem) = result.payload
9007

    
9008
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9009
                                             cert_pem)
9010

    
9011
      return {
9012
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9013
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9014
                          salt),
9015
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9016
        }
9017

    
9018
    return None
9019

    
9020

    
9021
class LUExportInstance(LogicalUnit):
9022
  """Export an instance to an image in the cluster.
9023

9024
  """
9025
  HPATH = "instance-export"
9026
  HTYPE = constants.HTYPE_INSTANCE
9027
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
9028
  REQ_BGL = False
9029

    
9030
  def CheckArguments(self):
9031
    """Check the arguments.
9032

9033
    """
9034
    _CheckBooleanOpField(self.op, "remove_instance")
9035
    _CheckBooleanOpField(self.op, "ignore_remove_failures")
9036

    
9037
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
9038
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
9039
    self.remove_instance = getattr(self.op, "remove_instance", False)
9040
    self.ignore_remove_failures = getattr(self.op, "ignore_remove_failures",
9041
                                          False)
9042
    self.export_mode = getattr(self.op, "mode", constants.EXPORT_MODE_LOCAL)
9043
    self.x509_key_name = getattr(self.op, "x509_key_name", None)
9044
    self.dest_x509_ca_pem = getattr(self.op, "destination_x509_ca", None)
9045

    
9046
    if self.remove_instance and not self.op.shutdown:
9047
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9048
                                 " down before")
9049

    
9050
    if self.export_mode not in constants.EXPORT_MODES:
9051
      raise errors.OpPrereqError("Invalid export mode %r" % self.export_mode,
9052
                                 errors.ECODE_INVAL)
9053

    
9054
    if self.export_mode == constants.EXPORT_MODE_REMOTE:
9055
      if not self.x509_key_name:
9056
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9057
                                   errors.ECODE_INVAL)
9058

    
9059
      if not self.dest_x509_ca_pem:
9060
        raise errors.OpPrereqError("Missing destination X509 CA",
9061
                                   errors.ECODE_INVAL)
9062

    
9063
  def ExpandNames(self):
9064
    self._ExpandAndLockInstance()
9065

    
9066
    # Lock all nodes for local exports
9067
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9068
      # FIXME: lock only instance primary and destination node
9069
      #
9070
      # Sad but true, for now we have to lock all nodes, as we don't know where
9071
      # the previous export might be, and in this LU we search for it and
9072
      # remove it from its current node. In the future we could fix this by:
9073
      #  - making a tasklet to search (share-lock all), then create the new one,
9074
      #    then one to remove, after
9075
      #  - removing the removal operation altogether
9076
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9077

    
9078
  def DeclareLocks(self, level):
9079
    """Last minute lock declaration."""
9080
    # All nodes are locked anyway, so nothing to do here.
9081

    
9082
  def BuildHooksEnv(self):
9083
    """Build hooks env.
9084

9085
    This will run on the master, primary node and target node.
9086

9087
    """
9088
    env = {
9089
      "EXPORT_MODE": self.export_mode,
9090
      "EXPORT_NODE": self.op.target_node,
9091
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9092
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
9093
      # TODO: Generic function for boolean env variables
9094
      "REMOVE_INSTANCE": str(bool(self.remove_instance)),
9095
      }
9096

    
9097
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9098

    
9099
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9100

    
9101
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9102
      nl.append(self.op.target_node)
9103

    
9104
    return env, nl, nl
9105

    
9106
  def CheckPrereq(self):
9107
    """Check prerequisites.
9108

9109
    This checks that the instance and node names are valid.
9110

9111
    """
9112
    instance_name = self.op.instance_name
9113

    
9114
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9115
    assert self.instance is not None, \
9116
          "Cannot retrieve locked instance %s" % self.op.instance_name
9117
    _CheckNodeOnline(self, self.instance.primary_node)
9118

    
9119
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9120
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9121
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9122
      assert self.dst_node is not None
9123

    
9124
      _CheckNodeOnline(self, self.dst_node.name)
9125
      _CheckNodeNotDrained(self, self.dst_node.name)
9126

    
9127
      self._cds = None
9128
      self.dest_disk_info = None
9129
      self.dest_x509_ca = None
9130

    
9131
    elif self.export_mode == constants.EXPORT_MODE_REMOTE:
9132
      self.dst_node = None
9133

    
9134
      if len(self.op.target_node) != len(self.instance.disks):
9135
        raise errors.OpPrereqError(("Received destination information for %s"
9136
                                    " disks, but instance %s has %s disks") %
9137
                                   (len(self.op.target_node), instance_name,
9138
                                    len(self.instance.disks)),
9139
                                   errors.ECODE_INVAL)
9140

    
9141
      cds = _GetClusterDomainSecret()
9142

    
9143
      # Check X509 key name
9144
      try:
9145
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9146
      except (TypeError, ValueError), err:
9147
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9148

    
9149
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9150
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9151
                                   errors.ECODE_INVAL)
9152

    
9153
      # Load and verify CA
9154
      try:
9155
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9156
      except OpenSSL.crypto.Error, err:
9157
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9158
                                   (err, ), errors.ECODE_INVAL)
9159

    
9160
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9161
      if errcode is not None:
9162
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" % (msg, ),
9163
                                   errors.ECODE_INVAL)
9164

    
9165
      self.dest_x509_ca = cert
9166

    
9167
      # Verify target information
9168
      disk_info = []
9169
      for idx, disk_data in enumerate(self.op.target_node):
9170
        try:
9171
          (host, port, magic) = \
9172
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9173
        except errors.GenericError, err:
9174
          raise errors.OpPrereqError("Target info for disk %s: %s" % (idx, err),
9175
                                     errors.ECODE_INVAL)
9176

    
9177
        disk_info.append((host, port, magic))
9178

    
9179
      assert len(disk_info) == len(self.op.target_node)
9180
      self.dest_disk_info = disk_info
9181

    
9182
    else:
9183
      raise errors.ProgrammerError("Unhandled export mode %r" %
9184
                                   self.export_mode)
9185

    
9186
    # instance disk type verification
9187
    # TODO: Implement export support for file-based disks
9188
    for disk in self.instance.disks:
9189
      if disk.dev_type == constants.LD_FILE:
9190
        raise errors.OpPrereqError("Export not supported for instances with"
9191
                                   " file-based disks", errors.ECODE_INVAL)
9192

    
9193
  def _CleanupExports(self, feedback_fn):
9194
    """Removes exports of current instance from all other nodes.
9195

9196
    If an instance in a cluster with nodes A..D was exported to node C, its
9197
    exports will be removed from the nodes A, B and D.
9198

9199
    """
9200
    assert self.export_mode != constants.EXPORT_MODE_REMOTE
9201

    
9202
    nodelist = self.cfg.GetNodeList()
9203
    nodelist.remove(self.dst_node.name)
9204

    
9205
    # on one-node clusters nodelist will be empty after the removal
9206
    # if we proceed the backup would be removed because OpQueryExports
9207
    # substitutes an empty list with the full cluster node list.
9208
    iname = self.instance.name
9209
    if nodelist:
9210
      feedback_fn("Removing old exports for instance %s" % iname)
9211
      exportlist = self.rpc.call_export_list(nodelist)
9212
      for node in exportlist:
9213
        if exportlist[node].fail_msg:
9214
          continue
9215
        if iname in exportlist[node].payload:
9216
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9217
          if msg:
9218
            self.LogWarning("Could not remove older export for instance %s"
9219
                            " on node %s: %s", iname, node, msg)
9220

    
9221
  def Exec(self, feedback_fn):
9222
    """Export an instance to an image in the cluster.
9223

9224
    """
9225
    assert self.export_mode in constants.EXPORT_MODES
9226

    
9227
    instance = self.instance
9228
    src_node = instance.primary_node
9229

    
9230
    if self.op.shutdown:
9231
      # shutdown the instance, but not the disks
9232
      feedback_fn("Shutting down instance %s" % instance.name)
9233
      result = self.rpc.call_instance_shutdown(src_node, instance,
9234
                                               self.shutdown_timeout)
9235
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9236
      result.Raise("Could not shutdown instance %s on"
9237
                   " node %s" % (instance.name, src_node))
9238

    
9239
    # set the disks ID correctly since call_instance_start needs the
9240
    # correct drbd minor to create the symlinks
9241
    for disk in instance.disks:
9242
      self.cfg.SetDiskID(disk, src_node)
9243

    
9244
    activate_disks = (not instance.admin_up)
9245

    
9246
    if activate_disks:
9247
      # Activate the instance disks if we're exporting a stopped instance
9248
      feedback_fn("Activating disks for %s" % instance.name)
9249
      _StartInstanceDisks(self, instance, None)
9250

    
9251
    try:
9252
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9253
                                                     instance)
9254

    
9255
      helper.CreateSnapshots()
9256
      try:
9257
        if (self.op.shutdown and instance.admin_up and
9258
            not self.remove_instance):
9259
          assert not activate_disks
9260
          feedback_fn("Starting instance %s" % instance.name)
9261
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9262
          msg = result.fail_msg
9263
          if msg:
9264
            feedback_fn("Failed to start instance: %s" % msg)
9265
            _ShutdownInstanceDisks(self, instance)
9266
            raise errors.OpExecError("Could not start instance: %s" % msg)
9267

    
9268
        if self.export_mode == constants.EXPORT_MODE_LOCAL:
9269
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9270
        elif self.export_mode == constants.EXPORT_MODE_REMOTE:
9271
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9272
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9273

    
9274
          (key_name, _, _) = self.x509_key_name
9275

    
9276
          dest_ca_pem = \
9277
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9278
                                            self.dest_x509_ca)
9279

    
9280
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9281
                                                     key_name, dest_ca_pem,
9282
                                                     timeouts)
9283
      finally:
9284
        helper.Cleanup()
9285

    
9286
      # Check for backwards compatibility
9287
      assert len(dresults) == len(instance.disks)
9288
      assert compat.all(isinstance(i, bool) for i in dresults), \
9289
             "Not all results are boolean: %r" % dresults
9290

    
9291
    finally:
9292
      if activate_disks:
9293
        feedback_fn("Deactivating disks for %s" % instance.name)
9294
        _ShutdownInstanceDisks(self, instance)
9295

    
9296
    # Remove instance if requested
9297
    if self.remove_instance:
9298
      if not (compat.all(dresults) and fin_resu):
9299
        feedback_fn("Not removing instance %s as parts of the export failed" %
9300
                    instance.name)
9301
      else:
9302
        feedback_fn("Removing instance %s" % instance.name)
9303
        _RemoveInstance(self, feedback_fn, instance,
9304
                        self.ignore_remove_failures)
9305

    
9306
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9307
      self._CleanupExports(feedback_fn)
9308

    
9309
    return fin_resu, dresults
9310

    
9311

    
9312
class LURemoveExport(NoHooksLU):
9313
  """Remove exports related to the named instance.
9314

9315
  """
9316
  _OP_REQP = ["instance_name"]
9317
  REQ_BGL = False
9318

    
9319
  def ExpandNames(self):
9320
    self.needed_locks = {}
9321
    # We need all nodes to be locked in order for RemoveExport to work, but we
9322
    # don't need to lock the instance itself, as nothing will happen to it (and
9323
    # we can remove exports also for a removed instance)
9324
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9325

    
9326
  def CheckPrereq(self):
9327
    """Check prerequisites.
9328
    """
9329
    pass
9330

    
9331
  def Exec(self, feedback_fn):
9332
    """Remove any export.
9333

9334
    """
9335
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9336
    # If the instance was not found we'll try with the name that was passed in.
9337
    # This will only work if it was an FQDN, though.
9338
    fqdn_warn = False
9339
    if not instance_name:
9340
      fqdn_warn = True
9341
      instance_name = self.op.instance_name
9342

    
9343
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9344
    exportlist = self.rpc.call_export_list(locked_nodes)
9345
    found = False
9346
    for node in exportlist:
9347
      msg = exportlist[node].fail_msg
9348
      if msg:
9349
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9350
        continue
9351
      if instance_name in exportlist[node].payload:
9352
        found = True
9353
        result = self.rpc.call_export_remove(node, instance_name)
9354
        msg = result.fail_msg
9355
        if msg:
9356
          logging.error("Could not remove export for instance %s"
9357
                        " on node %s: %s", instance_name, node, msg)
9358

    
9359
    if fqdn_warn and not found:
9360
      feedback_fn("Export not found. If trying to remove an export belonging"
9361
                  " to a deleted instance please use its Fully Qualified"
9362
                  " Domain Name.")
9363

    
9364

    
9365
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9366
  """Generic tags LU.
9367

9368
  This is an abstract class which is the parent of all the other tags LUs.
9369

9370
  """
9371

    
9372
  def ExpandNames(self):
9373
    self.needed_locks = {}
9374
    if self.op.kind == constants.TAG_NODE:
9375
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9376
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
9377
    elif self.op.kind == constants.TAG_INSTANCE:
9378
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9379
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9380

    
9381
  def CheckPrereq(self):
9382
    """Check prerequisites.
9383

9384
    """
9385
    if self.op.kind == constants.TAG_CLUSTER:
9386
      self.target = self.cfg.GetClusterInfo()
9387
    elif self.op.kind == constants.TAG_NODE:
9388
      self.target = self.cfg.GetNodeInfo(self.op.name)
9389
    elif self.op.kind == constants.TAG_INSTANCE:
9390
      self.target = self.cfg.GetInstanceInfo(self.op.name)
9391
    else:
9392
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9393
                                 str(self.op.kind), errors.ECODE_INVAL)
9394

    
9395

    
9396
class LUGetTags(TagsLU):
9397
  """Returns the tags of a given object.
9398

9399
  """
9400
  _OP_REQP = ["kind", "name"]
9401
  REQ_BGL = False
9402

    
9403
  def Exec(self, feedback_fn):
9404
    """Returns the tag list.
9405

9406
    """
9407
    return list(self.target.GetTags())
9408

    
9409

    
9410
class LUSearchTags(NoHooksLU):
9411
  """Searches the tags for a given pattern.
9412

9413
  """
9414
  _OP_REQP = ["pattern"]
9415
  REQ_BGL = False
9416

    
9417
  def ExpandNames(self):
9418
    self.needed_locks = {}
9419

    
9420
  def CheckPrereq(self):
9421
    """Check prerequisites.
9422

9423
    This checks the pattern passed for validity by compiling it.
9424

9425
    """
9426
    try:
9427
      self.re = re.compile(self.op.pattern)
9428
    except re.error, err:
9429
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9430
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9431

    
9432
  def Exec(self, feedback_fn):
9433
    """Returns the tag list.
9434

9435
    """
9436
    cfg = self.cfg
9437
    tgts = [("/cluster", cfg.GetClusterInfo())]
9438
    ilist = cfg.GetAllInstancesInfo().values()
9439
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9440
    nlist = cfg.GetAllNodesInfo().values()
9441
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9442
    results = []
9443
    for path, target in tgts:
9444
      for tag in target.GetTags():
9445
        if self.re.search(tag):
9446
          results.append((path, tag))
9447
    return results
9448

    
9449

    
9450
class LUAddTags(TagsLU):
9451
  """Sets a tag on a given object.
9452

9453
  """
9454
  _OP_REQP = ["kind", "name", "tags"]
9455
  REQ_BGL = False
9456

    
9457
  def CheckPrereq(self):
9458
    """Check prerequisites.
9459

9460
    This checks the type and length of the tag name and value.
9461

9462
    """
9463
    TagsLU.CheckPrereq(self)
9464
    for tag in self.op.tags:
9465
      objects.TaggableObject.ValidateTag(tag)
9466

    
9467
  def Exec(self, feedback_fn):
9468
    """Sets the tag.
9469

9470
    """
9471
    try:
9472
      for tag in self.op.tags:
9473
        self.target.AddTag(tag)
9474
    except errors.TagError, err:
9475
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
9476
    self.cfg.Update(self.target, feedback_fn)
9477

    
9478

    
9479
class LUDelTags(TagsLU):
9480
  """Delete a list of tags from a given object.
9481

9482
  """
9483
  _OP_REQP = ["kind", "name", "tags"]
9484
  REQ_BGL = False
9485

    
9486
  def CheckPrereq(self):
9487
    """Check prerequisites.
9488

9489
    This checks that we have the given tag.
9490

9491
    """
9492
    TagsLU.CheckPrereq(self)
9493
    for tag in self.op.tags:
9494
      objects.TaggableObject.ValidateTag(tag)
9495
    del_tags = frozenset(self.op.tags)
9496
    cur_tags = self.target.GetTags()
9497
    if not del_tags <= cur_tags:
9498
      diff_tags = del_tags - cur_tags
9499
      diff_names = ["'%s'" % tag for tag in diff_tags]
9500
      diff_names.sort()
9501
      raise errors.OpPrereqError("Tag(s) %s not found" %
9502
                                 (",".join(diff_names)), errors.ECODE_NOENT)
9503

    
9504
  def Exec(self, feedback_fn):
9505
    """Remove the tag from the object.
9506

9507
    """
9508
    for tag in self.op.tags:
9509
      self.target.RemoveTag(tag)
9510
    self.cfg.Update(self.target, feedback_fn)
9511

    
9512

    
9513
class LUTestDelay(NoHooksLU):
9514
  """Sleep for a specified amount of time.
9515

9516
  This LU sleeps on the master and/or nodes for a specified amount of
9517
  time.
9518

9519
  """
9520
  _OP_REQP = ["duration", "on_master", "on_nodes"]
9521
  REQ_BGL = False
9522

    
9523
  def CheckArguments(self):
9524
    # TODO: convert to the type system
9525
    self.op.repeat = getattr(self.op, "repeat", 0)
9526
    if self.op.repeat < 0:
9527
      raise errors.OpPrereqError("Repetition count cannot be negative")
9528

    
9529
  def ExpandNames(self):
9530
    """Expand names and set required locks.
9531

9532
    This expands the node list, if any.
9533

9534
    """
9535
    self.needed_locks = {}
9536
    if self.op.on_nodes:
9537
      # _GetWantedNodes can be used here, but is not always appropriate to use
9538
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9539
      # more information.
9540
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9541
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9542

    
9543
  def CheckPrereq(self):
9544
    """Check prerequisites.
9545

9546
    """
9547

    
9548
  def _TestDelay(self):
9549
    """Do the actual sleep.
9550

9551
    """
9552
    if self.op.on_master:
9553
      if not utils.TestDelay(self.op.duration):
9554
        raise errors.OpExecError("Error during master delay test")
9555
    if self.op.on_nodes:
9556
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9557
      for node, node_result in result.items():
9558
        node_result.Raise("Failure during rpc call to node %s" % node)
9559

    
9560
  def Exec(self, feedback_fn):
9561
    """Execute the test delay opcode, with the wanted repetitions.
9562

9563
    """
9564
    if self.op.repeat == 0:
9565
      self._TestDelay()
9566
    else:
9567
      top_value = self.op.repeat - 1
9568
      for i in range(self.op.repeat):
9569
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9570
        self._TestDelay()
9571

    
9572

    
9573
class IAllocator(object):
9574
  """IAllocator framework.
9575

9576
  An IAllocator instance has four sets of attributes:
9577
    - cfg that is needed to query the cluster
9578
    - input data (all members of the _KEYS class attribute are required)
9579
    - four buffer attributes (in|out_data|text), that represent the
9580
      input (to the external script) in text and data structure format,
9581
      and the output from it, again in two formats
9582
    - the result variables from the script (success, info, nodes) for
9583
      easy usage
9584

9585
  """
9586
  # pylint: disable-msg=R0902
9587
  # lots of instance attributes
9588
  _ALLO_KEYS = [
9589
    "name", "mem_size", "disks", "disk_template",
9590
    "os", "tags", "nics", "vcpus", "hypervisor",
9591
    ]
9592
  _RELO_KEYS = [
9593
    "name", "relocate_from",
9594
    ]
9595
  _EVAC_KEYS = [
9596
    "evac_nodes",
9597
    ]
9598

    
9599
  def __init__(self, cfg, rpc, mode, **kwargs):
9600
    self.cfg = cfg
9601
    self.rpc = rpc
9602
    # init buffer variables
9603
    self.in_text = self.out_text = self.in_data = self.out_data = None
9604
    # init all input fields so that pylint is happy
9605
    self.mode = mode
9606
    self.mem_size = self.disks = self.disk_template = None
9607
    self.os = self.tags = self.nics = self.vcpus = None
9608
    self.hypervisor = None
9609
    self.relocate_from = None
9610
    self.name = None
9611
    self.evac_nodes = None
9612
    # computed fields
9613
    self.required_nodes = None
9614
    # init result fields
9615
    self.success = self.info = self.result = None
9616
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
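            # report free memory as if every primary instance were using its
            # full configured BE_MEMORY, so the allocator does not over-commit
            # nodes whose instances currently use less than their maximum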
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # dynamic node resource data (with the memory figures adjusted above)
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

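    # a relocation asks the allocator for exactly one replacement node; the
    # node(s) listed in relocate_from are the ones being vacated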
    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

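    # the serialized text form is what actually gets shipped (via RPC) to the
    # master node and handed to the external allocator script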
    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other result attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

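    # a well-formed reply is a dict carrying at least "success" (boolean),
    # "info" (free-form message) and "result" (a list); for example (values
    # illustrative):
    #   {"success": true, "info": "allocation successful", "result": ["node2"]}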
    for key in "success", "info", "result":
9888
      if key not in rdict:
9889
        raise errors.OpExecError("Can't parse iallocator results:"
9890
                                 " missing key '%s'" % key)
9891
      setattr(self, key, rdict[key])
9892

    
9893
    if not isinstance(rdict["result"], list):
9894
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9895
                               " is not a list")
9896
    self.out_data = rdict
9897

    
9898

    
9899
class LUTestAllocator(NoHooksLU):
9900
  """Run allocator tests.
9901

9902
  This LU runs the allocator tests
9903

9904
  """
9905
  _OP_REQP = ["direction", "mode", "name"]
9906

    
9907
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode '%s' in"
                                   " LUTestAllocator.Exec" % self.op.mode)

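    # the "in" direction only returns the request that would be sent to the
    # allocator; the "out" direction actually runs the named allocator and
    # returns its raw, unvalidated output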
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result