Revision 22b7f6f8

b/Makefile.am
 	lib/cmdlib/cluster.py \
 	lib/cmdlib/group.py \
 	lib/cmdlib/node.py \
+	lib/cmdlib/instance.py \
+	lib/cmdlib/instance_utils.py \
 	lib/cmdlib/tags.py \
 	lib/cmdlib/network.py \
 	lib/cmdlib/test.py
b/lib/cmdlib/__init__.py
 
 # C0302: since we have waaaay too many lines in this module
 
-import os
 import time
 import logging
-import copy
 import OpenSSL
-import itertools
-import operator
 
 from ganeti import utils
 from ganeti import errors
-from ganeti import hypervisor
 from ganeti import locking
 from ganeti import constants
-from ganeti import objects
 from ganeti import compat
 from ganeti import masterd
-from ganeti import netutils
 from ganeti import query
 from ganeti import qlang
-from ganeti import opcodes
-from ganeti import ht
-from ganeti import rpc
-from ganeti import pathutils
-from ganeti import network
-from ganeti.masterd import iallocator
 
 from ganeti.cmdlib.base import ResultWithJobs, LogicalUnit, NoHooksLU, \
   Tasklet, _QueryBase
......
   _CheckInstancesNodeGroups, _LoadNodeEvacResult, _MapInstanceDisksToNodes, \
   _CheckInstanceNodeGroups, _CheckParamsNotGlobal, \
   _IsExclusiveStorageEnabledNode, _CheckInstanceState, \
-  _CheckIAllocatorOrNode, _FindFaultyInstanceDisks
+  _CheckIAllocatorOrNode, _FindFaultyInstanceDisks, _CheckNodeOnline
+from ganeti.cmdlib.instance_utils import _AssembleInstanceDisks, \
+  _BuildInstanceHookEnvByObject, _GetClusterDomainSecret, \
+  _CheckNodeNotDrained, _RemoveDisks, _ShutdownInstanceDisks, \
+  _StartInstanceDisks, _RemoveInstance
 
 from ganeti.cmdlib.cluster import LUClusterActivateMasterIp, \
   LUClusterDeactivateMasterIp, LUClusterConfigQuery, LUClusterDestroy, \
......
   LUNodePowercycle, LUNodeEvacuate, LUNodeMigrate, LUNodeModifyStorage, \
   _NodeQuery, LUNodeQuery, LUNodeQueryvols, LUNodeQueryStorage, \
   LUNodeRemove, LURepairNodeStorage
+from ganeti.cmdlib.instance import LUInstanceCreate, LUInstanceRename, \
+  LUInstanceRemove, LUInstanceMove, _InstanceQuery, LUInstanceQuery, \
+  LUInstanceQueryData, LUInstanceRecreateDisks, LUInstanceGrowDisk, \
+  LUInstanceReplaceDisks, LUInstanceActivateDisks, \
+  LUInstanceDeactivateDisks, LUInstanceStartup, LUInstanceShutdown, \
+  LUInstanceReinstall, LUInstanceReboot, LUInstanceConsole, \
+  LUInstanceFailover, LUInstanceMigrate, LUInstanceMultiAlloc, \
+  LUInstanceSetParams, LUInstanceChangeGroup
 from ganeti.cmdlib.tags import LUTagsGet, LUTagsSearch, LUTagsSet, LUTagsDel
 from ganeti.cmdlib.network import LUNetworkAdd, LUNetworkRemove, \
   LUNetworkSetParams, _NetworkQuery, LUNetworkQuery, LUNetworkConnect, \
   LUNetworkDisconnect
 from ganeti.cmdlib.test import LUTestDelay, LUTestJqueue, LUTestAllocator
 
93
import ganeti.masterd.instance # pylint: disable=W0611
94

  
95

  
96
def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
97
  """Whether exclusive_storage is in effect for the given node.
98

  
99
  @type cfg: L{config.ConfigWriter}
100
  @param cfg: The cluster configuration
101
  @type nodename: string
102
  @param nodename: The node
103
  @rtype: bool
104
  @return: The effective value of exclusive_storage
105
  @raise errors.OpPrereqError: if no node exists with the given name
106

  
107
  """
108
  ni = cfg.GetNodeInfo(nodename)
109
  if ni is None:
110
    raise errors.OpPrereqError("Invalid node name %s" % nodename,
111
                               errors.ECODE_NOENT)
112
  return _IsExclusiveStorageEnabledNode(cfg, ni)
113

  
114

  
115
def _CopyLockList(names):
116
  """Makes a copy of a list of lock names.
117

  
118
  Handles L{locking.ALL_SET} correctly.
119

  
120
  """
121
  if names == locking.ALL_SET:
122
    return locking.ALL_SET
123
  else:
124
    return names[:]
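# Illustrative usage sketch, not part of this revision: _CopyLockList either
# passes the locking.ALL_SET sentinel through unchanged or returns a shallow
# copy of the given list, so callers may mutate the result freely. The node
# names below are hypothetical.
def _ExampleCopyLockListUsage():
  """Example only."""
  names = ["node1.example.com", "node2.example.com"]
  copied = _CopyLockList(names)
  assert copied == names and copied is not names
  assert _CopyLockList(locking.ALL_SET) is locking.ALL_SET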
125

  
126

  
127
def _ReleaseLocks(lu, level, names=None, keep=None):
128
  """Releases locks owned by an LU.
129

  
130
  @type lu: L{LogicalUnit}
131
  @param level: Lock level
132
  @type names: list or None
133
  @param names: Names of locks to release
134
  @type keep: list or None
135
  @param keep: Names of locks to retain
136

  
137
  """
138
  assert not (keep is not None and names is not None), \
139
         "Only one of the 'names' and the 'keep' parameters can be given"
140

  
141
  if names is not None:
142
    should_release = names.__contains__
143
  elif keep:
144
    should_release = lambda name: name not in keep
145
  else:
146
    should_release = None
147

  
148
  owned = lu.owned_locks(level)
149
  if not owned:
150
    # Not owning any lock at this level, do nothing
151
    pass
152

  
153
  elif should_release:
154
    retain = []
155
    release = []
156

  
157
    # Determine which locks to release
158
    for name in owned:
159
      if should_release(name):
160
        release.append(name)
161
      else:
162
        retain.append(name)
163

  
164
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
165

  
166
    # Release just some locks
167
    lu.glm.release(level, names=release)
168

  
169
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
170
  else:
171
    # Release everything
172
    lu.glm.release(level)
173

  
174
    assert not lu.glm.is_owned(level), "No locks should be owned"
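# Illustrative usage sketch, not part of this revision: the two common ways an
# LU narrows its node locks after computing which nodes it still needs. Both
# 'lu' and 'kept_nodes' are hypothetical.
def _ExampleReleaseLocksUsage(lu, kept_nodes):
  """Example only."""
  # Retain only the locks named in kept_nodes ...
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=kept_nodes)
  # ... or drop every node lock owned at this level.
  _ReleaseLocks(lu, locking.LEVEL_NODE)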
175

  
176

  
177
def _CheckNodeOnline(lu, node, msg=None):
178
  """Ensure that a given node is online.
179

  
180
  @param lu: the LU on behalf of which we make the check
181
  @param node: the node to check
182
  @param msg: if passed, should be a message to replace the default one
183
  @raise errors.OpPrereqError: if the node is offline
184

  
185
  """
186
  if msg is None:
187
    msg = "Can't use offline node"
188
  if lu.cfg.GetNodeInfo(node).offline:
189
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
190

  
191

  
192
def _CheckNodeNotDrained(lu, node):
193
  """Ensure that a given node is not drained.
194

  
195
  @param lu: the LU on behalf of which we make the check
196
  @param node: the node to check
197
  @raise errors.OpPrereqError: if the node is drained
198

  
199
  """
200
  if lu.cfg.GetNodeInfo(node).drained:
201
    raise errors.OpPrereqError("Can't use drained node %s" % node,
202
                               errors.ECODE_STATE)
203

  
204

  
205
def _CheckNodeVmCapable(lu, node):
206
  """Ensure that a given node is vm capable.
207

  
208
  @param lu: the LU on behalf of which we make the check
209
  @param node: the node to check
210
  @raise errors.OpPrereqError: if the node is not vm capable
211

  
212
  """
213
  if not lu.cfg.GetNodeInfo(node).vm_capable:
214
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
215
                               errors.ECODE_STATE)
216

  
217

  
218
def _CheckNodeHasOS(lu, node, os_name, force_variant):
219
  """Ensure that a node supports a given OS.
220

  
221
  @param lu: the LU on behalf of which we make the check
222
  @param node: the node to check
223
  @param os_name: the OS to query about
224
  @param force_variant: whether to ignore variant errors
225
  @raise errors.OpPrereqError: if the node is not supporting the OS
226

  
227
  """
228
  result = lu.rpc.call_os_get(node, os_name)
229
  result.Raise("OS '%s' not in supported OS list for node %s" %
230
               (os_name, node),
231
               prereq=True, ecode=errors.ECODE_INVAL)
232
  if not force_variant:
233
    _CheckOSVariant(result.payload, os_name)
234

  
235

  
236
def _GetClusterDomainSecret():
237
  """Reads the cluster domain secret.
238

  
239
  """
240
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
241
                               strict=True)
242

  
243

  
244
def _ComputeIPolicyInstanceSpecViolation(
245
  ipolicy, instance_spec, disk_template,
246
  _compute_fn=_ComputeIPolicySpecViolation):
247
  """Compute if instance specs meets the specs of ipolicy.
248

  
249
  @type ipolicy: dict
250
  @param ipolicy: The ipolicy to verify against
251
  @type instance_spec: dict
252
  @param instance_spec: The instance spec to verify
253
  @type disk_template: string
254
  @param disk_template: the disk template of the instance
255
  @param _compute_fn: The function to verify ipolicy (unittest only)
256
  @see: L{_ComputeIPolicySpecViolation}
257

  
258
  """
259
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
260
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
261
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
262
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
263
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
264
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
265

  
266
  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
267
                     disk_sizes, spindle_use, disk_template)
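# Illustrative usage sketch, not part of this revision: building the
# instance_spec mapping expected above. The sizes are hypothetical; an empty
# result is assumed to mean the spec satisfies the policy.
def _ExampleIPolicySpecCheck(ipolicy):
  """Example only."""
  spec = {
    constants.ISPEC_MEM_SIZE: 2048,
    constants.ISPEC_CPU_COUNT: 2,
    constants.ISPEC_DISK_COUNT: 1,
    constants.ISPEC_DISK_SIZE: [10240],
    constants.ISPEC_NIC_COUNT: 1,
    constants.ISPEC_SPINDLE_USE: 1,
    }
  return _ComputeIPolicyInstanceSpecViolation(ipolicy, spec,
                                              constants.DT_PLAIN)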
268

  
269

  
270
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
271
                                 target_group, cfg,
272
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
273
  """Compute if instance meets the specs of the new target group.
274

  
275
  @param ipolicy: The ipolicy to verify
276
  @param instance: The instance object to verify
277
  @param current_group: The current group of the instance
278
  @param target_group: The new group of the instance
279
  @type cfg: L{config.ConfigWriter}
280
  @param cfg: Cluster configuration
281
  @param _compute_fn: The function to verify ipolicy (unittest only)
282
  @see: L{_ComputeIPolicySpecViolation}
283

  
284
  """
285
  if current_group == target_group:
286
    return []
287
  else:
288
    return _compute_fn(ipolicy, instance, cfg)
289

  
290

  
291
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, cfg, ignore=False,
292
                            _compute_fn=_ComputeIPolicyNodeViolation):
293
  """Checks that the target node is correct in terms of instance policy.
294

  
295
  @param ipolicy: The ipolicy to verify
296
  @param instance: The instance object to verify
297
  @param node: The new node to relocate
298
  @type cfg: L{config.ConfigWriter}
299
  @param cfg: Cluster configuration
300
  @param ignore: Ignore violations of the ipolicy
301
  @param _compute_fn: The function to verify ipolicy (unittest only)
302
  @see: L{_ComputeIPolicySpecViolation}
303

  
304
  """
305
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
306
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group, cfg)
307

  
308
  if res:
309
    msg = ("Instance does not meet target node group's (%s) instance"
310
           " policy: %s") % (node.group, utils.CommaJoin(res))
311
    if ignore:
312
      lu.LogWarning(msg)
313
    else:
314
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
315

  
316

  
317
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
318
                          minmem, maxmem, vcpus, nics, disk_template, disks,
319
                          bep, hvp, hypervisor_name, tags):
320
  """Builds instance related env variables for hooks
321

  
322
  This builds the hook environment from individual variables.
323

  
324
  @type name: string
325
  @param name: the name of the instance
326
  @type primary_node: string
327
  @param primary_node: the name of the instance's primary node
328
  @type secondary_nodes: list
329
  @param secondary_nodes: list of secondary nodes as strings
330
  @type os_type: string
331
  @param os_type: the name of the instance's OS
332
  @type status: string
333
  @param status: the desired status of the instance
334
  @type minmem: string
335
  @param minmem: the minimum memory size of the instance
336
  @type maxmem: string
337
  @param maxmem: the maximum memory size of the instance
338
  @type vcpus: string
339
  @param vcpus: the count of VCPUs the instance has
340
  @type nics: list
341
  @param nics: list of tuples (name, uuid, ip, mac, mode, link, net, netinfo)
342
      representing the NICs the instance has
343
  @type disk_template: string
344
  @param disk_template: the disk template of the instance
345
  @type disks: list
346
  @param disks: list of tuples (name, uuid, size, mode)
347
  @type bep: dict
348
  @param bep: the backend parameters for the instance
349
  @type hvp: dict
350
  @param hvp: the hypervisor parameters for the instance
351
  @type hypervisor_name: string
352
  @param hypervisor_name: the hypervisor for the instance
353
  @type tags: list
354
  @param tags: list of instance tags as strings
355
  @rtype: dict
356
  @return: the hook environment for this instance
357

  
358
  """
359
  env = {
360
    "OP_TARGET": name,
361
    "INSTANCE_NAME": name,
362
    "INSTANCE_PRIMARY": primary_node,
363
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
364
    "INSTANCE_OS_TYPE": os_type,
365
    "INSTANCE_STATUS": status,
366
    "INSTANCE_MINMEM": minmem,
367
    "INSTANCE_MAXMEM": maxmem,
368
    # TODO(2.9) remove deprecated "memory" value
369
    "INSTANCE_MEMORY": maxmem,
370
    "INSTANCE_VCPUS": vcpus,
371
    "INSTANCE_DISK_TEMPLATE": disk_template,
372
    "INSTANCE_HYPERVISOR": hypervisor_name,
373
  }
374
  if nics:
375
    nic_count = len(nics)
376
    for idx, (name, _, ip, mac, mode, link, net, netinfo) in enumerate(nics):
377
      if ip is None:
378
        ip = ""
379
      env["INSTANCE_NIC%d_NAME" % idx] = name
380
      env["INSTANCE_NIC%d_IP" % idx] = ip
381
      env["INSTANCE_NIC%d_MAC" % idx] = mac
382
      env["INSTANCE_NIC%d_MODE" % idx] = mode
383
      env["INSTANCE_NIC%d_LINK" % idx] = link
384
      if netinfo:
385
        nobj = objects.Network.FromDict(netinfo)
386
        env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
387
      elif network:
388
        # FIXME: broken network reference: the instance NIC specifies a
389
        # network, but the relevant network entry was not in the config. This
390
        # should be made impossible.
391
        env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
392
      if mode == constants.NIC_MODE_BRIDGED:
393
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
394
  else:
395
    nic_count = 0
396

  
397
  env["INSTANCE_NIC_COUNT"] = nic_count
398

  
399
  if disks:
400
    disk_count = len(disks)
401
    for idx, (name, size, mode) in enumerate(disks):
402
      env["INSTANCE_DISK%d_NAME" % idx] = name
403
      env["INSTANCE_DISK%d_SIZE" % idx] = size
404
      env["INSTANCE_DISK%d_MODE" % idx] = mode
405
  else:
406
    disk_count = 0
407

  
408
  env["INSTANCE_DISK_COUNT"] = disk_count
409

  
410
  if not tags:
411
    tags = []
412

  
413
  env["INSTANCE_TAGS"] = " ".join(tags)
414

  
415
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
416
    for key, value in source.items():
417
      env["INSTANCE_%s_%s" % (kind, key)] = value
418

  
419
  return env
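# Illustrative sketch, not part of this revision: the returned environment is
# a flat dict of hook variables such as INSTANCE_NAME, INSTANCE_PRIMARY,
# INSTANCE_NIC_COUNT and INSTANCE_DISK0_SIZE. Callers normally obtain it via
# _BuildInstanceHookEnvByObject (defined below) instead of passing every
# argument by hand; 'lu' and 'instance' are hypothetical.
def _ExampleHookEnv(lu, instance):
  """Example only."""
  env = _BuildInstanceHookEnvByObject(lu, instance)
  return (env["INSTANCE_NAME"], env["INSTANCE_PRIMARY"],
          env["INSTANCE_NIC_COUNT"])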
420

  
421

  
422
def _NICToTuple(lu, nic):
423
  """Build a tupple of nic information.
424

  
425
  @type lu:  L{LogicalUnit}
426
  @param lu: the logical unit on whose behalf we execute
427
  @type nic: L{objects.NIC}
428
  @param nic: nic to convert to hooks tuple
429

  
430
  """
431
  cluster = lu.cfg.GetClusterInfo()
432
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
433
  mode = filled_params[constants.NIC_MODE]
434
  link = filled_params[constants.NIC_LINK]
435
  netinfo = None
436
  if nic.network:
437
    nobj = lu.cfg.GetNetwork(nic.network)
438
    netinfo = objects.Network.ToDict(nobj)
439
  return (nic.name, nic.uuid, nic.ip, nic.mac, mode, link, nic.network, netinfo)
440

  
441

  
442
def _NICListToTuple(lu, nics):
443
  """Build a list of nic information tuples.
444

  
445
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
446
  value in LUInstanceQueryData.
447

  
448
  @type lu:  L{LogicalUnit}
449
  @param lu: the logical unit on whose behalf we execute
450
  @type nics: list of L{objects.NIC}
451
  @param nics: list of nics to convert to hooks tuples
452

  
453
  """
454
  hooks_nics = []
455
  for nic in nics:
456
    hooks_nics.append(_NICToTuple(lu, nic))
457
  return hooks_nics
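# Illustrative usage sketch, not part of this revision: each element has the
# shape produced by _NICToTuple above, i.e.
# (name, uuid, ip, mac, mode, link, network, netinfo); 'lu' and 'instance' are
# hypothetical.
def _ExampleLogNicTuples(lu, instance):
  """Example only."""
  for (name, _, ip, mac, mode, link, _, _) in _NICListToTuple(lu,
                                                              instance.nics):
    logging.debug("NIC %s: ip=%s mac=%s mode=%s link=%s",
                  name, ip, mac, mode, link)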
458

  
459

  
460
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
461
  """Builds instance related env variables for hooks from an object.
462

  
463
  @type lu: L{LogicalUnit}
464
  @param lu: the logical unit on whose behalf we execute
465
  @type instance: L{objects.Instance}
466
  @param instance: the instance for which we should build the
467
      environment
468
  @type override: dict
469
  @param override: dictionary with key/values that will override
470
      our values
471
  @rtype: dict
472
  @return: the hook environment dictionary
473

  
474
  """
475
  cluster = lu.cfg.GetClusterInfo()
476
  bep = cluster.FillBE(instance)
477
  hvp = cluster.FillHV(instance)
478
  args = {
479
    "name": instance.name,
480
    "primary_node": instance.primary_node,
481
    "secondary_nodes": instance.secondary_nodes,
482
    "os_type": instance.os,
483
    "status": instance.admin_state,
484
    "maxmem": bep[constants.BE_MAXMEM],
485
    "minmem": bep[constants.BE_MINMEM],
486
    "vcpus": bep[constants.BE_VCPUS],
487
    "nics": _NICListToTuple(lu, instance.nics),
488
    "disk_template": instance.disk_template,
489
    "disks": [(disk.name, disk.size, disk.mode)
490
              for disk in instance.disks],
491
    "bep": bep,
492
    "hvp": hvp,
493
    "hypervisor_name": instance.hypervisor,
494
    "tags": instance.tags,
495
  }
496
  if override:
497
    args.update(override)
498
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
499

  
500

  
501
def _CheckNicsBridgesExist(lu, target_nics, target_node):
502
  """Check that the brigdes needed by a list of nics exist.
503

  
504
  """
505
  cluster = lu.cfg.GetClusterInfo()
506
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
507
  brlist = [params[constants.NIC_LINK] for params in paramslist
508
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
509
  if brlist:
510
    result = lu.rpc.call_bridges_exist(target_node, brlist)
511
    result.Raise("Error checking bridges on destination node '%s'" %
512
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
513

  
514

  
515
def _CheckInstanceBridgesExist(lu, instance, node=None):
516
  """Check that the brigdes needed by an instance exist.
517

  
518
  """
519
  if node is None:
520
    node = instance.primary_node
521
  _CheckNicsBridgesExist(lu, instance.nics, node)
522

  
523

  
524
def _CheckOSVariant(os_obj, name):
525
  """Check whether an OS name conforms to the os variants specification.
526

  
527
  @type os_obj: L{objects.OS}
528
  @param os_obj: OS object to check
529
  @type name: string
530
  @param name: OS name passed by the user, to check for validity
531

  
532
  """
533
  variant = objects.OS.GetVariant(name)
534
  if not os_obj.supported_variants:
535
    if variant:
536
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
537
                                 " passed)" % (os_obj.name, variant),
538
                                 errors.ECODE_INVAL)
539
    return
540
  if not variant:
541
    raise errors.OpPrereqError("OS name must include a variant",
542
                               errors.ECODE_INVAL)
543

  
544
  if variant not in os_obj.supported_variants:
545
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
546

  
547

  
548
def _CheckHostnameSane(lu, name):
549
  """Ensures that a given hostname resolves to a 'sane' name.
550

  
551
  The given name is required to be a prefix of the resolved hostname,
552
  to prevent accidental mismatches.
553

  
554
  @param lu: the logical unit on behalf of which we're checking
555
  @param name: the name we should resolve and check
556
  @return: the resolved hostname object
557

  
558
  """
559
  hostname = netutils.GetHostname(name=name)
560
  if hostname.name != name:
561
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
562
  if not utils.MatchNameComponent(name, [hostname.name]):
563
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
564
                                " same as given hostname '%s'") %
565
                                (hostname.name, name), errors.ECODE_INVAL)
566
  return hostname
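# Illustrative usage sketch, not part of this revision: resolving and
# validating a user-supplied name; the returned hostname object exposes the
# resolved .name used above. The name and 'lu' are hypothetical.
def _ExampleResolveName(lu):
  """Example only."""
  hostname = _CheckHostnameSane(lu, "instance1.example.com")
  return hostname.name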
567

  
568

  
569
def _WaitForSync(lu, instance, disks=None, oneshot=False):
570
  """Sleep and poll for an instance's disk to sync.
571

  
572
  """
573
  if not instance.disks or disks is not None and not disks:
574
    return True
575

  
576
  disks = _ExpandCheckDisks(instance, disks)
577

  
578
  if not oneshot:
579
    lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
580

  
581
  node = instance.primary_node
582

  
583
  for dev in disks:
584
    lu.cfg.SetDiskID(dev, node)
585

  
586
  # TODO: Convert to utils.Retry
587

  
588
  retries = 0
589
  degr_retries = 10 # in seconds, as we sleep 1 second each time
590
  while True:
591
    max_time = 0
592
    done = True
593
    cumul_degraded = False
594
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
595
    msg = rstats.fail_msg
596
    if msg:
597
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
598
      retries += 1
599
      if retries >= 10:
600
        raise errors.RemoteError("Can't contact node %s for mirror data,"
601
                                 " aborting." % node)
602
      time.sleep(6)
603
      continue
604
    rstats = rstats.payload
605
    retries = 0
606
    for i, mstat in enumerate(rstats):
607
      if mstat is None:
608
        lu.LogWarning("Can't compute data for node %s/%s",
609
                           node, disks[i].iv_name)
610
        continue
611

  
612
      cumul_degraded = (cumul_degraded or
613
                        (mstat.is_degraded and mstat.sync_percent is None))
614
      if mstat.sync_percent is not None:
615
        done = False
616
        if mstat.estimated_time is not None:
617
          rem_time = ("%s remaining (estimated)" %
618
                      utils.FormatSeconds(mstat.estimated_time))
619
          max_time = mstat.estimated_time
620
        else:
621
          rem_time = "no time estimate"
622
        lu.LogInfo("- device %s: %5.2f%% done, %s",
623
                   disks[i].iv_name, mstat.sync_percent, rem_time)
624

  
625
    # if we're done but degraded, let's do a few small retries, to
626
    # make sure we see a stable and not transient situation; therefore
627
    # we force restart of the loop
628
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
629
      logging.info("Degraded disks found, %d retries left", degr_retries)
630
      degr_retries -= 1
631
      time.sleep(1)
632
      continue
633

  
634
    if done or oneshot:
635
      break
636

  
637
    time.sleep(min(60, max_time))
638

  
639
  if done:
640
    lu.LogInfo("Instance %s's disks are in sync", instance.name)
641

  
642
  return not cumul_degraded
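# Illustrative usage sketch, not part of this revision: waiting for all disks
# of an instance to finish syncing and treating a degraded result as fatal,
# mirroring the wait_for_sync handling in LUInstanceActivateDisks below.
def _ExampleWaitForDisks(lu, instance):
  """Example only."""
  if not _WaitForSync(lu, instance):
    raise errors.OpExecError("Some disks of the instance are degraded!")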
643

  
644

  
645
def _BlockdevFind(lu, node, dev, instance):
646
  """Wrapper around call_blockdev_find to annotate diskparams.
647

  
648
  @param lu: A reference to the lu object
649
  @param node: The node to call out
650
  @param dev: The device to find
651
  @param instance: The instance object the device belongs to
652
  @return: The result of the rpc call
653

  
654
  """
655
  (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
656
  return lu.rpc.call_blockdev_find(node, disk)
657

  
658

  
659
def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
660
  """Wrapper around L{_CheckDiskConsistencyInner}.
661

  
662
  """
663
  (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
664
  return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
665
                                    ldisk=ldisk)
666

  
667

  
668
def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
669
                               ldisk=False):
670
  """Check that mirrors are not degraded.
671

  
672
  @attention: The device has to be annotated already.
673

  
674
  The ldisk parameter, if True, will change the test from the
675
  is_degraded attribute (which represents overall non-ok status for
676
  the device(s)) to the ldisk (representing the local storage status).
677

  
678
  """
679
  lu.cfg.SetDiskID(dev, node)
680

  
681
  result = True
682

  
683
  if on_primary or dev.AssembleOnSecondary():
684
    rstats = lu.rpc.call_blockdev_find(node, dev)
685
    msg = rstats.fail_msg
686
    if msg:
687
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
688
      result = False
689
    elif not rstats.payload:
690
      lu.LogWarning("Can't find disk on node %s", node)
691
      result = False
692
    else:
693
      if ldisk:
694
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
695
      else:
696
        result = result and not rstats.payload.is_degraded
697

  
698
  if dev.children:
699
    for child in dev.children:
700
      result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
701
                                                     on_primary)
702

  
703
  return result
704

  
705 92

  
706 93
class LUOobCommand(NoHooksLU):
707 94
  """Logical unit for OOB handling.
......
1001 388

  
1002 389
      data[os_name] = info
1003 390

  
1004
    # Prepare data in requested order
1005
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
1006
            if name in data]
1007

  
1008

  
1009
class LUOsDiagnose(NoHooksLU):
1010
  """Logical unit for OS diagnose/query.
1011

  
1012
  """
1013
  REQ_BGL = False
1014

  
1015
  @staticmethod
1016
  def _BuildFilter(fields, names):
1017
    """Builds a filter for querying OSes.
1018

  
1019
    """
1020
    name_filter = qlang.MakeSimpleFilter("name", names)
1021

  
1022
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
1023
    # respective field is not requested
1024
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
1025
                     for fname in ["hidden", "blacklisted"]
1026
                     if fname not in fields]
1027
    if "valid" not in fields:
1028
      status_filter.append([qlang.OP_TRUE, "valid"])
1029

  
1030
    if status_filter:
1031
      status_filter.insert(0, qlang.OP_AND)
1032
    else:
1033
      status_filter = None
1034

  
1035
    if name_filter and status_filter:
1036
      return [qlang.OP_AND, name_filter, status_filter]
1037
    elif name_filter:
1038
      return name_filter
1039
    else:
1040
      return status_filter
1041

  
1042
  def CheckArguments(self):
1043
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
1044
                       self.op.output_fields, False)
1045

  
1046
  def ExpandNames(self):
1047
    self.oq.ExpandNames(self)
1048

  
1049
  def Exec(self, feedback_fn):
1050
    return self.oq.OldStyleQuery(self)
1051

  
1052

  
1053
class _ExtStorageQuery(_QueryBase):
1054
  FIELDS = query.EXTSTORAGE_FIELDS
1055

  
1056
  def ExpandNames(self, lu):
1057
    # Lock all nodes in shared mode
1058
    # Temporary removal of locks, should be reverted later
1059
    # TODO: reintroduce locks when they are lighter-weight
1060
    lu.needed_locks = {}
1061
    #self.share_locks[locking.LEVEL_NODE] = 1
1062
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1063

  
1064
    # The following variables interact with _QueryBase._GetNames
1065
    if self.names:
1066
      self.wanted = self.names
1067
    else:
1068
      self.wanted = locking.ALL_SET
1069

  
1070
    self.do_locking = self.use_locking
1071

  
1072
  def DeclareLocks(self, lu, level):
1073
    pass
1074

  
1075
  @staticmethod
1076
  def _DiagnoseByProvider(rlist):
1077
    """Remaps a per-node return list into an a per-provider per-node dictionary
1078

  
1079
    @param rlist: a map with node names as keys and ExtStorage objects as values
1080

  
1081
    @rtype: dict
1082
    @return: a dictionary with extstorage providers as keys and as
1083
        value another map, with nodes as keys and tuples of
1084
        (path, status, diagnose, parameters) as values, eg::
1085

  
1086
          {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
1087
                         "node2": [(/srv/..., False, "missing file")]
1088
                         "node3": [(/srv/..., True, "", [])]
1089
          }
1090

  
1091
    """
1092
    all_es = {}
1093
    # we build here the list of nodes that didn't fail the RPC (at RPC
1094
    # level), so that nodes with a non-responding node daemon don't
1095
    # make all OSes invalid
1096
    good_nodes = [node_name for node_name in rlist
1097
                  if not rlist[node_name].fail_msg]
1098
    for node_name, nr in rlist.items():
1099
      if nr.fail_msg or not nr.payload:
1100
        continue
1101
      for (name, path, status, diagnose, params) in nr.payload:
1102
        if name not in all_es:
1103
          # build a list of nodes for this os containing empty lists
1104
          # for each node in node_list
1105
          all_es[name] = {}
1106
          for nname in good_nodes:
1107
            all_es[name][nname] = []
1108
        # convert params from [name, help] to (name, help)
1109
        params = [tuple(v) for v in params]
1110
        all_es[name][node_name].append((path, status, diagnose, params))
1111
    return all_es
1112

  
1113
  def _GetQueryData(self, lu):
1114
    """Computes the list of nodes and their attributes.
1115

  
1116
    """
1117
    # Locking is not used
1118
    assert not (compat.any(lu.glm.is_owned(level)
1119
                           for level in locking.LEVELS
1120
                           if level != locking.LEVEL_CLUSTER) or
1121
                self.do_locking or self.use_locking)
1122

  
1123
    valid_nodes = [node.name
1124
                   for node in lu.cfg.GetAllNodesInfo().values()
1125
                   if not node.offline and node.vm_capable]
1126
    pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
1127

  
1128
    data = {}
1129

  
1130
    nodegroup_list = lu.cfg.GetNodeGroupList()
1131

  
1132
    for (es_name, es_data) in pol.items():
1133
      # For every provider compute the nodegroup validity.
1134
      # To do this we need to check the validity of each node in es_data
1135
      # and then construct the corresponding nodegroup dict:
1136
      #      { nodegroup1: status
1137
      #        nodegroup2: status
1138
      #      }
1139
      ndgrp_data = {}
1140
      for nodegroup in nodegroup_list:
1141
        ndgrp = lu.cfg.GetNodeGroup(nodegroup)
1142

  
1143
        nodegroup_nodes = ndgrp.members
1144
        nodegroup_name = ndgrp.name
1145
        node_statuses = []
1146

  
1147
        for node in nodegroup_nodes:
1148
          if node in valid_nodes:
1149
            if es_data[node] != []:
1150
              node_status = es_data[node][0][1]
1151
              node_statuses.append(node_status)
1152
            else:
1153
              node_statuses.append(False)
1154

  
1155
        if False in node_statuses:
1156
          ndgrp_data[nodegroup_name] = False
1157
        else:
1158
          ndgrp_data[nodegroup_name] = True
1159

  
1160
      # Compute the provider's parameters
1161
      parameters = set()
1162
      for idx, esl in enumerate(es_data.values()):
1163
        valid = bool(esl and esl[0][1])
1164
        if not valid:
1165
          break
1166

  
1167
        node_params = esl[0][3]
1168
        if idx == 0:
1169
          # First entry
1170
          parameters.update(node_params)
1171
        else:
1172
          # Filter out inconsistent values
1173
          parameters.intersection_update(node_params)
1174

  
1175
      params = list(parameters)
1176

  
1177
      # Now fill all the info for this provider
1178
      info = query.ExtStorageInfo(name=es_name, node_status=es_data,
1179
                                  nodegroup_status=ndgrp_data,
1180
                                  parameters=params)
1181

  
1182
      data[es_name] = info
1183

  
1184
    # Prepare data in requested order
1185
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
1186
            if name in data]
1187

  
1188

  
1189
class LUExtStorageDiagnose(NoHooksLU):
1190
  """Logical unit for ExtStorage diagnose/query.
1191

  
1192
  """
1193
  REQ_BGL = False
1194

  
1195
  def CheckArguments(self):
1196
    self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
1197
                               self.op.output_fields, False)
1198

  
1199
  def ExpandNames(self):
1200
    self.eq.ExpandNames(self)
1201

  
1202
  def Exec(self, feedback_fn):
1203
    return self.eq.OldStyleQuery(self)
1204

  
1205

  
1206
class _InstanceQuery(_QueryBase):
1207
  FIELDS = query.INSTANCE_FIELDS
1208

  
1209
  def ExpandNames(self, lu):
1210
    lu.needed_locks = {}
1211
    lu.share_locks = _ShareAll()
1212

  
1213
    if self.names:
1214
      self.wanted = _GetWantedInstances(lu, self.names)
1215
    else:
1216
      self.wanted = locking.ALL_SET
1217

  
1218
    self.do_locking = (self.use_locking and
1219
                       query.IQ_LIVE in self.requested_data)
1220
    if self.do_locking:
1221
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
1222
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
1223
      lu.needed_locks[locking.LEVEL_NODE] = []
1224
      lu.needed_locks[locking.LEVEL_NETWORK] = []
1225
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
1226

  
1227
    self.do_grouplocks = (self.do_locking and
1228
                          query.IQ_NODES in self.requested_data)
1229

  
1230
  def DeclareLocks(self, lu, level):
1231
    if self.do_locking:
1232
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
1233
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
1234

  
1235
        # Lock all groups used by instances optimistically; this requires going
1236
        # via the node before it's locked, requiring verification later on
1237
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
1238
          set(group_uuid
1239
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
1240
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
1241
      elif level == locking.LEVEL_NODE:
1242
        lu._LockInstancesNodes() # pylint: disable=W0212
1243

  
1244
      elif level == locking.LEVEL_NETWORK:
1245
        lu.needed_locks[locking.LEVEL_NETWORK] = \
1246
          frozenset(net_uuid
1247
                    for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
1248
                    for net_uuid in lu.cfg.GetInstanceNetworks(instance_name))
1249

  
1250
  @staticmethod
1251
  def _CheckGroupLocks(lu):
1252
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
1253
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
1254

  
1255
    # Check if node groups for locked instances are still correct
1256
    for instance_name in owned_instances:
1257
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
1258

  
1259
  def _GetQueryData(self, lu):
1260
    """Computes the list of instances and their attributes.
1261

  
1262
    """
1263
    if self.do_grouplocks:
1264
      self._CheckGroupLocks(lu)
1265

  
1266
    cluster = lu.cfg.GetClusterInfo()
1267
    all_info = lu.cfg.GetAllInstancesInfo()
1268

  
1269
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
1270

  
1271
    instance_list = [all_info[name] for name in instance_names]
1272
    nodes = frozenset(itertools.chain(*(inst.all_nodes
1273
                                        for inst in instance_list)))
1274
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
1275
    bad_nodes = []
1276
    offline_nodes = []
1277
    wrongnode_inst = set()
1278

  
1279
    # Gather data as requested
1280
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
1281
      live_data = {}
1282
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
1283
      for name in nodes:
1284
        result = node_data[name]
1285
        if result.offline:
1286
          # offline nodes will be in both lists
1287
          assert result.fail_msg
1288
          offline_nodes.append(name)
1289
        if result.fail_msg:
1290
          bad_nodes.append(name)
1291
        elif result.payload:
1292
          for inst in result.payload:
1293
            if inst in all_info:
1294
              if all_info[inst].primary_node == name:
1295
                live_data.update(result.payload)
1296
              else:
1297
                wrongnode_inst.add(inst)
1298
            else:
1299
              # orphan instance; we don't list it here as we don't
1300
              # handle this case yet in the output of instance listing
1301
              logging.warning("Orphan instance '%s' found on node %s",
1302
                              inst, name)
1303
        # else no instance is alive
1304
    else:
1305
      live_data = {}
1306

  
1307
    if query.IQ_DISKUSAGE in self.requested_data:
1308
      gmi = ganeti.masterd.instance
1309
      disk_usage = dict((inst.name,
1310
                         gmi.ComputeDiskSize(inst.disk_template,
1311
                                             [{constants.IDISK_SIZE: disk.size}
1312
                                              for disk in inst.disks]))
1313
                        for inst in instance_list)
1314
    else:
1315
      disk_usage = None
1316

  
1317
    if query.IQ_CONSOLE in self.requested_data:
1318
      consinfo = {}
1319
      for inst in instance_list:
1320
        if inst.name in live_data:
1321
          # Instance is running
1322
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
1323
        else:
1324
          consinfo[inst.name] = None
1325
      assert set(consinfo.keys()) == set(instance_names)
1326
    else:
1327
      consinfo = None
1328

  
1329
    if query.IQ_NODES in self.requested_data:
1330
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
1331
                                            instance_list)))
1332
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
1333
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
1334
                    for uuid in set(map(operator.attrgetter("group"),
1335
                                        nodes.values())))
1336
    else:
1337
      nodes = None
1338
      groups = None
1339

  
1340
    if query.IQ_NETWORKS in self.requested_data:
1341
      net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
1342
                                    for i in instance_list))
1343
      networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
1344
    else:
1345
      networks = None
1346

  
1347
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
1348
                                   disk_usage, offline_nodes, bad_nodes,
1349
                                   live_data, wrongnode_inst, consinfo,
1350
                                   nodes, groups, networks)
1351

  
1352

  
1353
class LUQuery(NoHooksLU):
1354
  """Query for resources/items of a certain kind.
1355

  
1356
  """
1357
  # pylint: disable=W0142
1358
  REQ_BGL = False
1359

  
1360
  def CheckArguments(self):
1361
    qcls = _GetQueryImplementation(self.op.what)
1362

  
1363
    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
1364

  
1365
  def ExpandNames(self):
1366
    self.impl.ExpandNames(self)
1367

  
1368
  def DeclareLocks(self, level):
1369
    self.impl.DeclareLocks(self, level)
1370

  
1371
  def Exec(self, feedback_fn):
1372
    return self.impl.NewStyleQuery(self)
1373

  
1374

  
1375
class LUQueryFields(NoHooksLU):
1376
  """Query for resources/items of a certain kind.
1377

  
1378
  """
1379
  # pylint: disable=W0142
1380
  REQ_BGL = False
1381

  
1382
  def CheckArguments(self):
1383
    self.qcls = _GetQueryImplementation(self.op.what)
1384

  
1385
  def ExpandNames(self):
1386
    self.needed_locks = {}
1387

  
1388
  def Exec(self, feedback_fn):
1389
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
1390

  
1391

  
1392
class LUInstanceActivateDisks(NoHooksLU):
1393
  """Bring up an instance's disks.
1394

  
1395
  """
1396
  REQ_BGL = False
1397

  
1398
  def ExpandNames(self):
1399
    self._ExpandAndLockInstance()
1400
    self.needed_locks[locking.LEVEL_NODE] = []
1401
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
1402

  
1403
  def DeclareLocks(self, level):
1404
    if level == locking.LEVEL_NODE:
1405
      self._LockInstancesNodes()
1406

  
1407
  def CheckPrereq(self):
1408
    """Check prerequisites.
1409

  
1410
    This checks that the instance is in the cluster.
1411

  
1412
    """
1413
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
1414
    assert self.instance is not None, \
1415
      "Cannot retrieve locked instance %s" % self.op.instance_name
1416
    _CheckNodeOnline(self, self.instance.primary_node)
1417

  
1418
  def Exec(self, feedback_fn):
1419
    """Activate the disks.
1420

  
1421
    """
1422
    disks_ok, disks_info = \
1423
              _AssembleInstanceDisks(self, self.instance,
1424
                                     ignore_size=self.op.ignore_size)
1425
    if not disks_ok:
1426
      raise errors.OpExecError("Cannot activate block devices")
1427

  
1428
    if self.op.wait_for_sync:
1429
      if not _WaitForSync(self, self.instance):
1430
        raise errors.OpExecError("Some disks of the instance are degraded!")
1431

  
1432
    return disks_info
1433

  
1434

  
1435
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
1436
                           ignore_size=False):
1437
  """Prepare the block devices for an instance.
1438

  
1439
  This sets up the block devices on all nodes.
1440

  
1441
  @type lu: L{LogicalUnit}
1442
  @param lu: the logical unit on whose behalf we execute
1443
  @type instance: L{objects.Instance}
1444
  @param instance: the instance for whose disks we assemble
1445
  @type disks: list of L{objects.Disk} or None
1446
  @param disks: which disks to assemble (or all, if None)
1447
  @type ignore_secondaries: boolean
1448
  @param ignore_secondaries: if true, errors on secondary nodes
1449
      won't result in an error return from the function
1450
  @type ignore_size: boolean
1451
  @param ignore_size: if true, the current known size of the disk
1452
      will not be used during the disk activation, useful for cases
1453
      when the size is wrong
1454
  @return: False if the operation failed, otherwise a list of
1455
      (host, instance_visible_name, node_visible_name)
1456
      with the mapping from node devices to instance devices
1457

  
1458
  """
1459
  device_info = []
1460
  disks_ok = True
1461
  iname = instance.name
1462
  disks = _ExpandCheckDisks(instance, disks)
1463

  
1464
  # With the two passes mechanism we try to reduce the window of
1465
  # opportunity for the race condition of switching DRBD to primary
1466
  # before handshaking occurred, but we do not eliminate it
1467

  
1468
  # The proper fix would be to wait (with some limits) until the
1469
  # connection has been made and drbd transitions from WFConnection
1470
  # into any other network-connected state (Connected, SyncTarget,
1471
  # SyncSource, etc.)
1472

  
1473
  # 1st pass, assemble on all nodes in secondary mode
1474
  for idx, inst_disk in enumerate(disks):
1475
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
1476
      if ignore_size:
1477
        node_disk = node_disk.Copy()
1478
        node_disk.UnsetSize()
1479
      lu.cfg.SetDiskID(node_disk, node)
1480
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
1481
                                             False, idx)
1482
      msg = result.fail_msg
1483
      if msg:
1484
        is_offline_secondary = (node in instance.secondary_nodes and
1485
                                result.offline)
1486
        lu.LogWarning("Could not prepare block device %s on node %s"
1487
                      " (is_primary=False, pass=1): %s",
1488
                      inst_disk.iv_name, node, msg)
1489
        if not (ignore_secondaries or is_offline_secondary):
1490
          disks_ok = False
1491

  
1492
  # FIXME: race condition on drbd migration to primary
1493

  
1494
  # 2nd pass, do only the primary node
1495
  for idx, inst_disk in enumerate(disks):
1496
    dev_path = None
1497

  
1498
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
1499
      if node != instance.primary_node:
1500
        continue
1501
      if ignore_size:
1502
        node_disk = node_disk.Copy()
1503
        node_disk.UnsetSize()
1504
      lu.cfg.SetDiskID(node_disk, node)
1505
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
1506
                                             True, idx)
1507
      msg = result.fail_msg
1508
      if msg:
1509
        lu.LogWarning("Could not prepare block device %s on node %s"
1510
                      " (is_primary=True, pass=2): %s",
1511
                      inst_disk.iv_name, node, msg)
1512
        disks_ok = False
1513
      else:
1514
        dev_path = result.payload
1515

  
1516
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
1517

  
1518
  # leave the disks configured for the primary node
1519
  # this is a workaround that would be fixed better by
1520
  # improving the logical/physical id handling
1521
  for disk in disks:
1522
    lu.cfg.SetDiskID(disk, instance.primary_node)
1523

  
1524
  return disks_ok, device_info
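# Illustrative usage sketch, not part of this revision: callers check the
# boolean status before using the device mapping, as LUInstanceActivateDisks
# above does; 'lu' and 'instance' are hypothetical.
def _ExampleAssembleDisks(lu, instance):
  """Example only."""
  disks_ok, device_info = _AssembleInstanceDisks(lu, instance,
                                                 ignore_size=True)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate block devices")
  return device_info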
1525

  
1526

  
1527
def _StartInstanceDisks(lu, instance, force):
1528
  """Start the disks of an instance.
1529

  
1530
  """
1531
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
1532
                                           ignore_secondaries=force)
1533
  if not disks_ok:
1534
    _ShutdownInstanceDisks(lu, instance)
1535
    if force is not None and not force:
1536
      lu.LogWarning("",
1537
                    hint=("If the message above refers to a secondary node,"
1538
                          " you can retry the operation using '--force'"))
1539
    raise errors.OpExecError("Disk consistency error")
1540

  
1541

  
1542
class LUInstanceDeactivateDisks(NoHooksLU):
1543
  """Shutdown an instance's disks.
1544

  
1545
  """
1546
  REQ_BGL = False
1547

  
1548
  def ExpandNames(self):
1549
    self._ExpandAndLockInstance()
1550
    self.needed_locks[locking.LEVEL_NODE] = []
1551
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
1552

  
1553
  def DeclareLocks(self, level):
1554
    if level == locking.LEVEL_NODE:
1555
      self._LockInstancesNodes()
1556

  
1557
  def CheckPrereq(self):
1558
    """Check prerequisites.
1559

  
1560
    This checks that the instance is in the cluster.
1561

  
1562
    """
1563
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
1564
    assert self.instance is not None, \
1565
      "Cannot retrieve locked instance %s" % self.op.instance_name
1566

  
1567
  def Exec(self, feedback_fn):
1568
    """Deactivate the disks
1569

  
1570
    """
1571
    instance = self.instance
1572
    if self.op.force:
1573
      _ShutdownInstanceDisks(self, instance)
1574
    else:
1575
      _SafeShutdownInstanceDisks(self, instance)
1576

  
1577

  
1578
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
1579
  """Shutdown block devices of an instance.
1580

  
1581
  This function checks if an instance is running, before calling
1582
  _ShutdownInstanceDisks.
1583

  
1584
  """
1585
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
1586
  _ShutdownInstanceDisks(lu, instance, disks=disks)
1587

  
1588

  
1589
def _ExpandCheckDisks(instance, disks):
1590
  """Return the instance disks selected by the disks list
1591

  
1592
  @type disks: list of L{objects.Disk} or None
1593
  @param disks: selected disks
1594
  @rtype: list of L{objects.Disk}
1595
  @return: selected instance disks to act on
1596

  
1597
  """
1598
  if disks is None:
1599
    return instance.disks
1600
  else:
1601
    if not set(disks).issubset(instance.disks):
1602
      raise errors.ProgrammerError("Can only act on disks belonging to the"
1603
                                   " target instance")
1604
    return disks
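# Illustrative usage sketch, not part of this revision: passing None selects
# all of the instance's disks, while an explicit selection must be a subset of
# them; 'instance' is hypothetical.
def _ExampleSelectDisks(instance):
  """Example only."""
  all_disks = _ExpandCheckDisks(instance, None)
  first_disk_only = _ExpandCheckDisks(instance, instance.disks[:1])
  return all_disks, first_disk_only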
1605

  
1606

  
1607
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
1608
  """Shutdown block devices of an instance.
1609

  
1610
  This does the shutdown on all nodes of the instance.
1611

  
1612
  If ignore_primary is false, errors on the primary node are not
1613
  ignored.
1614

  
1615
  """
1616
  all_result = True
1617
  disks = _ExpandCheckDisks(instance, disks)
1618

  
1619
  for disk in disks:
1620
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
1621
      lu.cfg.SetDiskID(top_disk, node)
1622
      result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
1623
      msg = result.fail_msg
1624
      if msg:
1625
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
1626
                      disk.iv_name, node, msg)
1627
        if ((node == instance.primary_node and not ignore_primary) or
1628
            (node != instance.primary_node and not result.offline)):
1629
          all_result = False
1630
  return all_result
1631

  
1632

  
1633
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
1634
  """Checks if a node has enough free memory.
1635

  
1636
  This function checks if a given node has the needed amount of free
1637
  memory. In case the node has less memory or we cannot get the
1638
  information from the node, this function raises an OpPrereqError
1639
  exception.
1640

  
1641
  @type lu: C{LogicalUnit}
1642
  @param lu: a logical unit from which we get configuration data
1643
  @type node: C{str}
1644
  @param node: the node to check
1645
  @type reason: C{str}
1646
  @param reason: string to use in the error message
1647
  @type requested: C{int}
1648
  @param requested: the amount of memory in MiB to check for
1649
  @type hypervisor_name: C{str}
1650
  @param hypervisor_name: the hypervisor to ask for memory stats
1651
  @rtype: integer
1652
  @return: node current free memory
1653
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
1654
      we cannot check the node
1655

  
1656
  """
1657
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
1658
  nodeinfo[node].Raise("Can't get data from node %s" % node,
1659
                       prereq=True, ecode=errors.ECODE_ENVIRON)
1660
  (_, _, (hv_info, )) = nodeinfo[node].payload
1661

  
1662
  free_mem = hv_info.get("memory_free", None)
1663
  if not isinstance(free_mem, int):
1664
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
1665
                               " was '%s'" % (node, free_mem),
1666
                               errors.ECODE_ENVIRON)
1667
  if requested > free_mem:
1668
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
1669
                               " needed %s MiB, available %s MiB" %
1670
                               (node, reason, requested, free_mem),
1671
                               errors.ECODE_NORES)
1672
  return free_mem
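# Illustrative usage sketch, not part of this revision: a prerequisite check
# asking for 4096 MiB on the instance's primary node; on success the node's
# current free memory in MiB is returned, otherwise OpPrereqError is raised.
# The reason string and the amount are hypothetical.
def _ExampleMemoryCheck(lu, instance):
  """Example only."""
  return _CheckNodeFreeMemory(lu, instance.primary_node,
                              "starting instance %s" % instance.name,
                              4096, instance.hypervisor)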
1673

  
1674

  
1675
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
1676
  """Checks if nodes have enough free disk space in all the VGs.
1677

  
1678
  This function checks if all given nodes have the needed amount of
1679
  free disk. In case any node has less disk or we cannot get the
1680
  information from the node, this function raises an OpPrereqError
1681
  exception.
1682

  
1683
  @type lu: C{LogicalUnit}
1684
  @param lu: a logical unit from which we get configuration data
1685
  @type nodenames: C{list}
1686
  @param nodenames: the list of node names to check
1687
  @type req_sizes: C{dict}
1688
  @param req_sizes: the hash of vg and corresponding amount of disk in
1689
      MiB to check for
1690
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
1691
      or we cannot check the node
1692

  
1693
  """
1694
  for vg, req_size in req_sizes.items():
1695
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
1696

  
1697

  
1698
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
1699
  """Checks if nodes have enough free disk space in the specified VG.
1700

  
1701
  This function checks if all given nodes have the needed amount of
1702
  free disk. In case any node has less disk or we cannot get the
1703
  information from the node, this function raises an OpPrereqError
1704
  exception.
1705

  
1706
  @type lu: C{LogicalUnit}
1707
  @param lu: a logical unit from which we get configuration data
1708
  @type nodenames: C{list}
1709
  @param nodenames: the list of node names to check
1710
  @type vg: C{str}
1711
  @param vg: the volume group to check
1712
  @type requested: C{int}
1713
  @param requested: the amount of disk in MiB to check for
1714
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
1715
      or we cannot check the node
1716

  
1717
  """
1718
  es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
1719
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
1720
  for node in nodenames:
1721
    info = nodeinfo[node]
1722
    info.Raise("Cannot get current information from node %s" % node,
1723
               prereq=True, ecode=errors.ECODE_ENVIRON)
1724
    (_, (vg_info, ), _) = info.payload
1725
    vg_free = vg_info.get("vg_free", None)
1726
    if not isinstance(vg_free, int):
1727
      raise errors.OpPrereqError("Can't compute free disk space on node"
1728
                                 " %s for vg %s, result was '%s'" %
1729
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
1730
    if requested > vg_free:
1731
      raise errors.OpPrereqError("Not enough disk space on target node %s"
1732
                                 " vg %s: required %d MiB, available %d MiB" %
1733
                                 (node, vg, requested, vg_free),
1734
                                 errors.ECODE_NORES)
1735

  
1736

  
1737
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
1738
  """Checks if nodes have enough physical CPUs
1739

  
1740
  This function checks if all given nodes have the needed number of
1741
  physical CPUs. In case any node has less CPUs or we cannot get the
1742
  information from the node, this function raises an OpPrereqError
1743
  exception.
1744

  
1745
  @type lu: C{LogicalUnit}
1746
  @param lu: a logical unit from which we get configuration data
1747
  @type nodenames: C{list}
1748
  @param nodenames: the list of node names to check
1749
  @type requested: C{int}
1750
  @param requested: the minimum acceptable number of physical CPUs
1751
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
1752
      or we cannot check the node
1753

  
1754
  """
1755
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
1756
  for node in nodenames:
1757
    info = nodeinfo[node]
1758
    info.Raise("Cannot get current information from node %s" % node,
1759
               prereq=True, ecode=errors.ECODE_ENVIRON)
1760
    (_, _, (hv_info, )) = info.payload
1761
    num_cpus = hv_info.get("cpu_total", None)
1762
    if not isinstance(num_cpus, int):
1763
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
1764
                                 " on node %s, result was '%s'" %
1765
                                 (node, num_cpus), errors.ECODE_ENVIRON)
1766
    if requested > num_cpus:
1767
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
1768
                                 "required" % (node, num_cpus, requested),
1769
                                 errors.ECODE_NORES)
1770

  
1771

  
1772
class LUInstanceStartup(LogicalUnit):
1773
  """Starts an instance.
1774

  
1775
  """
1776
  HPATH = "instance-start"
1777
  HTYPE = constants.HTYPE_INSTANCE
1778
  REQ_BGL = False
1779

  
1780
  def CheckArguments(self):
1781
    # extra beparams
1782
    if self.op.beparams:
1783
      # fill the beparams dict
1784
      objects.UpgradeBeParams(self.op.beparams)
1785
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1786

  
1787
  def ExpandNames(self):
1788
    self._ExpandAndLockInstance()
1789
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
1790

  
1791
  def DeclareLocks(self, level):
... This diff was truncated because it exceeds the maximum size that can be displayed.
