Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ 29d376ec

History | View | Annotate | Download (81 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import stat
30
import errno
31
import re
32
import subprocess
33
import random
34
import logging
35
import tempfile
36
import zlib
37
import base64
38

    
39
from ganeti import errors
40
from ganeti import utils
41
from ganeti import ssh
42
from ganeti import hypervisor
43
from ganeti import constants
44
from ganeti import bdev
45
from ganeti import objects
46
from ganeti import ssconf
47

    
48

    
49
class RPCFail(Exception):
  """Class denoting RPC failure.

  Its argument is the error message.

  Raised (normally via L{_Fail}) by RPC entry points in this module;
  the node daemon handles it specially and turns it into a 'failed'
  return value for the master.

  """
55

    
56
def _Fail(msg, *args, **kwargs):
  """Log an error and then raise an RPCFail exception.

  This exception is then handled specially in the ganeti daemon and
  turned into a 'failed' return type. As such, this function is a
  useful shortcut for logging the error and returning it to the master
  daemon.

  @type msg: string
  @param msg: the text of the exception; %-formatted with C{args} if
      any positional arguments are given
  @keyword exc: if true, log with a traceback instead of a plain error
  @raise RPCFail: always

  """
  if args:
    msg = msg % args
  # with exc=True we want the current traceback in the log as well
  log_fn = logging.exception if kwargs.get("exc") else logging.error
  log_fn(msg)
  raise RPCFail(msg)
76

    
77

    
78
def _GetConfig():
  """Simple wrapper to return a SimpleStore.

  @rtype: L{ssconf.SimpleStore}
  @return: a freshly-built SimpleStore instance

  """
  store = ssconf.SimpleStore()
  return store
86

    
87

    
88
def _GetSshRunner(cluster_name):
  """Simple wrapper to return an SshRunner.

  @type cluster_name: str
  @param cluster_name: the cluster name, which is needed
      by the SshRunner constructor
  @rtype: L{ssh.SshRunner}
  @return: an SshRunner instance

  """
  runner = ssh.SshRunner(cluster_name)
  return runner
99

    
100

    
101
def _Decompress(data):
  """Unpacks data compressed by the RPC client.

  @type data: list or tuple
  @param data: Data sent by RPC client, a (encoding, content) pair
  @rtype: str
  @return: Decompressed data

  """
  assert isinstance(data, (list, tuple))
  assert len(data) == 2
  (encoding, content) = data
  # zlib+base64 payloads must be decoded before inflating
  if encoding == constants.RPC_ENCODING_ZLIB_BASE64:
    return zlib.decompress(base64.b64decode(content))
  if encoding == constants.RPC_ENCODING_NONE:
    return content
  raise AssertionError("Unknown data encoding")
119

    
120

    
121
def _CleanDirectory(path, exclude=None):
  """Removes all regular files in a directory.

  Symlinks and subdirectories are left alone; a non-existing path is
  silently ignored.

  @type path: str
  @param path: the directory to clean
  @type exclude: list
  @param exclude: list of files to be excluded, defaults
      to the empty list

  """
  if not os.path.isdir(path):
    return

  if exclude is None:
    excluded = []
  else:
    # Normalize excluded paths so the comparison below is reliable
    excluded = [os.path.normpath(entry) for entry in exclude]

  for rel_name in utils.ListVisibleFiles(path):
    full_name = os.path.normpath(os.path.join(path, rel_name))
    if full_name in excluded:
      continue
    # only plain files are removed, never symlinks or directories
    if os.path.isfile(full_name) and not os.path.islink(full_name):
      utils.RemoveFile(full_name)
145

    
146

    
147
def JobQueuePurge():
  """Removes job queue files and archived jobs.

  The queue lock file is preserved so the queue stays lockable.

  @rtype: None

  """
  for directory, excluded in ((constants.QUEUE_DIR,
                               [constants.JOB_QUEUE_LOCK_FILE]),
                              (constants.JOB_QUEUE_ARCHIVE_DIR, None)):
    _CleanDirectory(directory, exclude=excluded)
155

    
156

    
157
def GetMasterInfo():
  """Returns master information.

  This is an utility function to compute master information, either
  for consumption here or from the node daemon.

  @rtype: tuple
  @return: (master_netdev, master_ip, master_name) if we have a good
      configuration, otherwise (None, None, None)

  """
  try:
    cfg = _GetConfig()
    master_netdev = cfg.GetMasterNetdev()
    master_ip = cfg.GetMasterIP()
    master_node = cfg.GetMasterNode()
  except errors.ConfigurationError:
    # the exception is bound by logging.exception; the old code bound
    # it to an unused 'err' variable, which has been dropped
    logging.exception("Cluster configuration incomplete")
    return (None, None, None)
  return (master_netdev, master_ip, master_node)
177

    
178

    
179
def StartMaster(start_daemons):
  """Activate local node as master node.

  The function will always try to activate the IP address of the master
  (unless someone else has it). It will also start the master daemons,
  based on the start_daemons parameter.

  @type start_daemons: boolean
  @param start_daemons: whether to also start the master
      daemons (ganeti-masterd and ganeti-rapi)
  @rtype: boolean
  @return: False if the master info cannot be computed; otherwise True
      unless some step (IP activation, daemon start) failed

  """
  ok = True
  master_netdev, master_ip, _ = GetMasterInfo()
  if not master_netdev:
    # incomplete cluster configuration, GetMasterInfo already logged it
    return False

  if utils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT):
    if utils.OwnIpAddress(master_ip):
      # we already have the ip:
      logging.debug("Already started")
    else:
      logging.error("Someone else has the master ip, not activating")
      ok = False
  else:
    # the master IP is free: bring it up as a label on the master netdev
    result = utils.RunCmd(["ip", "address", "add", "%s/32" % master_ip,
                           "dev", master_netdev, "label",
                           "%s:0" % master_netdev])
    if result.failed:
      logging.error("Can't activate master IP: %s", result.output)
      ok = False

    # gratuitous ARP so that neighbours learn the IP's new owner
    result = utils.RunCmd(["arping", "-q", "-U", "-c 3", "-I", master_netdev,
                           "-s", master_ip, master_ip])
    # we'll ignore the exit code of arping

  # and now start the master and rapi daemons
  if start_daemons:
    for daemon in 'ganeti-masterd', 'ganeti-rapi':
      result = utils.RunCmd([daemon])
      if result.failed:
        logging.error("Can't start daemon %s: %s", daemon, result.output)
        ok = False
  return ok
224

    
225

    
226
def StopMaster(stop_daemons):
  """Deactivate this node as master.

  The function will always try to deactivate the IP address of the
  master. It will also stop the master daemons depending on the
  stop_daemons parameter.

  @type stop_daemons: boolean
  @param stop_daemons: whether to also stop the master daemons
      (ganeti-masterd and ganeti-rapi)
  @rtype: boolean
  @return: False if the master info cannot be computed, True otherwise
      (IP-removal failures are logged but ignored)

  """
  master_netdev, master_ip, _ = GetMasterInfo()
  if not master_netdev:
    return False

  result = utils.RunCmd(["ip", "address", "del", "%s/32" % master_ip,
                         "dev", master_netdev])
  if result.failed:
    logging.error("Can't remove the master IP, error: %s", result.output)
    # but otherwise ignore the failure

  if stop_daemons:
    # stop/kill the rapi and the master daemon
    for daemon in constants.RAPI_PID, constants.MASTERD_PID:
      utils.KillProcess(utils.ReadPidFile(utils.DaemonPidFileName(daemon)))

  return True
255

    
256

    
257
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
  """Joins this node to the cluster.

  This does the following:
      - updates the hostkeys of the machine (rsa and dsa)
      - adds the ssh private key to the user
      - adds the ssh public key to the users' authorized_keys file

  @type dsa: str
  @param dsa: the DSA private key to write
  @type dsapub: str
  @param dsapub: the DSA public key to write
  @type rsa: str
  @param rsa: the RSA private key to write
  @type rsapub: str
  @param rsapub: the RSA public key to write
  @type sshkey: str
  @param sshkey: the SSH private key to write
  @type sshpub: str
  @param sshpub: the SSH public key to write
  @rtype: tuple
  @return: (True, message) on success; on failure L{_Fail} raises

  """
  # host keys: private keys must not be world-readable (0600),
  # public keys can be (0644)
  sshd_keys =  [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
                (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
                (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
                (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
  for name, content, mode in sshd_keys:
    utils.WriteFile(name, data=content, mode=mode)

  try:
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
                                                    mkdir=True)
  except errors.OpExecError, err:
    _Fail("Error while processing user ssh files: %s", err, exc=True)

  for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
    utils.WriteFile(name, data=content, mode=0600)

  utils.AddAuthorizedKey(auth_keys, sshpub)

  # restart sshd so the new host keys take effect
  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])

  return (True, "Node added successfully")
302

    
303

    
304
def LeaveCluster():
  """Cleans up and remove the current node.

  This function cleans up and prepares the current node to be removed
  from the cluster.

  If processing is successful, then it raises an
  L{errors.QuitGanetiException} which is used as a special case to
  shutdown the node daemon.

  """
  _CleanDirectory(constants.DATA_DIR)
  JobQueuePurge()

  try:
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
  except errors.OpExecError:
    # best-effort: without the ssh file locations we simply stop here
    logging.exception("Error while processing ssh files")
    return

  f = open(pub_key, 'r')
  try:
    # drop our own public key from authorized_keys
    utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
  finally:
    f.close()

  utils.RemoveFile(priv_key)
  utils.RemoveFile(pub_key)

  # Return a reassuring string to the caller, and quit
  raise errors.QuitGanetiException(False, 'Shutdown scheduled')
335

    
336

    
337
def GetNodeInfo(vgname, hypervisor_type):
  """Gives back a hash with different informations about the node.

  @type vgname: C{string}
  @param vgname: the name of the volume group to ask for disk space information
  @type hypervisor_type: C{str}
  @param hypervisor_type: the name of the hypervisor to ask for
      memory information
  @rtype: C{dict}
  @return: dictionary with the following keys:
      - vg_size is the size of the configured volume group in MiB
      - vg_free is the free size of the volume group in MiB
      - memory_dom0 is the memory allocated for domain0 in MiB
      - memory_free is the currently available (free) ram in MiB
      - memory_total is the total number of ram in MiB

  """
  vginfo = _GetVGInfo(vgname)
  node_info = {
    'vg_size': vginfo['vg_size'],
    'vg_free': vginfo['vg_free'],
    }

  # merge in whatever the hypervisor reports (memory data)
  hv_data = hypervisor.GetHypervisor(hypervisor_type).GetNodeInfo()
  if hv_data is not None:
    node_info.update(hv_data)

  # the boot id changes on every reboot, so it identifies this boot
  boot_id_file = open("/proc/sys/kernel/random/boot_id", 'r')
  try:
    node_info["bootid"] = boot_id_file.read(128).rstrip("\n")
  finally:
    boot_id_file.close()

  return node_info
371

    
372

    
373
def VerifyNode(what, cluster_name):
  """Verify the status of the local node.

  Based on the input L{what} parameter, various checks are done on the
  local node.

  If the I{filelist} key is present, this list of
  files is checksummed and the file/checksum pairs are returned.

  If the I{nodelist} key is present, we check that we have
  connectivity via ssh with the target nodes (and check the hostname
  report).

  If the I{node-net-test} key is present, we check that we have
  connectivity to the given nodes via both primary IP and, if
  applicable, secondary IPs.

  @type what: C{dict}
  @param what: a dictionary of things to check:
      - filelist: list of files for which to compute checksums
      - nodelist: list of nodes we should check ssh communication with
      - node-net-test: list of nodes we should check node daemon port
        connectivity with
      - hypervisor: list with hypervisors to run the verify for
  @type cluster_name: str
  @param cluster_name: the cluster's name, used to build the SshRunner
  @rtype: dict
  @return: a dictionary with the same keys as the input dict, and
      values representing the result of the checks

  """
  result = {}

  if constants.NV_HYPERVISOR in what:
    # run each requested hypervisor's own verification
    result[constants.NV_HYPERVISOR] = tmp = {}
    for hv_name in what[constants.NV_HYPERVISOR]:
      tmp[hv_name] = hypervisor.GetHypervisor(hv_name).Verify()

  if constants.NV_FILELIST in what:
    result[constants.NV_FILELIST] = utils.FingerprintFiles(
      what[constants.NV_FILELIST])

  if constants.NV_NODELIST in what:
    result[constants.NV_NODELIST] = tmp = {}
    # NOTE: this shuffles the caller's list in place; presumably so
    # that concurrent verifies don't all contact nodes in the same order
    random.shuffle(what[constants.NV_NODELIST])
    for node in what[constants.NV_NODELIST]:
      success, message = _GetSshRunner(cluster_name).VerifyNodeHostname(node)
      if not success:
        # only failures are recorded in the result
        tmp[node] = message

  if constants.NV_NODENETTEST in what:
    result[constants.NV_NODENETTEST] = tmp = {}
    my_name = utils.HostInfo().name
    my_pip = my_sip = None
    # locate our own primary/secondary IPs in the supplied list
    for name, pip, sip in what[constants.NV_NODENETTEST]:
      if name == my_name:
        my_pip = pip
        my_sip = sip
        break
    if not my_pip:
      tmp[my_name] = ("Can't find my own primary/secondary IP"
                      " in the node list")
    else:
      port = utils.GetNodeDaemonPort()
      for name, pip, sip in what[constants.NV_NODENETTEST]:
        fail = []
        if not utils.TcpPing(pip, port, source=my_pip):
          fail.append("primary")
        # only test the secondary IP if it differs from the primary
        if sip != pip:
          if not utils.TcpPing(sip, port, source=my_sip):
            fail.append("secondary")
        if fail:
          tmp[name] = ("failure using the %s interface(s)" %
                       " and ".join(fail))

  if constants.NV_LVLIST in what:
    result[constants.NV_LVLIST] = GetVolumeList(what[constants.NV_LVLIST])

  if constants.NV_INSTANCELIST in what:
    result[constants.NV_INSTANCELIST] = GetInstanceList(
      what[constants.NV_INSTANCELIST])

  if constants.NV_VGLIST in what:
    result[constants.NV_VGLIST] = ListVolumeGroups()

  if constants.NV_VERSION in what:
    result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION,
                                    constants.RELEASE_VERSION)

  if constants.NV_HVINFO in what:
    hyper = hypervisor.GetHypervisor(what[constants.NV_HVINFO])
    result[constants.NV_HVINFO] = hyper.GetNodeInfo()

  if constants.NV_DRBDLIST in what:
    try:
      used_minors = bdev.DRBD8.GetUsedDevs().keys()
    except errors.BlockDeviceError, err:
      # report the error text instead of a minor list
      logging.warning("Can't get used minors list", exc_info=True)
      used_minors = str(err)
    result[constants.NV_DRBDLIST] = used_minors

  return result
473

    
474

    
475
def GetVolumeList(vg_name):
  """Compute list of logical volumes and their size.

  @type vg_name: str
  @param vg_name: the volume group whose LVs we should list
  @rtype: dict
  @return:
      dictionary of all partitions (key) with value being a tuple of
      their size (in MiB), inactive and online status::

        {'test1': ('20.06', True, True)}

      in case of errors, a string is returned with the error
      details.

  """
  lvs = {}
  sep = '|'
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-olv_name,lv_size,lv_attr", vg_name])
  if result.failed:
    _Fail("Failed to list logical volumes, lvs output: %s", result.output)

  # use a raw string: the '\|' escapes are not valid string escapes and
  # only worked by accident in the previous non-raw literal
  valid_line_re = re.compile(r"^ *([^|]+)\|([0-9.]+)\|([^|]{6})\|?$")
  for line in result.stdout.splitlines():
    line = line.strip()
    match = valid_line_re.match(line)
    if not match:
      logging.error("Invalid line returned from lvs output: '%s'", line)
      continue
    name, size, attr = match.groups()
    # lv_attr positions: [4] is the state ('-' meaning inactive),
    # [5] is the device open flag ('o' meaning open/online)
    inactive = attr[4] == '-'
    online = attr[5] == 'o'
    lvs[name] = (size, inactive, online)

  return lvs
512

    
513

    
514
def ListVolumeGroups():
  """List the volume groups and their size.

  @rtype: dict
  @return: dictionary with keys volume name and values the
      size of the volume

  """
  # thin wrapper: the actual work lives in utils
  vg_map = utils.ListVolumeGroups()
  return vg_map
523

    
524

    
525
def NodeVolumes():
  """List all volumes on this node.

  @rtype: list
  @return:
    A list of dictionaries, each having four keys:
      - name: the logical volume name,
      - size: the size of the logical volume
      - dev: the physical device on which the LV lives
      - vg: the volume group to which it belongs

    In case of errors, we return an empty list and log the
    error.

    Note that since a logical volume can live on multiple physical
    volumes, the resulting list might include a logical volume
    multiple times.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logging.error("Failed to list logical volumes, lvs output: %s",
                  result.output)
    return []

  volumes = []
  for line in result.stdout.splitlines():
    fields = line.split('|')
    # a valid data line has at least the four requested columns
    if len(fields) < 4:
      continue
    # the devices column looks like "/dev/sdx(start)"; keep the device part
    volumes.append({
      'name': fields[0].strip(),
      'size': fields[1].strip(),
      'dev': fields[2].strip().split('(')[0],
      'vg': fields[3].strip(),
      })
  return volumes
568

    
569

    
570
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  @type bridges_list: list
  @param bridges_list: the bridge names to check for
  @rtype: boolean
  @return: C{True} if all of them exist, C{False} otherwise

  """
  # stop at the first missing bridge
  for candidate in bridges_list:
    if not utils.BridgeExists(candidate):
      return False
  return True
582

    
583

    
584
def GetInstanceList(hypervisor_list):
  """Provides a list of instances.

  @type hypervisor_list: list
  @param hypervisor_list: the list of hypervisors to query information

  @rtype: list
  @return: a list of all running instances on the current node
    - instance1.example.com
    - instance2.example.com
  @raise errors.HypervisorError: re-raised from the hypervisor query

  """
  results = []
  for hname in hypervisor_list:
    try:
      names = hypervisor.GetHypervisor(hname).ListInstances()
      results.extend(names)
    except errors.HypervisorError:
      # the previous code bound the exception to an unused 'err' and
      # misspelled the log message ("hypevisor")
      logging.exception("Error enumerating instances for hypervisor %s", hname)
      raise

  return results
606

    
607

    
608
def GetInstanceInfo(instance, hname):
  """Gives back the informations about an instance as a dictionary.

  @type instance: string
  @param instance: the instance name
  @type hname: string
  @param hname: the hypervisor type of the instance

  @rtype: dict
  @return: dictionary with the following keys:
      - memory: memory size of instance (int)
      - state: xen state of instance (string)
      - time: cpu time of instance (float)

      or an empty dict if the hypervisor knows nothing about it

  """
  iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance)
  if iinfo is None:
    return {}

  # iinfo is a tuple; positions 2/4/5 carry memory, state and cpu time
  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
632

    
633

    
634
def GetInstanceMigratable(instance):
  """Gives whether an instance can be migrated.

  @type instance: L{objects.Instance}
  @param instance: object representing the instance to be checked.

  @rtype: tuple
  @return: tuple of (result, description) where:
      - result: whether the instance can be migrated or not
      - description: a description of the issue, if relevant

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  if instance.name not in hyper.ListInstances():
    return (False, 'not running')

  # every disk must have its symlink in place; instances started by
  # old versions (pre-1.2.5) lack them
  for idx, _ in enumerate(instance.disks):
    link_name = _GetBlockDevSymlinkPath(instance.name, idx)
    if not os.path.islink(link_name):
      return (False, 'not restarted since ganeti 1.2.5')

  return (True, '')
656

    
657

    
658
def GetAllInstancesInfo(hypervisor_list):
  """Gather data about all instances.

  This is the equivalent of L{GetInstanceInfo}, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  @type hypervisor_list: list
  @param hypervisor_list: list of hypervisors to query for instance data

  @rtype: dict
  @return: dictionary of instance: data, with data having the following keys:
      - memory: memory size of instance (int)
      - state: xen state of instance (string)
      - time: cpu time of instance (float)
      - vcpus: the number of vcpus
  @raise errors.HypervisorError: if the same instance is reported by
      two hypervisors with different memory or vcpus values

  """
  output = {}

  for hname in hypervisor_list:
    iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo()
    if iinfo:
      for name, inst_id, memory, vcpus, state, times in iinfo:
        value = {
          'memory': memory,
          'vcpus': vcpus,
          'state': state,
          'time': times,
          }
        if name in output:
          # we only check static parameters, like memory and vcpus,
          # and not state and time which can change between the
          # invocations of the different hypervisors
          for key in 'memory', 'vcpus':
            if value[key] != output[name][key]:
              raise errors.HypervisorError("Instance %s is running twice"
                                           " with different parameters" % name)
        output[name] = value

  return output
699

    
700

    
701
def InstanceOsAdd(instance, reinstall):
  """Add an OS to an instance.

  @type instance: L{objects.Instance}
  @param instance: Instance whose OS is to be installed
  @type reinstall: boolean
  @param reinstall: whether this is an instance reinstall
  @rtype: tuple
  @return: (success, details) describing the operation's outcome

  """
  try:
    inst_os = OSFromDisk(instance.os)
  except errors.InvalidOS, err:
    os_name, os_dir, os_err = err.args
    if os_dir is None:
      return (False, "Can't find OS '%s': %s" % (os_name, os_err))
    else:
      return (False, "Error parsing OS '%s' in directory %s: %s" %
              (os_name, os_dir, os_err))

  create_env = OSEnvironment(instance)
  if reinstall:
    # let the OS create script know this is a reinstall, not a fresh add
    create_env['INSTANCE_REINSTALL'] = "1"

  # per-invocation log file, named after the OS, instance and timestamp
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                     instance.name, int(time.time()))

  result = utils.RunCmd([inst_os.create_script], env=create_env,
                        cwd=inst_os.path, output=logfile,)
  if result.failed:
    logging.error("os create command '%s' returned error: %s, logfile: %s,"
                  " output: %s", result.cmd, result.fail_reason, logfile,
                  result.output)
    # return the tail of the script's log to help diagnose the failure
    lines = [utils.SafeEncode(val)
             for val in utils.TailFile(logfile, lines=20)]
    return (False, "OS create script failed (%s), last lines in the"
            " log file:\n%s" % (result.fail_reason, "\n".join(lines)))

  return (True, "Successfully installed")
741

    
742

    
743
def RunRenameInstance(instance, old_name):
  """Run the OS rename script for an instance.

  @type instance: L{objects.Instance}
  @param instance: Instance whose OS is to be installed
  @type old_name: string
  @param old_name: previous instance name
  @rtype: tuple
  @return: (success, details) describing the operation's outcome

  """
  inst_os = OSFromDisk(instance.os)

  rename_env = OSEnvironment(instance)
  rename_env['OLD_INSTANCE_NAME'] = old_name

  # per-invocation log file, named after the OS, old/new names and timestamp
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                           old_name,
                                           instance.name, int(time.time()))

  result = utils.RunCmd([inst_os.rename_script], env=rename_env,
                        cwd=inst_os.path, output=logfile)

  if result.failed:
    # the message used to say "os create command", a copy-paste error
    # from InstanceOsAdd; this is the rename script
    logging.error("os rename command '%s' returned error: %s output: %s",
                  result.cmd, result.fail_reason, result.output)
    # return the tail of the script's log to help diagnose the failure
    lines = [utils.SafeEncode(val)
             for val in utils.TailFile(logfile, lines=20)]
    return (False, "OS rename script failed (%s), last lines in the"
            " log file:\n%s" % (result.fail_reason, "\n".join(lines)))

  return (True, "Rename successful")
775

    
776

    
777
def _GetVGInfo(vg_name):
  """Get informations about the volume group.

  @type vg_name: str
  @param vg_name: the volume group which we query
  @rtype: dict
  @return:
    A dictionary with the following keys:
      - C{vg_size} is the total size of the volume group in MiB
      - C{vg_free} is the free size of the volume group in MiB
      - C{pv_count} are the number of physical disks in that VG

    If an error occurs during gathering of data, we return the same dict
    with keys all set to None.

  """
  # all-None result, returned unchanged on any failure below
  retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])

  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
    logging.error("volume group %s not present", vg_name)
    return retdic
  valarr = retval.stdout.strip().rstrip(':').split(':')
  if len(valarr) == 3:
    try:
      retdic = {
        "vg_size": int(round(float(valarr[0]), 0)),
        "vg_free": int(round(float(valarr[1]), 0)),
        "pv_count": int(valarr[2]),
        }
    except ValueError:
      # the exception detail goes into the log via logging.exception;
      # the old code bound it to an unused 'err' variable
      logging.exception("Fail to parse vgs output")
  else:
    logging.error("vgs output has the wrong number of fields (expected"
                  " three): %s", str(valarr))
  return retdic
815

    
816

    
817
def _GetBlockDevSymlinkPath(instance_name, idx):
  """Return the path of the symlink for an instance's disk.

  @param instance_name: the name of the owning instance
  @param idx: the disk index
  @return: path under the disk links directory, named "instance:idx"

  """
  link_basename = "%s:%d" % (instance_name, idx)
  return os.path.join(constants.DISK_LINKS_DIR, link_basename)
820

    
821

    
822
def _SymlinkBlockDev(instance_name, device_path, idx):
  """Set up symlinks to a instance's block device.

  This is an auxiliary function run when an instance is start (on the primary
  node) or when an instance is migrated (on the target node).


  @param instance_name: the name of the target instance
  @param device_path: path of the physical block device, on the node
  @param idx: the disk index
  @return: absolute path to the disk's symlink
  @raise OSError: re-raised for any failure other than EEXIST

  """
  link_name = _GetBlockDevSymlinkPath(instance_name, idx)
  try:
    os.symlink(device_path, link_name)
  except OSError, err:
    if err.errno == errno.EEXIST:
      # something already lives at link_name; replace it only if it is
      # not already the symlink we want
      if (not os.path.islink(link_name) or
          os.readlink(link_name) != device_path):
        os.remove(link_name)
        os.symlink(device_path, link_name)
    else:
      raise

  return link_name
848

    
849

    
850
def _RemoveBlockDevLinks(instance_name, disks):
  """Remove the block device symlinks belonging to the given instance.

  Removal errors are logged and otherwise ignored (best-effort cleanup).

  @param instance_name: the name of the owning instance
  @type disks: list
  @param disks: the instance's disks; only their count matters here

  """
  # only the index is needed, the disk object itself was never used
  for idx, _ in enumerate(disks):
    link_name = _GetBlockDevSymlinkPath(instance_name, idx)
    if os.path.islink(link_name):
      try:
        os.remove(link_name)
      except OSError:
        logging.exception("Can't remove symlink '%s'", link_name)
861

    
862

    
863
def _GatherAndLinkBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should assemble
  @rtype: list
  @return: list of (disk_object, device_path)
  @raise errors.BlockDeviceError: if a device cannot be found or its
      symlink cannot be created

  """
  block_devices = []
  for idx, disk in enumerate(instance.disks):
    device = _RecursiveFindBD(disk)
    if device is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    # NOTE(review): Open() presumably readies the device for I/O --
    # confirm against the bdev device contract
    device.Open()
    try:
      link_name = _SymlinkBlockDev(instance.name, device.dev_path, idx)
    except OSError, e:
      raise errors.BlockDeviceError("Cannot create block device symlink: %s" %
                                    e.strerror)

    block_devices.append((disk, link_name))

  return block_devices
891

    
892

    
893
def StartInstance(instance):
  """Start an instance.

  @type instance: L{objects.Instance}
  @param instance: the instance object
  @rtype: tuple
  @return: (success, details); on failure L{_Fail} raises an L{RPCFail}

  """
  running_instances = GetInstanceList([instance.hypervisor])

  if instance.name in running_instances:
    return (True, "Already running")

  try:
    # assemble/symlink the disks, then ask the hypervisor to boot
    block_devices = _GatherAndLinkBlockDevs(instance)
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    hyper.StartInstance(instance, block_devices)
  except errors.BlockDeviceError, err:
    _Fail("Block device error: %s", err, exc=True)
  except errors.HypervisorError, err:
    # clean up the symlinks created above before failing
    _RemoveBlockDevLinks(instance.name, instance.disks)
    _Fail("Hypervisor error: %s", err, exc=True)

  return (True, "Instance started successfully")
918

    
919

    
920
def InstanceShutdown(instance):
921
  """Shut an instance down.
922

923
  @note: this functions uses polling with a hardcoded timeout.
924

925
  @type instance: L{objects.Instance}
926
  @param instance: the instance object
927
  @rtype: boolean
928
  @return: whether the startup was successful or not
929

930
  """
931
  hv_name = instance.hypervisor
932
  running_instances = GetInstanceList([hv_name])
933

    
934
  if instance.name not in running_instances:
935
    return (True, "Instance already stopped")
936

    
937
  hyper = hypervisor.GetHypervisor(hv_name)
938
  try:
939
    hyper.StopInstance(instance)
940
  except errors.HypervisorError, err:
941
    _Fail("Failed to stop instance %s: %s", instance.name, err)
942

    
943
  # test every 10secs for 2min
944

    
945
  time.sleep(1)
946
  for dummy in range(11):
947
    if instance.name not in GetInstanceList([hv_name]):
948
      break
949
    time.sleep(10)
950
  else:
951
    # the shutdown did not succeed
952
    logging.error("Shutdown of '%s' unsuccessful, using destroy",
953
                  instance.name)
954

    
955
    try:
956
      hyper.StopInstance(instance, force=True)
957
    except errors.HypervisorError, err:
958
      _Fail("Failed to force stop instance %s: %s", instance.name, err)
959

    
960
    time.sleep(1)
961
    if instance.name in GetInstanceList([hv_name]):
962
      _Fail("Could not shutdown instance %s even by destroy", instance.name)
963

    
964
  _RemoveBlockDevLinks(instance.name, instance.disks)
965

    
966
  return (True, "Instance has been shutdown successfully")
967

    
968

    
969
def InstanceReboot(instance, reboot_type):
970
  """Reboot an instance.
971

972
  @type instance: L{objects.Instance}
973
  @param instance: the instance object to reboot
974
  @type reboot_type: str
975
  @param reboot_type: the type of reboot, one the following
976
    constants:
977
      - L{constants.INSTANCE_REBOOT_SOFT}: only reboot the
978
        instance OS, do not recreate the VM
979
      - L{constants.INSTANCE_REBOOT_HARD}: tear down and
980
        restart the VM (at the hypervisor level)
981
      - the other reboot type (L{constants.INSTANCE_REBOOT_HARD})
982
        is not accepted here, since that mode is handled
983
        differently
984
  @rtype: boolean
985
  @return: the success of the operation
986

987
  """
988
  running_instances = GetInstanceList([instance.hypervisor])
989

    
990
  if instance.name not in running_instances:
991
    _Fail("Cannot reboot instance %s that is not running", instance.name)
992

    
993
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
994
  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
995
    try:
996
      hyper.RebootInstance(instance)
997
    except errors.HypervisorError, err:
998
      _Fail("Failed to soft reboot instance %s: %s", instance.name, err)
999
  elif reboot_type == constants.INSTANCE_REBOOT_HARD:
1000
    try:
1001
      stop_result = InstanceShutdown(instance)
1002
      if not stop_result[0]:
1003
        return stop_result
1004
      return StartInstance(instance)
1005
    except errors.HypervisorError, err:
1006
      _Fail("Failed to hard reboot instance %s: %s", instance.name, err)
1007
  else:
1008
    _Fail("Invalid reboot_type received: %s", reboot_type)
1009

    
1010
  return (True, "Reboot successful")
1011

    
1012

    
1013
def MigrationInfo(instance):
1014
  """Gather information about an instance to be migrated.
1015

1016
  @type instance: L{objects.Instance}
1017
  @param instance: the instance definition
1018

1019
  """
1020
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
1021
  try:
1022
    info = hyper.MigrationInfo(instance)
1023
  except errors.HypervisorError, err:
1024
    _Fail("Failed to fetch migration information: %s", err, exc=True)
1025
  return (True, info)
1026

    
1027

    
1028
def AcceptInstance(instance, info, target):
1029
  """Prepare the node to accept an instance.
1030

1031
  @type instance: L{objects.Instance}
1032
  @param instance: the instance definition
1033
  @type info: string/data (opaque)
1034
  @param info: migration information, from the source node
1035
  @type target: string
1036
  @param target: target host (usually ip), on this node
1037

1038
  """
1039
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
1040
  try:
1041
    hyper.AcceptInstance(instance, info, target)
1042
  except errors.HypervisorError, err:
1043
    _Fail("Failed to accept instance: %s", err, exc=True)
1044
  return (True, "Accept successfull")
1045

    
1046

    
1047
def FinalizeMigration(instance, info, success):
1048
  """Finalize any preparation to accept an instance.
1049

1050
  @type instance: L{objects.Instance}
1051
  @param instance: the instance definition
1052
  @type info: string/data (opaque)
1053
  @param info: migration information, from the source node
1054
  @type success: boolean
1055
  @param success: whether the migration was a success or a failure
1056

1057
  """
1058
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
1059
  try:
1060
    hyper.FinalizeMigration(instance, info, success)
1061
  except errors.HypervisorError, err:
1062
    _Fail("Failed to finalize migration: %s", err, exc=True)
1063
  return (True, "Migration Finalized")
1064

    
1065

    
1066
def MigrateInstance(instance, target, live):
1067
  """Migrates an instance to another node.
1068

1069
  @type instance: L{objects.Instance}
1070
  @param instance: the instance definition
1071
  @type target: string
1072
  @param target: the target node name
1073
  @type live: boolean
1074
  @param live: whether the migration should be done live or not (the
1075
      interpretation of this parameter is left to the hypervisor)
1076
  @rtype: tuple
1077
  @return: a tuple of (success, msg) where:
1078
      - succes is a boolean denoting the success/failure of the operation
1079
      - msg is a string with details in case of failure
1080

1081
  """
1082
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
1083

    
1084
  try:
1085
    hyper.MigrateInstance(instance.name, target, live)
1086
  except errors.HypervisorError, err:
1087
    _Fail("Failed to migrate instance: %s", err, exc=True)
1088
  return (True, "Migration successfull")
1089

    
1090

    
1091
def BlockdevCreate(disk, size, owner, on_primary, info):
1092
  """Creates a block device for an instance.
1093

1094
  @type disk: L{objects.Disk}
1095
  @param disk: the object describing the disk we should create
1096
  @type size: int
1097
  @param size: the size of the physical underlying device, in MiB
1098
  @type owner: str
1099
  @param owner: the name of the instance for which disk is created,
1100
      used for device cache data
1101
  @type on_primary: boolean
1102
  @param on_primary:  indicates if it is the primary node or not
1103
  @type info: string
1104
  @param info: string that will be sent to the physical device
1105
      creation, used for example to set (LVM) tags on LVs
1106

1107
  @return: the new unique_id of the device (this can sometime be
1108
      computed only after creation), or None. On secondary nodes,
1109
      it's not required to return anything.
1110

1111
  """
1112
  clist = []
1113
  if disk.children:
1114
    for child in disk.children:
1115
      try:
1116
        crdev = _RecursiveAssembleBD(child, owner, on_primary)
1117
      except errors.BlockDeviceError, err:
1118
        _Fail("Can't assemble device %s: %s", child, err)
1119
      if on_primary or disk.AssembleOnSecondary():
1120
        # we need the children open in case the device itself has to
1121
        # be assembled
1122
        try:
1123
          crdev.Open()
1124
        except errors.BlockDeviceError, err:
1125
          _Fail("Can't make child '%s' read-write: %s", child, err)
1126
      clist.append(crdev)
1127

    
1128
  try:
1129
    device = bdev.Create(disk.dev_type, disk.physical_id, clist, size)
1130
  except errors.BlockDeviceError, err:
1131
    _Fail("Can't create block device: %s", err)
1132

    
1133
  if on_primary or disk.AssembleOnSecondary():
1134
    try:
1135
      device.Assemble()
1136
    except errors.BlockDeviceError, err:
1137
      _Fail("Can't assemble device after creation, unusual event: %s", err)
1138
    device.SetSyncSpeed(constants.SYNC_SPEED)
1139
    if on_primary or disk.OpenOnSecondary():
1140
      try:
1141
        device.Open(force=True)
1142
      except errors.BlockDeviceError, err:
1143
        _Fail("Can't make device r/w after creation, unusual event: %s", err)
1144
    DevCacheManager.UpdateCache(device.dev_path, owner,
1145
                                on_primary, disk.iv_name)
1146

    
1147
  device.SetInfo(info)
1148

    
1149
  physical_id = device.unique_id
1150
  return True, physical_id
1151

    
1152

    
1153
def BlockdevRemove(disk):
1154
  """Remove a block device.
1155

1156
  @note: This is intended to be called recursively.
1157

1158
  @type disk: L{objects.Disk}
1159
  @param disk: the disk object we should remove
1160
  @rtype: boolean
1161
  @return: the success of the operation
1162

1163
  """
1164
  msgs = []
1165
  result = True
1166
  try:
1167
    rdev = _RecursiveFindBD(disk)
1168
  except errors.BlockDeviceError, err:
1169
    # probably can't attach
1170
    logging.info("Can't attach to device %s in remove", disk)
1171
    rdev = None
1172
  if rdev is not None:
1173
    r_path = rdev.dev_path
1174
    try:
1175
      rdev.Remove()
1176
    except errors.BlockDeviceError, err:
1177
      msgs.append(str(err))
1178
      result = False
1179
    if result:
1180
      DevCacheManager.RemoveCache(r_path)
1181

    
1182
  if disk.children:
1183
    for child in disk.children:
1184
      c_status, c_msg = BlockdevRemove(child)
1185
      result = result and c_status
1186
      if c_msg: # not an empty message
1187
        msgs.append(c_msg)
1188

    
1189
  return (result, "; ".join(msgs))
1190

    
1191

    
1192
def _RecursiveAssembleBD(disk, owner, as_primary):
1193
  """Activate a block device for an instance.
1194

1195
  This is run on the primary and secondary nodes for an instance.
1196

1197
  @note: this function is called recursively.
1198

1199
  @type disk: L{objects.Disk}
1200
  @param disk: the disk we try to assemble
1201
  @type owner: str
1202
  @param owner: the name of the instance which owns the disk
1203
  @type as_primary: boolean
1204
  @param as_primary: if we should make the block device
1205
      read/write
1206

1207
  @return: the assembled device or None (in case no device
1208
      was assembled)
1209
  @raise errors.BlockDeviceError: in case there is an error
1210
      during the activation of the children or the device
1211
      itself
1212

1213
  """
1214
  children = []
1215
  if disk.children:
1216
    mcn = disk.ChildrenNeeded()
1217
    if mcn == -1:
1218
      mcn = 0 # max number of Nones allowed
1219
    else:
1220
      mcn = len(disk.children) - mcn # max number of Nones
1221
    for chld_disk in disk.children:
1222
      try:
1223
        cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
1224
      except errors.BlockDeviceError, err:
1225
        if children.count(None) >= mcn:
1226
          raise
1227
        cdev = None
1228
        logging.error("Error in child activation (but continuing): %s",
1229
                      str(err))
1230
      children.append(cdev)
1231

    
1232
  if as_primary or disk.AssembleOnSecondary():
1233
    r_dev = bdev.Assemble(disk.dev_type, disk.physical_id, children)
1234
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
1235
    result = r_dev
1236
    if as_primary or disk.OpenOnSecondary():
1237
      r_dev.Open()
1238
    DevCacheManager.UpdateCache(r_dev.dev_path, owner,
1239
                                as_primary, disk.iv_name)
1240

    
1241
  else:
1242
    result = True
1243
  return result
1244

    
1245

    
1246
def BlockdevAssemble(disk, owner, as_primary):
1247
  """Activate a block device for an instance.
1248

1249
  This is a wrapper over _RecursiveAssembleBD.
1250

1251
  @rtype: str or boolean
1252
  @return: a C{/dev/...} path for primary nodes, and
1253
      C{True} for secondary nodes
1254

1255
  """
1256
  status = True
1257
  result = "no error information"
1258
  try:
1259
    result = _RecursiveAssembleBD(disk, owner, as_primary)
1260
    if isinstance(result, bdev.BlockDev):
1261
      result = result.dev_path
1262
  except errors.BlockDeviceError, err:
1263
    result = "Error while assembling disk: %s" % str(err)
1264
    status = False
1265
  return (status, result)
1266

    
1267

    
1268
def BlockdevShutdown(disk):
1269
  """Shut down a block device.
1270

1271
  First, if the device is assembled (Attach() is successfull), then
1272
  the device is shutdown. Then the children of the device are
1273
  shutdown.
1274

1275
  This function is called recursively. Note that we don't cache the
1276
  children or such, as oppossed to assemble, shutdown of different
1277
  devices doesn't require that the upper device was active.
1278

1279
  @type disk: L{objects.Disk}
1280
  @param disk: the description of the disk we should
1281
      shutdown
1282
  @rtype: boolean
1283
  @return: the success of the operation
1284

1285
  """
1286
  msgs = []
1287
  result = True
1288
  r_dev = _RecursiveFindBD(disk)
1289
  if r_dev is not None:
1290
    r_path = r_dev.dev_path
1291
    try:
1292
      r_dev.Shutdown()
1293
      DevCacheManager.RemoveCache(r_path)
1294
    except errors.BlockDeviceError, err:
1295
      msgs.append(str(err))
1296
      result = False
1297

    
1298
  if disk.children:
1299
    for child in disk.children:
1300
      c_status, c_msg = BlockdevShutdown(child)
1301
      result = result and c_status
1302
      if c_msg: # not an empty message
1303
        msgs.append(c_msg)
1304

    
1305
  return (result, "; ".join(msgs))
1306

    
1307

    
1308
def BlockdevAddchildren(parent_cdev, new_cdevs):
  """Extend a mirrored block device.

  @type parent_cdev: L{objects.Disk}
  @param parent_cdev: the disk to which we should add children
  @type new_cdevs: list of L{objects.Disk}
  @param new_cdevs: the list of children which we should add
  @rtype: tuple
  @return: (True, None) on success; failures are reported via _Fail

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    _Fail("Can't find parent device '%s' in add children", parent_cdev)
  # all new children must already be attachable
  new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
  if None in new_bdevs:
    _Fail("Can't find new device(s) to add: %s:%s", new_bdevs, new_cdevs)
  parent_bdev.AddChildren(new_bdevs)
  return (True, None)
1327

    
1328

    
1329
def BlockdevRemovechildren(parent_cdev, new_cdevs):
  """Shrink a mirrored block device.

  @type parent_cdev: L{objects.Disk}
  @param parent_cdev: the disk from which we should remove children
  @type new_cdevs: list of L{objects.Disk}
  @param new_cdevs: the list of children which we should remove
  @rtype: tuple
  @return: (True, None) on success; failures are reported via _Fail

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    _Fail("Can't find parent device '%s' in remove children", parent_cdev)
  dev_paths = []
  for disk in new_cdevs:
    # a static path avoids having to attach to the device
    static_path = disk.StaticDevPath()
    if static_path is not None:
      dev_paths.append(static_path)
      continue
    found = _RecursiveFindBD(disk)
    if found is None:
      _Fail("Can't find device %s while removing children", disk)
    dev_paths.append(found.dev_path)
  parent_bdev.RemoveChildren(dev_paths)
  return (True, None)
1356

    
1357

    
1358
def BlockdevGetmirrorstatus(disks):
  """Get the mirroring status of a list of devices.

  @type disks: list of L{objects.Disk}
  @param disks: the list of disks which we should query
  @rtype: disk
  @return:
      a list of (mirror_done, estimated_time) tuples, which
      are the result of L{bdev.BlockDev.CombinedSyncStatus}
  @raise errors.BlockDeviceError: if any of the disks cannot be
      found

  """
  sync_status = []
  for disk in disks:
    found = _RecursiveFindBD(disk)
    if found is None:
      _Fail("Can't find device %s", disk)
    sync_status.append(found.CombinedSyncStatus())
  return True, sync_status
1378

    
1379

    
1380
def _RecursiveFindBD(disk):
  """Check if a device is activated.

  If so, return information about the real device.

  @type disk: L{objects.Disk}
  @param disk: the disk object we need to find

  @return: None if the device can't be found,
      otherwise the device instance

  """
  if disk.children:
    children = [_RecursiveFindBD(child) for child in disk.children]
  else:
    children = []

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
1398

    
1399

    
1400
def BlockdevFind(disk):
1401
  """Check if a device is activated.
1402

1403
  If it is, return informations about the real device.
1404

1405
  @type disk: L{objects.Disk}
1406
  @param disk: the disk to find
1407
  @rtype: None or tuple
1408
  @return: None if the disk cannot be found, otherwise a
1409
      tuple (device_path, major, minor, sync_percent,
1410
      estimated_time, is_degraded)
1411

1412
  """
1413
  try:
1414
    rbd = _RecursiveFindBD(disk)
1415
  except errors.BlockDeviceError, err:
1416
    _Fail("Failed to find device: %s", err, exc=True)
1417
  if rbd is None:
1418
    return (True, None)
1419
  return (True, (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus())
1420

    
1421

    
1422
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  @type file_name: str
  @param file_name: the target file name
  @type data: str
  @param data: the new contents of the file
  @type mode: int
  @param mode: the mode to give the file (can be None)
  @type uid: int
  @param uid: the owner of the file (can be -1 for default)
  @type gid: int
  @param gid: the group of the file (can be -1 for default)
  @type atime: float
  @param atime: the atime to set on the file (can be None)
  @type mtime: float
  @param mtime: the mtime to set on the file (can be None)
  @rtype: tuple
  @return: (True, "success") on success; errors are logged
      in the node daemon log and reported via _Fail

  """
  if not os.path.isabs(file_name):
    _Fail("Filename passed to UploadFile is not absolute: '%s'", file_name)

  # whitelist of files the master is allowed to overwrite; this is a
  # security measure, do not extend it lightly
  allowed_files = set([
    constants.CLUSTER_CONF_FILE,
    constants.ETC_HOSTS,
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.VNC_PASSWORD_FILE,
    constants.RAPI_CERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  for hv_name in constants.HYPER_TYPES:
    hv_class = hypervisor.GetHypervisor(hv_name)
    allowed_files.update(hv_class.GetAncillaryFiles())

  if file_name not in allowed_files:
    _Fail("Filename passed to UploadFile not in allowed upload targets: '%s'",
          file_name)

  # data arrives (possibly) compressed over the wire
  raw_data = _Decompress(data)

  utils.WriteFile(file_name, data=raw_data, mode=mode, uid=uid, gid=gid,
                  atime=atime, mtime=mtime)
  return (True, "success")
1472

    
1473

    
1474
def WriteSsconfFiles(values):
  """Update all ssconf files.

  Wrapper around the SimpleStore.WriteFiles.

  @param values: the ssconf values to write out, passed through
      unchanged to L{ssconf.SimpleStore.WriteFiles}

  """
  store = ssconf.SimpleStore()
  store.WriteFiles(values)
1481

    
1482

    
1483
def _ErrnoOrStr(err):
1484
  """Format an EnvironmentError exception.
1485

1486
  If the L{err} argument has an errno attribute, it will be looked up
1487
  and converted into a textual C{E...} description. Otherwise the
1488
  string representation of the error will be returned.
1489

1490
  @type err: L{EnvironmentError}
1491
  @param err: the exception to format
1492

1493
  """
1494
  if hasattr(err, 'errno'):
1495
    detail = errno.errorcode[err.errno]
1496
  else:
1497
    detail = str(err)
1498
  return detail
1499

    
1500

    
1501
def _OSOndiskVersion(name, os_dir):
1502
  """Compute and return the API version of a given OS.
1503

1504
  This function will try to read the API version of the OS given by
1505
  the 'name' parameter and residing in the 'os_dir' directory.
1506

1507
  @type name: str
1508
  @param name: the OS name we should look for
1509
  @type os_dir: str
1510
  @param os_dir: the directory inwhich we should look for the OS
1511
  @rtype: int or None
1512
  @return:
1513
      Either an integer denoting the version or None in the
1514
      case when this is not a valid OS name.
1515
  @raise errors.InvalidOS: if the OS cannot be found
1516

1517
  """
1518
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
1519

    
1520
  try:
1521
    st = os.stat(api_file)
1522
  except EnvironmentError, err:
1523
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
1524
                           " found (%s)" % _ErrnoOrStr(err))
1525

    
1526
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1527
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
1528
                           " a regular file")
1529

    
1530
  try:
1531
    f = open(api_file)
1532
    try:
1533
      api_versions = f.readlines()
1534
    finally:
1535
      f.close()
1536
  except EnvironmentError, err:
1537
    raise errors.InvalidOS(name, os_dir, "error while reading the"
1538
                           " API version (%s)" % _ErrnoOrStr(err))
1539

    
1540
  api_versions = [version.strip() for version in api_versions]
1541
  try:
1542
    api_versions = [int(version) for version in api_versions]
1543
  except (TypeError, ValueError), err:
1544
    raise errors.InvalidOS(name, os_dir,
1545
                           "API version is not integer (%s)" % str(err))
1546

    
1547
  return api_versions
1548

    
1549

    
1550
def DiagnoseOS(top_dirs=None):
1551
  """Compute the validity for all OSes.
1552

1553
  @type top_dirs: list
1554
  @param top_dirs: the list of directories in which to
1555
      search (if not given defaults to
1556
      L{constants.OS_SEARCH_PATH})
1557
  @rtype: list of L{objects.OS}
1558
  @return: an OS object for each name in all the given
1559
      directories
1560

1561
  """
1562
  if top_dirs is None:
1563
    top_dirs = constants.OS_SEARCH_PATH
1564

    
1565
  result = []
1566
  for dir_name in top_dirs:
1567
    if os.path.isdir(dir_name):
1568
      try:
1569
        f_names = utils.ListVisibleFiles(dir_name)
1570
      except EnvironmentError, err:
1571
        logging.exception("Can't list the OS directory %s", dir_name)
1572
        break
1573
      for name in f_names:
1574
        try:
1575
          os_inst = OSFromDisk(name, base_dir=dir_name)
1576
          result.append(os_inst)
1577
        except errors.InvalidOS, err:
1578
          result.append(objects.OS.FromInvalidOS(err))
1579

    
1580
  return result
1581

    
1582

    
1583
def OSFromDisk(name, base_dir=None):
1584
  """Create an OS instance from disk.
1585

1586
  This function will return an OS instance if the given name is a
1587
  valid OS name. Otherwise, it will raise an appropriate
1588
  L{errors.InvalidOS} exception, detailing why this is not a valid OS.
1589

1590
  @type base_dir: string
1591
  @keyword base_dir: Base directory containing OS installations.
1592
                     Defaults to a search in all the OS_SEARCH_PATH dirs.
1593
  @rtype: L{objects.OS}
1594
  @return: the OS instance if we find a valid one
1595
  @raise errors.InvalidOS: if we don't find a valid OS
1596

1597
  """
1598
  if base_dir is None:
1599
    os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir)
1600
    if os_dir is None:
1601
      raise errors.InvalidOS(name, None, "OS dir not found in search path")
1602
  else:
1603
    os_dir = os.path.sep.join([base_dir, name])
1604

    
1605
  api_versions = _OSOndiskVersion(name, os_dir)
1606

    
1607
  if constants.OS_API_VERSION not in api_versions:
1608
    raise errors.InvalidOS(name, os_dir, "API version mismatch"
1609
                           " (found %s want %s)"
1610
                           % (api_versions, constants.OS_API_VERSION))
1611

    
1612
  # OS Scripts dictionary, we will populate it with the actual script names
1613
  os_scripts = dict.fromkeys(constants.OS_SCRIPTS)
1614

    
1615
  for script in os_scripts:
1616
    os_scripts[script] = os.path.sep.join([os_dir, script])
1617

    
1618
    try:
1619
      st = os.stat(os_scripts[script])
1620
    except EnvironmentError, err:
1621
      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
1622
                             (script, _ErrnoOrStr(err)))
1623

    
1624
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
1625
      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
1626
                             script)
1627

    
1628
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1629
      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
1630
                             script)
1631

    
1632

    
1633
  return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
1634
                    create_script=os_scripts[constants.OS_SCRIPT_CREATE],
1635
                    export_script=os_scripts[constants.OS_SCRIPT_EXPORT],
1636
                    import_script=os_scripts[constants.OS_SCRIPT_IMPORT],
1637
                    rename_script=os_scripts[constants.OS_SCRIPT_RENAME],
1638
                    api_versions=api_versions)
1639

    
1640
def OSEnvironment(instance, debug=0):
  """Calculate the environment for an os script.

  @type instance: L{objects.Instance}
  @param instance: target instance for the os script run
  @type debug: integer
  @param debug: debug level (0 or 1, for OS Api 10)
  @rtype: dict
  @return: dict of environment variables
  @raise errors.BlockDeviceError: if the block device
      cannot be found

  """
  env = {
    'OS_API_VERSION': '%d' % constants.OS_API_VERSION,
    'INSTANCE_NAME': instance.name,
    'INSTANCE_OS': instance.os,
    'HYPERVISOR': instance.hypervisor,
    'DISK_COUNT': '%d' % len(instance.disks),
    'NIC_COUNT': '%d' % len(instance.nics),
    'DEBUG_LEVEL': '%d' % debug,
    }

  # per-disk variables; every disk must be attachable here
  for idx, disk in enumerate(instance.disks):
    real_disk = _RecursiveFindBD(disk)
    if real_disk is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                    str(disk))
    real_disk.Open()
    env['DISK_%d_PATH' % idx] = real_disk.dev_path
    env['DISK_%d_ACCESS' % idx] = disk.mode
    if constants.HV_DISK_TYPE in instance.hvparams:
      env['DISK_%d_FRONTEND_TYPE' % idx] = \
        instance.hvparams[constants.HV_DISK_TYPE]
    if disk.dev_type in constants.LDS_BLOCK:
      env['DISK_%d_BACKEND_TYPE' % idx] = 'block'
    elif disk.dev_type == constants.LD_FILE:
      env['DISK_%d_BACKEND_TYPE' % idx] = \
        'file:%s' % disk.physical_id[0]

  # per-NIC variables
  for idx, nic in enumerate(instance.nics):
    env['NIC_%d_MAC' % idx] = nic.mac
    if nic.ip:
      env['NIC_%d_IP' % idx] = nic.ip
    nic_mode = nic.nicparams[constants.NIC_MODE]
    nic_link = nic.nicparams[constants.NIC_LINK]
    env['NIC_%d_MODE' % idx] = nic_mode
    if nic_mode == constants.NIC_MODE_BRIDGED:
      env['NIC_%d_BRIDGE' % idx] = nic_link
    if nic_link:
      env['NIC_%d_LINK' % idx] = nic_link
    if constants.HV_NIC_TYPE in instance.hvparams:
      env['NIC_%d_FRONTEND_TYPE' % idx] = \
        instance.hvparams[constants.HV_NIC_TYPE]

  return env
1691

    
1692
def BlockdevGrow(disk, amount):
1693
  """Grow a stack of block devices.
1694

1695
  This function is called recursively, with the childrens being the
1696
  first ones to resize.
1697

1698
  @type disk: L{objects.Disk}
1699
  @param disk: the disk to be grown
1700
  @rtype: (status, result)
1701
  @return: a tuple with the status of the operation
1702
      (True/False), and the errors message if status
1703
      is False
1704

1705
  """
1706
  r_dev = _RecursiveFindBD(disk)
1707
  if r_dev is None:
1708
    return False, "Cannot find block device %s" % (disk,)
1709

    
1710
  try:
1711
    r_dev.Grow(amount)
1712
  except errors.BlockDeviceError, err:
1713
    _Fail("Failed to grow block device: %s", err, exc=True)
1714

    
1715
  return True, None
1716

    
1717

    
1718
def BlockdevSnapshot(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  @type disk: L{objects.Disk}
  @param disk: the disk to be snapshotted
  @rtype: tuple
  @return: (True, snapshot disk path) on success; non-LVM leaves and
      missing devices are reported via _Fail

  """
  if disk.children:
    if len(disk.children) == 1:
      # only one child, let's recurse on it
      return BlockdevSnapshot(disk.children[0])
    else:
      # more than one child, choose one that matches
      for child in disk.children:
        if child.size == disk.size:
          # return implies breaking the loop
          return BlockdevSnapshot(child)
  elif disk.dev_type == constants.LD_LV:
    r_dev = _RecursiveFindBD(disk)
    if r_dev is not None:
      # let's stay on the safe side and ask for the full size, for now
      return True, r_dev.Snapshot(disk.size)
    else:
      _Fail("Cannot find block device %s", disk)
  else:
    _Fail("Cannot snapshot non-lvm block device '%s' of type '%s'",
          disk.unique_id, disk.dev_type)
1750

    
1751

    
1752
def ExportSnapshot(disk, dest_node, instance, cluster_name, idx):
1753
  """Export a block device snapshot to a remote node.
1754

1755
  @type disk: L{objects.Disk}
1756
  @param disk: the description of the disk to export
1757
  @type dest_node: str
1758
  @param dest_node: the destination node to export to
1759
  @type instance: L{objects.Instance}
1760
  @param instance: the instance object to whom the disk belongs
1761
  @type cluster_name: str
1762
  @param cluster_name: the cluster name, needed for SSH hostalias
1763
  @type idx: int
1764
  @param idx: the index of the disk in the instance's disk list,
1765
      used to export to the OS scripts environment
1766
  @rtype: boolean
1767
  @return: the success of the operation
1768

1769
  """
1770
  export_env = OSEnvironment(instance)
1771

    
1772
  inst_os = OSFromDisk(instance.os)
1773
  export_script = inst_os.export_script
1774

    
1775
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1776
                                     instance.name, int(time.time()))
1777
  if not os.path.exists(constants.LOG_OS_DIR):
1778
    os.mkdir(constants.LOG_OS_DIR, 0750)
1779
  real_disk = _RecursiveFindBD(disk)
1780
  if real_disk is None:
1781
    _Fail("Block device '%s' is not set up", disk)
1782

    
1783
  real_disk.Open()
1784

    
1785
  export_env['EXPORT_DEVICE'] = real_disk.dev_path
1786
  export_env['EXPORT_INDEX'] = str(idx)
1787

    
1788
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1789
  destfile = disk.physical_id[1]
1790

    
1791
  # the target command is built out of three individual commands,
1792
  # which are joined by pipes; we check each individual command for
1793
  # valid parameters
1794
  expcmd = utils.BuildShellCmd("cd %s; %s 2>%s", inst_os.path,
1795
                               export_script, logfile)
1796

    
1797
  comprcmd = "gzip"
1798

    
1799
  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
1800
                                destdir, destdir, destfile)
1801
  remotecmd = _GetSshRunner(cluster_name).BuildCmd(dest_node,
1802
                                                   constants.GANETI_RUNAS,
1803
                                                   destcmd)
1804

    
1805
  # all commands have been checked, so we're safe to combine them
1806
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
1807

    
1808
  result = utils.RunCmd(command, env=export_env)
1809

    
1810
  if result.failed:
1811
    _Fail("OS snapshot export command '%s' returned error: %s"
1812
          " output: %s", command, result.fail_reason, result.output)
1813

    
1814
  return (True, None)
1815

    
1816

    
1817
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  @type instance: L{objects.Instance}
  @param instance: the instance which we export, used for
      saving configuration
  @type snap_disks: list of L{objects.Disk}
  @param snap_disks: list of snapshot block devices, which
      will be used to get the actual name of the dump file

  @rtype: boolean
  @return: the success of the operation

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  # export-level metadata
  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  # instance-level metadata
  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' %
             instance.beparams[constants.BE_MEMORY])
  config.set(constants.INISECT_INS, 'vcpus', '%d' %
             instance.beparams[constants.BE_VCPUS])
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)

  nic_total = 0
  for idx, nic in enumerate(instance.nics):
    nic_total += 1
    config.set(constants.INISECT_INS, 'nic%d_mac' % idx, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % idx, '%s' % nic.ip)
    config.set(constants.INISECT_INS, 'nic%d_bridge' % idx, '%s' % nic.bridge)
  # TODO: redundant: on load can read nics until it doesn't exist
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_total)

  # only disks with a snapshot are counted and written out; gaps in the
  # numbering are therefore possible and handled by the importer
  disk_total = 0
  for idx, disk in enumerate(snap_disks):
    if disk:
      disk_total += 1
      config.set(constants.INISECT_INS, 'disk%d_ivname' % idx,
                 ('%s' % disk.iv_name))
      config.set(constants.INISECT_INS, 'disk%d_dump' % idx,
                 ('%s' % disk.physical_id[1]))
      config.set(constants.INISECT_INS, 'disk%d_size' % idx,
                 ('%d' % disk.size))

  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_total)

  utils.WriteFile(os.path.join(destdir, constants.EXPORT_CONF_FILE),
                  data=config.Dumps())
  # replace any previous export of this instance with the new one
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True, None
1881

    
1882

    
1883
def ExportInfo(dest):
  """Get export configuration information.

  @type dest: str
  @param dest: directory containing the export

  @rtype: tuple
  @return: (True, dump) where dump is the serialized export
      configuration; calls L{_Fail} if required sections are missing

  """
  cff = os.path.join(dest, constants.EXPORT_CONF_FILE)

  config = objects.SerializableConfigParser()
  config.read(cff)

  # both the export-level and the instance-level sections must be present
  for section in (constants.INISECT_EXP, constants.INISECT_INS):
    if not config.has_section(section):
      _Fail("Export info file doesn't have the required fields")

  return True, config.Dumps()
1904

    
1905

    
1906
def ImportOSIntoInstance(instance, src_node, src_images, cluster_name):
1907
  """Import an os image into an instance.
1908

1909
  @type instance: L{objects.Instance}
1910
  @param instance: instance to import the disks into
1911
  @type src_node: string
1912
  @param src_node: source node for the disk images
1913
  @type src_images: list of string
1914
  @param src_images: absolute paths of the disk images
1915
  @rtype: list of boolean
1916
  @return: each boolean represent the success of importing the n-th disk
1917

1918
  """
1919
  import_env = OSEnvironment(instance)
1920
  inst_os = OSFromDisk(instance.os)
1921
  import_script = inst_os.import_script
1922

    
1923
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1924
                                        instance.name, int(time.time()))
1925
  if not os.path.exists(constants.LOG_OS_DIR):
1926
    os.mkdir(constants.LOG_OS_DIR, 0750)
1927

    
1928
  comprcmd = "gunzip"
1929
  impcmd = utils.BuildShellCmd("(cd %s; %s >%s 2>&1)", inst_os.path,
1930
                               import_script, logfile)
1931

    
1932
  final_result = []
1933
  for idx, image in enumerate(src_images):
1934
    if image:
1935
      destcmd = utils.BuildShellCmd('cat %s', image)
1936
      remotecmd = _GetSshRunner(cluster_name).BuildCmd(src_node,
1937
                                                       constants.GANETI_RUNAS,
1938
                                                       destcmd)
1939
      command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
1940
      import_env['IMPORT_DEVICE'] = import_env['DISK_%d_PATH' % idx]
1941
      import_env['IMPORT_INDEX'] = str(idx)
1942
      result = utils.RunCmd(command, env=import_env)
1943
      if result.failed:
1944
        logging.error("Disk import command '%s' returned error: %s"
1945
                      " output: %s", command, result.fail_reason,
1946
                      result.output)
1947
        final_result.append(False)
1948
      else:
1949
        final_result.append(True)
1950
    else:
1951
      final_result.append(True)
1952

    
1953
  return final_result
1954

    
1955

    
1956
def ListExports():
  """Return a list of exports currently available on this machine.

  @rtype: list
  @return: list of the exports

  """
  if not os.path.isdir(constants.EXPORT_DIR):
    return False, "No exports directory"
  return True, utils.ListVisibleFiles(constants.EXPORT_DIR)
1967

    
1968

    
1969
def RemoveExport(export):
1970
  """Remove an existing export from the node.
1971

1972
  @type export: str
1973
  @param export: the name of the export to remove
1974
  @rtype: boolean
1975
  @return: the success of the operation
1976

1977
  """
1978
  target = os.path.join(constants.EXPORT_DIR, export)
1979

    
1980
  try:
1981
    shutil.rmtree(target)
1982
  except EnvironmentError, err:
1983
    _Fail("Error while removing the export: %s", err, exc=True)
1984

    
1985
  return True, None
1986

    
1987

    
1988
def BlockdevRename(devlist):
1989
  """Rename a list of block devices.
1990

1991
  @type devlist: list of tuples
1992
  @param devlist: list of tuples of the form  (disk,
1993
      new_logical_id, new_physical_id); disk is an
1994
      L{objects.Disk} object describing the current disk,
1995
      and new logical_id/physical_id is the name we
1996
      rename it to
1997
  @rtype: boolean
1998
  @return: True if all renames succeeded, False otherwise
1999

2000
  """
2001
  msgs = []
2002
  result = True
2003
  for disk, unique_id in devlist:
2004
    dev = _RecursiveFindBD(disk)
2005
    if dev is None:
2006
      msgs.append("Can't find device %s in rename" % str(disk))
2007
      result = False
2008
      continue
2009
    try:
2010
      old_rpath = dev.dev_path
2011
      dev.Rename(unique_id)
2012
      new_rpath = dev.dev_path
2013
      if old_rpath != new_rpath:
2014
        DevCacheManager.RemoveCache(old_rpath)
2015
        # FIXME: we should add the new cache information here, like:
2016
        # DevCacheManager.UpdateCache(new_rpath, owner, ...)
2017
        # but we don't have the owner here - maybe parse from existing
2018
        # cache? for now, we only lose lvm data when we rename, which
2019
        # is less critical than DRBD or MD
2020
    except errors.BlockDeviceError, err:
2021
      msgs.append("Can't rename device '%s' to '%s': %s" %
2022
                  (dev, unique_id, err))
2023
      logging.exception("Can't rename device '%s' to '%s'", dev, unique_id)
2024
      result = False
2025
  return (result, "; ".join(msgs))
2026

    
2027

    
2028
def _TransformFileStorageDir(file_storage_dir):
  """Checks whether given file_storage_dir is valid.

  Checks whether the given file_storage_dir is within the cluster-wide
  default file_storage_dir stored in SimpleStore. Only paths under that
  directory are allowed.

  @type file_storage_dir: str
  @param file_storage_dir: the path to check

  @return: the normalized path if valid, None otherwise

  """
  cfg = _GetConfig()
  file_storage_dir = os.path.normpath(file_storage_dir)
  base_file_storage_dir = cfg.GetFileStorageDir()
  # NOTE(review): commonprefix is a plain string prefix test, so e.g.
  # "/srv/ganeti-x" would pass a "/srv/ganeti" base — confirm this is
  # acceptable or compare path components instead
  if (os.path.commonprefix([file_storage_dir, base_file_storage_dir]) !=
      base_file_storage_dir):
    logging.error("file storage directory '%s' is not under base file"
                  " storage directory '%s'",
                  file_storage_dir, base_file_storage_dir)
    return None
  return file_storage_dir
2051

    
2052

    
2053
def CreateFileStorageDir(file_storage_dir):
2054
  """Create file storage directory.
2055

2056
  @type file_storage_dir: str
2057
  @param file_storage_dir: directory to create
2058

2059
  @rtype: tuple
2060
  @return: tuple with first element a boolean indicating wheter dir
2061
      creation was successful or not
2062

2063
  """
2064
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)
2065
  result = True,
2066
  if not file_storage_dir:
2067
    result = False,
2068
  else:
2069
    if os.path.exists(file_storage_dir):
2070
      if not os.path.isdir(file_storage_dir):
2071
        logging.error("'%s' is not a directory", file_storage_dir)
2072
        result = False,
2073
    else:
2074
      try:
2075
        os.makedirs(file_storage_dir, 0750)
2076
      except OSError, err:
2077
        logging.error("Cannot create file storage directory '%s': %s",
2078
                      file_storage_dir, err)
2079
        result = False,
2080
  return result
2081

    
2082

    
2083
def RemoveFileStorageDir(file_storage_dir):
2084
  """Remove file storage directory.
2085

2086
  Remove it only if it's empty. If not log an error and return.
2087

2088
  @type file_storage_dir: str
2089
  @param file_storage_dir: the directory we should cleanup
2090
  @rtype: tuple (success,)
2091
  @return: tuple of one element, C{success}, denoting
2092
      whether the operation was successfull
2093

2094
  """
2095
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)
2096
  result = True,
2097
  if not file_storage_dir:
2098
    result = False,
2099
  else:
2100
    if os.path.exists(file_storage_dir):
2101
      if not os.path.isdir(file_storage_dir):
2102
        logging.error("'%s' is not a directory", file_storage_dir)
2103
        result = False,
2104
      # deletes dir only if empty, otherwise we want to return False
2105
      try:
2106
        os.rmdir(file_storage_dir)
2107
      except OSError, err:
2108
        logging.exception("Cannot remove file storage directory '%s'",
2109
                          file_storage_dir)
2110
        result = False,
2111
  return result
2112

    
2113

    
2114
def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
2115
  """Rename the file storage directory.
2116

2117
  @type old_file_storage_dir: str
2118
  @param old_file_storage_dir: the current path
2119
  @type new_file_storage_dir: str
2120
  @param new_file_storage_dir: the name we should rename to
2121
  @rtype: tuple (success,)
2122
  @return: tuple of one element, C{success}, denoting
2123
      whether the operation was successful
2124

2125
  """
2126
  old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir)
2127
  new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir)
2128
  result = True,
2129
  if not old_file_storage_dir or not new_file_storage_dir:
2130
    result = False,
2131
  else:
2132
    if not os.path.exists(new_file_storage_dir):
2133
      if os.path.isdir(old_file_storage_dir):
2134
        try:
2135
          os.rename(old_file_storage_dir, new_file_storage_dir)
2136
        except OSError, err:
2137
          logging.exception("Cannot rename '%s' to '%s'",
2138
                            old_file_storage_dir, new_file_storage_dir)
2139
          result =  False,
2140
      else:
2141
        logging.error("'%s' is not a directory", old_file_storage_dir)
2142
        result = False,
2143
    else:
2144
      if os.path.exists(old_file_storage_dir):
2145
        logging.error("Cannot rename '%s' to '%s'. Both locations exist.",
2146
                      old_file_storage_dir, new_file_storage_dir)
2147
        result = False,
2148
  return result
2149

    
2150

    
2151
def _IsJobQueueFile(file_name):
  """Checks whether the given filename is in the queue directory.

  @type file_name: str
  @param file_name: the file name we should check
  @rtype: boolean
  @return: whether the file is under the queue directory

  """
  queue_dir = os.path.normpath(constants.QUEUE_DIR)
  in_queue = (os.path.commonprefix([queue_dir, file_name]) == queue_dir)

  if not in_queue:
    logging.error("'%s' is not a file in the queue directory",
                  file_name)

  return in_queue
2168

    
2169

    
2170
def JobQueueUpdate(file_name, content):
  """Updates a file in the queue directory.

  This is just a wrapper over L{utils.WriteFile}, with proper
  checking.

  @type file_name: str
  @param file_name: the job file name
  @type content: str
  @param content: the new job contents
  @rtype: boolean
  @return: the success of the operation

  """
  # refuse paths outside the queue directory
  if not _IsJobQueueFile(file_name):
    return False

  # decompress the payload and atomically replace the file
  utils.WriteFile(file_name, data=_Decompress(content))
  return True
2191

    
2192

    
2193
def JobQueueRename(old, new):
  """Renames a job queue file.

  This is just a wrapper over os.rename with proper checking.

  @type old: str
  @param old: the old (actual) file name
  @type new: str
  @param new: the desired file name
  @rtype: boolean
  @return: the success of the operation

  """
  # both endpoints must live inside the queue directory
  if not _IsJobQueueFile(old):
    return False
  if not _IsJobQueueFile(new):
    return False

  utils.RenameFile(old, new, mkdir=True)
  return True
2212

    
2213

    
2214
def JobQueueSetDrainFlag(drain_flag):
  """Set the drain flag for the queue.

  This will set or unset the queue drain flag.

  @type drain_flag: boolean
  @param drain_flag: if True, will set the drain flag, otherwise reset it.
  @rtype: boolean
  @return: always True
  @warning: the function always returns True

  """
  # the flag is represented by the mere existence of the drain file
  if not drain_flag:
    utils.RemoveFile(constants.JOB_QUEUE_DRAIN_FILE)
  else:
    utils.WriteFile(constants.JOB_QUEUE_DRAIN_FILE, data="", close=True)

  return True
2232

    
2233

    
2234
def BlockdevClose(instance_name, disks):
2235
  """Closes the given block devices.
2236

2237
  This means they will be switched to secondary mode (in case of
2238
  DRBD).
2239

2240
  @param instance_name: if the argument is not empty, the symlinks
2241
      of this instance will be removed
2242
  @type disks: list of L{objects.Disk}
2243
  @param disks: the list of disks to be closed
2244
  @rtype: tuple (success, message)
2245
  @return: a tuple of success and message, where success
2246
      indicates the succes of the operation, and message
2247
      which will contain the error details in case we
2248
      failed
2249

2250
  """
2251
  bdevs = []
2252
  for cf in disks:
2253
    rd = _RecursiveFindBD(cf)
2254
    if rd is None:
2255
      _Fail("Can't find device %s", cf)
2256
    bdevs.append(rd)
2257

    
2258
  msg = []
2259
  for rd in bdevs:
2260
    try:
2261
      rd.Close()
2262
    except errors.BlockDeviceError, err:
2263
      msg.append(str(err))
2264
  if msg:
2265
    return (False, "Can't make devices secondary: %s" % ",".join(msg))
2266
  else:
2267
    if instance_name:
2268
      _RemoveBlockDevLinks(instance_name, disks)
2269
    return (True, "All devices secondary")
2270

    
2271

    
2272
def ValidateHVParams(hvname, hvparams):
2273
  """Validates the given hypervisor parameters.
2274

2275
  @type hvname: string
2276
  @param hvname: the hypervisor name
2277
  @type hvparams: dict
2278
  @param hvparams: the hypervisor parameters to be validated
2279
  @rtype: tuple (success, message)
2280
  @return: a tuple of success and message, where success
2281
      indicates the succes of the operation, and message
2282
      which will contain the error details in case we
2283
      failed
2284

2285
  """
2286
  try:
2287
    hv_type = hypervisor.GetHypervisor(hvname)
2288
    hv_type.ValidateParameters(hvparams)
2289
    return (True, "Validation passed")
2290
  except errors.HypervisorError, err:
2291
    return (False, str(err))
2292

    
2293

    
2294
def DemoteFromMC():
2295
  """Demotes the current node from master candidate role.
2296

2297
  """
2298
  # try to ensure we're not the master by mistake
2299
  master, myself = ssconf.GetMasterAndMyself()
2300
  if master == myself:
2301
    return (False, "ssconf status shows I'm the master node, will not demote")
2302
  pid_file = utils.DaemonPidFileName(constants.MASTERD_PID)
2303
  if utils.IsProcessAlive(utils.ReadPidFile(pid_file)):
2304
    return (False, "The master daemon is running, will not demote")
2305
  try:
2306
    utils.CreateBackup(constants.CLUSTER_CONF_FILE)
2307
  except EnvironmentError, err:
2308
    if err.errno != errno.ENOENT:
2309
      return (False, "Error while backing up cluster file: %s" % str(err))
2310
  utils.RemoveFile(constants.CLUSTER_CONF_FILE)
2311
  return (True, "Done")
2312

    
2313

    
2314
def _FindDisks(nodes_ip, disks):
  """Sets the physical ID on disks and returns the block devices.

  """
  # fill in the physical IDs first, for all disks
  my_name = utils.HostInfo().name
  for disk in disks:
    disk.SetPhysicalID(my_name, nodes_ip)

  # then resolve each disk to its block device
  bdevs = []
  for disk in disks:
    bdev = _RecursiveFindBD(disk)
    if bdev is None:
      return (False, "Can't find device %s" % disk)
    bdevs.append(bdev)
  return (True, bdevs)
2331

    
2332

    
2333
def DrbdDisconnectNet(nodes_ip, disks):
2334
  """Disconnects the network on a list of drbd devices.
2335

2336
  """
2337
  status, bdevs = _FindDisks(nodes_ip, disks)
2338
  if not status:
2339
    return status, bdevs
2340

    
2341
  # disconnect disks
2342
  for rd in bdevs:
2343
    try:
2344
      rd.DisconnectNet()
2345
    except errors.BlockDeviceError, err:
2346
      _Fail("Can't change network configuration to standalone mode: %s",
2347
            err, exc=True)
2348
  return (True, "All disks are now disconnected")
2349

    
2350

    
2351
def DrbdAttachNet(nodes_ip, disks, instance_name, multimaster):
  """Attaches the network on a list of drbd devices.

  @param nodes_ip: mapping used to resolve the disks' physical IDs
      (passed through to L{_FindDisks})
  @type disks: list of L{objects.Disk}
  @param disks: the disks whose network configuration to attach
  @type instance_name: str
  @param instance_name: instance owning the disks, used for the
      device symlinks in multimaster mode
  @type multimaster: boolean
  @param multimaster: if True, also symlink the devices and switch
      them to primary mode (used during live migration)

  """
  status, bdevs = _FindDisks(nodes_ip, disks)
  if not status:
    return status, bdevs

  if multimaster:
    # create the instance symlinks first, so the devices are usable
    # as primaries later in this function
    for idx, rd in enumerate(bdevs):
      try:
        _SymlinkBlockDev(instance_name, rd.dev_path, idx)
      except EnvironmentError, err:
        _Fail("Can't create symlink: %s", err)
  # reconnect disks, switch to new master configuration and if
  # needed primary mode
  for rd in bdevs:
    try:
      rd.AttachNet(multimaster)
    except errors.BlockDeviceError, err:
      _Fail("Can't change network configuration: %s", err)
  # wait until the disks are connected; we need to retry the re-attach
  # if the device becomes standalone, as this might happen if the one
  # node disconnects and reconnects in a different mode before the
  # other node reconnects; in this case, one or both of the nodes will
  # decide it has wrong configuration and switch to standalone
  RECONNECT_TIMEOUT = 2 * 60
  sleep_time = 0.100 # start with 100 miliseconds
  timeout_limit = time.time() + RECONNECT_TIMEOUT
  while time.time() < timeout_limit:
    all_connected = True
    for rd in bdevs:
      stats = rd.GetProcStatus()
      if not (stats.is_connected or stats.is_in_resync):
        all_connected = False
      if stats.is_standalone:
        # peer had different config info and this node became
        # standalone, even though this should not happen with the
        # new staged way of changing disk configs
        try:
          rd.ReAttachNet(multimaster)
        except errors.BlockDeviceError, err:
          _Fail("Can't change network configuration: %s", err)
    if all_connected:
      break
    time.sleep(sleep_time)
    # exponential backoff, capped at 5 seconds between polls
    sleep_time = min(5, sleep_time * 1.5)
  if not all_connected:
    return (False, "Timeout in disk reconnecting")
  if multimaster:
    # change to primary mode
    for rd in bdevs:
      try:
        rd.Open()
      except errors.BlockDeviceError, err:
        _Fail("Can't change to primary mode: %s", err)
  if multimaster:
    msg = "multi-master and primary"
  else:
    msg = "single-master"
  return (True, "Disks are now configured as %s" % msg)
2412

    
2413

    
2414
def DrbdWaitSync(nodes_ip, disks):
  """Wait until DRBDs have synchronized.

  """
  status, bdevs = _FindDisks(nodes_ip, disks)
  if not status:
    return status, bdevs

  min_resync = 100
  alldone = True
  failure = False
  for bdev in bdevs:
    stats = bdev.GetProcStatus()
    if not (stats.is_connected or stats.is_in_resync):
      # a device that is neither connected nor resyncing is broken
      failure = True
      break
    if stats.is_in_resync:
      alldone = False
    if stats.sync_percent is not None:
      # track the least-synchronized device
      min_resync = min(min_resync, stats.sync_percent)
  return (not failure, (alldone, min_resync))
2434

    
2435

    
2436
def PowercycleNode(hypervisor_type):
  """Hard-powercycle the node.

  Because we need to return first, and schedule the powercycle in the
  background, we won't be able to report failures nicely.

  """
  hyper = hypervisor.GetHypervisor(hypervisor_type)
  try:
    in_child = (os.fork() == 0)
  except OSError:
    # if we can't fork, we'll pretend that we're in the child process
    in_child = True
  if not in_child:
    # parent: report success and let the child do the powercycle
    return (True, "Reboot scheduled in 5 seconds")
  time.sleep(5)
  hyper.PowercycleNode()
2453

    
2454

    
2455
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not
  on the master side.

  """
  # only scripts whose names match this are run (guards against
  # editor backups, package leftovers etc.)
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    @type hooks_base_dir: str or None
    @param hooks_base_dir: if not None, this overrides the
        L{constants.HOOKS_BASE_DIR} (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    @type script: str
    @param script: the full path to the script
    @type env: dict
    @param env: the environment with which to exec the script
    @rtype: tuple (success, message)
    @return: a tuple of success and message, where success
        indicates the succes of the operation, and message
        which will contain the error details in case we
        failed

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/", env=env)
      output = ""
      try:
        # only the first 4 kB of combined stdout/stderr is kept
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError, err:
        output += "Hook script error: %s" % str(err)

      # retry wait() if it is interrupted by a signal (EINTR)
      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError, err:
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError, err:
            # just log the error
            #logging.exception("Error while closing fd %s", fd)
            pass

    return result == 0, utils.SafeEncode(output.strip())

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    @type hpath: str
    @param hpath: the path to the hooks directory which
        holds the scripts
    @type phase: str
    @param phase: either L{constants.HOOKS_PHASE_PRE} or
        L{constants.HOOKS_PHASE_POST}
    @type env: dict
    @param env: dictionary with the environment for the hook
    @rtype: list
    @return: list of 3-element tuples:
      - script path
      - script result, either L{constants.HKR_SUCCESS} or
        L{constants.HKR_FAIL}
      - output of the script

    @raise errors.ProgrammerError: for invalid input
        parameters

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
    rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = utils.ListVisibleFiles(dir_name)
    except OSError, err:
      # a missing hooks directory simply means no hooks to run
      # FIXME: must log output in case of failures
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      # non-executable or oddly-named entries are reported as skipped
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr
2582

    
2583

    
2584
class IAllocatorRunner(object):
  """IAllocator runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  def Run(self, name, idata):
    """Run an iallocator script.

    @type name: str
    @param name: the iallocator script name
    @type idata: str
    @param idata: the allocator input data

    @rtype: tuple
    @return: four element tuple of:
       - run status (one of the IARUN_ constants)
       - stdout
       - stderr
       - fail reason (as from L{utils.RunResult})

    """
    alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
                                  os.path.isfile)
    if alloc_script is None:
      return (constants.IARUN_NOTFOUND, None, None, None)

    # write the input data to a temporary file which the script reads
    tmpfd, input_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
    try:
      os.write(tmpfd, idata)
      os.close(tmpfd)
      result = utils.RunCmd([alloc_script, input_name])
    finally:
      # always clean up the temporary input file
      os.unlink(input_name)

    if result.failed:
      return (constants.IARUN_FAILURE, result.stdout, result.stderr,
              result.fail_reason)
    return (constants.IARUN_SUCCESS, result.stdout, result.stderr, None)
2624

    
2625

    
2626
class DevCacheManager(object):
2627
  """Simple class for managing a cache of block device information.
2628

2629
  """
2630
  _DEV_PREFIX = "/dev/"
2631
  _ROOT_DIR = constants.BDEV_CACHE_DIR
2632

    
2633
  @classmethod
2634
  def _ConvertPath(cls, dev_path):
2635
    """Converts a /dev/name path to the cache file name.
2636

2637
    This replaces slashes with underscores and strips the /dev
2638
    prefix. It then returns the full path to the cache file.
2639

2640
    @type dev_path: str
2641
    @param dev_path: the C{/dev/} path name
2642
    @rtype: str
2643
    @return: the converted path name
2644

2645
    """
2646
    if dev_path.startswith(cls._DEV_PREFIX):
2647
      dev_path = dev_path[len(cls._DEV_PREFIX):]
2648
    dev_path = dev_path.replace("/", "_")
2649
    fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
2650
    return fpath
2651

    
2652
  @classmethod
2653
  def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
2654
    """Updates the cache information for a given device.
2655

2656
    @type dev_path: str
2657
    @param dev_path: the pathname of the device
2658
    @type owner: str
2659
    @param owner: the owner (instance name) of the device
2660
    @type on_primary: bool
2661
    @param on_primary: whether this is the primary
2662
        node nor not
2663
    @type iv_name: str
2664
    @param iv_name: the instance-visible name of the
2665
        device, as in objects.Disk.iv_name
2666

2667
    @rtype: None
2668

2669
    """
2670
    if dev_path is None:
2671
      logging.error("DevCacheManager.UpdateCache got a None dev_path")
2672
      return
2673
    fpath = cls._ConvertPath(dev_path)
2674
    if on_primary:
2675
      state = "primary"
2676
    else:
2677
      state = "secondary"
2678
    if iv_name is None:
2679
      iv_name = "not_visible"
2680
    fdata = "%s %s %s\n" % (str(owner), state, iv_name)
2681
    try:
2682
      utils.WriteFile(fpath, data=fdata)
2683
    except EnvironmentError, err:
2684
      logging.exception("Can't update bdev cache for %s", dev_path)
2685

    
2686
  @classmethod
2687
  def RemoveCache(cls, dev_path):
2688
    """Remove data for a dev_path.
2689

2690
    This is just a wrapper over L{utils.RemoveFile} with a converted
2691
    path name and logging.
2692

2693
    @type dev_path: str
2694
    @param dev_path: the pathname of the device
2695

2696
    @rtype: None
2697

2698
    """
2699
    if dev_path is None:
2700
      logging.error("DevCacheManager.RemoveCache got a None dev_path")
2701
      return
2702
    fpath = cls._ConvertPath(dev_path)
2703
    try:
2704
      utils.RemoveFile(fpath)
2705
    except EnvironmentError, err:
2706
      logging.exception("Can't update bdev cache for %s", dev_path)