From f3845095c7f8802c4dd40d7679e14831d7f70b59 Mon Sep 17 00:00:00 2001 From: Nikos Skalkotos Date: Mon, 31 Dec 2012 00:37:07 +0200 Subject: [PATCH] Retry cleanup commands if they fail Add a new try_fail_repeat function that wraps cleanup system commands and retries them if they fail. There are cases where commands like dmsetup, umount and losetup -d may fail with "device is busy". In most cases, sleeping for half a second and retrying does the job. --- image_creator/bundle_volume.py | 14 +++++++++----- image_creator/disk.py | 13 +++++-------- image_creator/util.py | 20 +++++++++++++++++++- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/image_creator/bundle_volume.py b/image_creator/bundle_volume.py index 27839fa..bd418ce 100644 --- a/image_creator/bundle_volume.py +++ b/image_creator/bundle_volume.py @@ -34,7 +34,6 @@ import os import re import tempfile -import time from collections import namedtuple import parted @@ -42,6 +41,7 @@ import parted from image_creator.rsync import Rsync from image_creator.util import get_command from image_creator.util import FatalError +from image_creator.util import try_fail_repeat findfs = get_command('findfs') dd = get_command('dd') @@ -66,9 +66,11 @@ MKFS_OPTS = { } -class BundleVolume(): +class BundleVolume(object): + """This class can be used to create an image out of the running system""" def __init__(self, out, meta): + """Create an instance of the BundleVolume class.""" self.out = out self.meta = meta @@ -256,8 +258,7 @@ class BundleVolume(): if not os.path.exists(dev): return - dmsetup('remove', dev.split('/dev/mapper/')[1]) - time.sleep(0.1) + try_fail_repeat(dmsetup, 'remove', dev.split('/dev/mapper/')[1]) def _mount(self, target, devs): @@ -276,7 +277,7 @@ class BundleVolume(): mpoints.sort() for mpoint in reversed(mpoints): - umount(mpoint) + try_fail_repeat(umount, mpoint) def _to_exclude(self): excluded = ['/tmp', '/var/tmp'] @@ -391,6 +392,9 @@ class BundleVolume(): losetup('-d', loop) def 
create_image(self, image): + """Given an image filename, this method will create an image out of the + running system. + """ size = self.disk.device.getLength() * self.disk.device.sectorSize diff --git a/image_creator/disk.py b/image_creator/disk.py index 8a06f78..bb90816 100644 --- a/image_creator/disk.py +++ b/image_creator/disk.py @@ -33,6 +33,7 @@ from image_creator.util import get_command from image_creator.util import FatalError +from image_creator.util import try_fail_repeat from image_creator.gpt import GPTPartitionTable from image_creator.bundle_volume import BundleVolume @@ -43,7 +44,6 @@ import uuid import re import sys import guestfs -import time from sendfile import sendfile @@ -63,7 +63,8 @@ class Disk(object): def __init__(self, source, output): """Create a new Disk instance out of a source media. The source - media can be an image file, a block device or a directory.""" + media can be an image file, a block device or a directory. + """ self._cleanup_jobs = [] self._devices = [] self.source = source @@ -76,7 +77,7 @@ class Disk(object): def _losetup(self, fname): loop = losetup('-f', '--show', fname) loop = loop.strip() # remove the new-line char - self._add_cleanup(losetup, '-d', loop) + self._add_cleanup(try_fail_repeat, losetup, '-d', loop) return loop def _dir_to_disk(self): @@ -144,11 +145,7 @@ class Disk(object): os.write(tablefd, "0 %d snapshot %s %s n 8" % (int(size), sourcedev, cowdev)) dmsetup('create', snapshot, table) - self._add_cleanup(dmsetup, 'remove', snapshot) - # Sometimes dmsetup remove fails with Device or resource busy, - # although everything is cleaned up and the snapshot is not - # used by anyone. Add a 2 seconds delay to be on the safe side. 
- self._add_cleanup(time.sleep, 2) + self._add_cleanup(try_fail_repeat, dmsetup, 'remove', snapshot) finally: os.unlink(table) diff --git a/image_creator/util.py b/image_creator/util.py index 61cc2a6..1cd97ff 100644 --- a/image_creator/util.py +++ b/image_creator/util.py @@ -34,6 +34,7 @@ import sys import sh import hashlib +import time class FatalError(Exception): @@ -54,12 +55,29 @@ def get_command(command): return find_sbin_command(command, e) +def try_fail_repeat(command, *args): + + times = (0.1, 0.5, 1, 2) + i = iter(times) + while True: + try: + command(*args) + return + except sh.ErrorReturnCode: + try: + wait = i.next() + except StopIteration: + break + time.sleep(wait) + + raise FatalError("Command: `%s %s' failed" % (command, " ".join(args))) + + class MD5: def __init__(self, output): self.out = output def compute(self, filename, size): - MB = 2 ** 20 BLOCKSIZE = 4 * MB # 4MB -- 1.7.10.4