Cleanup monitoring code
author Nikos Skalkotos <skalkoto@grnet.gr>
Fri, 25 May 2012 14:57:46 +0000 (17:57 +0300)
committer Nikos Skalkotos <skalkoto@grnet.gr>
Fri, 25 May 2012 14:57:46 +0000 (17:57 +0300)
 * Add nanoseconds field in timestamp
 * Remove id field in the notifications
 * Fix cleanup code

snf-image-helper/common.sh
snf-image-host/common.sh.in
snf-image-host/copy-monitor.py
snf-image-host/create

index 1813d91..1523cd2 100644 (file)
@@ -59,14 +59,13 @@ warn() {
 
 report_start_task() {
 
-    local id="$SNF_IMAGE_HOSTNAME"
-    local type="ganeti-start-task"
-    local timestamp=$(date +%s)
+    local type="start-task"
+    local timestamp=$(date +%s.%N)
     local name="${PROGNAME}"
 
     report="{\"id\":\"$id\","
     report+="\"type\":\"$type\"," \
-    report+="\"timestamp\":$(date +%s)," \
+    report+="\"timestamp\":$(date +%s.%N)," \
     report+="\"name\":\"$name\"}"
 
     echo "$report" > "$MONITOR"
@@ -89,9 +88,8 @@ json_list() {
 
 report_end_task() {
 
-    local id="$SNF_IMAGE_HOSTNAME"
-    local type="ganeti-end-task"
-    local timestam=$(date +%s)
+    local type="end-task"
+    local timestam=$(date +%s.%N)
     local name=${PROGNAME}
     local warnings=$(json_list WARNINGS[@])
 
@@ -105,9 +103,8 @@ report_end_task() {
 }
 
 report_error() {
-    local id="$SNF_IMAGE_HOSTNAME"
     local type="ganeti-error"
-    local timestamp=$(date +%s)
+    local timestamp=$(date +%s.%N)
     local location="${PROGNAME}"
     local errors=$(json_list ERRORS[@])
     local warnings=$(json_list WARNINGS[@])
@@ -406,7 +403,7 @@ task_cleanup() {
     rc=$?
 
     if [ $rc -eq 0 ]; then
-       report_end_task 
+       report_end_task
     else
        report_error
     fi
index f0f6c13..ff482e9 100644 (file)
@@ -74,7 +74,7 @@ report_error() {
 
     report="{\"id\":\"$id\","
     report+="\"type\":\"$type\"," \
-    report+="\"timestamp\":$(date +%s)," \
+    report+="\"timestamp\":$(date +%s.%N)," \
     report+="\"location\":\"$location\"," \
     report+="\"messages\":$msg," \
     report+="\"stderr\":\"$stderr\"}"
@@ -328,20 +328,18 @@ EOF
     mv -f "$tmp_cache" "$HELPER_CACHE_FILE"
 }
 
-cleanup() {
-    # Carefull this should be the first command in the function. We want to
-    # store the last exit code to see if cleanup was triggered by an abnormal
-    # termination of the script.
-    local rc=$?
-    local err_file=$1
+report_and_cleanup(){
+
+    local err_file="$1"
+
+    report_error "$err_file"
+    cleanup
+}
 
+cleanup() {
     # if something fails here, it souldn't call cleanup again...
     trap - EXIT
 
-    if [ $rc -ne 0 -a -f "$err_file" ]; then
-        report_error "$err_file"
-    fi
-
     if [ ${#CLEANUP[*]} -gt 0 ]; then
         LAST_ELEMENT=$((${#CLEANUP[*]}-1))
         REVERSE_INDEXES=$(seq ${LAST_ELEMENT} -1 0)
index 6b85719..1d2bbc9 100755 (executable)
@@ -42,7 +42,7 @@ def parse_arguments(args):
         "%prog runs 'command' with the specified arguments, monitoring the " \
         "number of bytes read by it. 'command' is assumed to be " \
         "A program used to install the OS for a Ganeti instance. %prog " \
-        "periodically issues notifications of type 'ganeti-copy-progress'."
+        "periodically issues notifications of type 'copy-progress'."
 
     parser = OptionParser(**kw)
     parser.disable_interspersed_args()
@@ -52,19 +52,11 @@ def parse_arguments(args):
                       help="The expected number of bytes to be read, " \
                            "used to compute input progress",
                       default=None)
-    parser.add_option("-i", "--instance-name", dest="instance_name",
-                    default=None, metavar="GANETI_INSTANCE",
-                      help="The Ganeti instance name to be used in AMQP " \
-                           "notifications")
     parser.add_option("-o", "--output", dest="output", default=None,
                     metavar="FILE",
                     help="Write output notifications to this file")
 
     (opts, args) = parser.parse_args(args)
-    if opts.instance_name is None:
-        sys.stderr.write("Fatal: Option '-i' is mandatory.\n")
-        parser.print_help()
-        sys.exit(1)
 
     if opts.read_bytes is None:
         sys.stderr.write("Fatal: Option '-r' is mandatory.\n")
@@ -102,7 +94,7 @@ def report_wait_status(pid, status):
 
 
 def send_message(to, message):
-    message['timestamp'] = int(time.time())
+    message['timestamp'] = time.time()
     to.write("%s\n" % json.dumps(message))
     to.flush()
 
@@ -132,8 +124,7 @@ def main():
                          (sys.argv[0], pid, iofname))
 
         message = {}
-        message['id'] = opts.instance_name
-        message['type'] = 'ganeti-copy-progress'
+        message['type'] = 'copy-progress'
         message['total'] = opts.read_bytes
 
         while True:
@@ -147,7 +138,7 @@ def main():
                         return 1
                     else:
                         message['position'] = message['total']
-                        message['rprogress'] = float(100)
+                        message['progress'] = float(100)
                         send_message(out, message)
                         return 0
 
@@ -155,7 +146,7 @@ def main():
             for l in iof.readlines():
                 if l.startswith("rchar:"):
                     message['position'] = int(l.split(': ')[1])
-                    message['rprogress'] = float(100) if opts.read_bytes == 0 \
+                    message['progress'] = float(100) if opts.read_bytes == 0 \
                         else float("%2.2f" % (
                             message['position'] * 100.0 / message['total']))
                     send_message(out, message)
index b7c3fdb..d910520 100755 (executable)
@@ -32,16 +32,23 @@ mkfifo -m 600 "$monitor_pipe"
 add_cleanup rm -f "$monitor_pipe"
 
 if [ -n "$PROGRESS_MONITOR" ]; then
-    { cat "$monitor_pipe" | tee >( $PROGRESS_MONITOR ) ; } &
+    { sleep 1; tee >( $PROGRESS_MONITOR ) < "$monitor_pipe" ; } &
+    monitor_pid="$!"
 else
     cat "$monitor_pipe" &
+    monitor_pid="$!"
 fi
 
 # Create file descriptor to monitor_pipe
 eval "exec ${MONITOR_FD}>${monitor_pipe}"
 add_cleanup  close_fd ${MONITOR_FD}
 
-trap "cleanup $(printf "%q" "$stderr")" EXIT
+# Ignore sigpipe signals. If progress monitor is dead and snf-image-host tries
+# to output something to the opened pipe, then a sigpipe signal will be raised.
+# If I do not catch this, the program will terminate.
+trap "" SIGPIPE
+
+trap "report_and_cleanup $(printf "%q" "$stderr")" EXIT
 
 ganeti_os_main
 
@@ -101,8 +108,7 @@ case "$IMAGE_TYPE" in
         ;;
 esac
 
-monitor="./copy-monitor.py -o $(printf "%q" "$monitor_pipe") \
-               -i $(printf "%q" "$INSTANCE_NAME") -r $image_size"
+monitor="./copy-monitor.py -o $(printf "%q" "$monitor_pipe") -r $image_size"
 if [ "$BACKEND_TYPE" = "local" ]; then
     # dd the dump to its new home :-)
     # Deploying an image file on a target block device is a streaming copy
@@ -164,6 +170,10 @@ if [ $? -ne 0 ]; then
     exit 1
 fi
 
+# Do not report errors after this. If the result is not "SUCCESS" then the
+# helper VM should have reported the error.
+trap cleanup EXIT
+
 # Read the first line. This will remove \r and \n chars
 result=$(sed 's|\r||g' "$result_file" | head -1)