Statistics
| Branch: | Tag: | Revision:

root / tools / ganeti-listrunner @ 72bb6b4e

History | View | Annotate | Download (18.5 kB)

1 da7e44ee Michael Hanselmann
#!/usr/bin/python
2 da7e44ee Michael Hanselmann
#
3 da7e44ee Michael Hanselmann
4 99a11adc Iustin Pop
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
5 da7e44ee Michael Hanselmann
#
6 da7e44ee Michael Hanselmann
# This program is free software; you can redistribute it and/or modify
7 da7e44ee Michael Hanselmann
# it under the terms of the GNU General Public License as published by
8 da7e44ee Michael Hanselmann
# the Free Software Foundation; either version 2 of the License, or
9 da7e44ee Michael Hanselmann
# (at your option) any later version.
10 da7e44ee Michael Hanselmann
#
11 da7e44ee Michael Hanselmann
# This program is distributed in the hope that it will be useful, but
12 da7e44ee Michael Hanselmann
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 da7e44ee Michael Hanselmann
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 da7e44ee Michael Hanselmann
# General Public License for more details.
15 da7e44ee Michael Hanselmann
#
16 da7e44ee Michael Hanselmann
# You should have received a copy of the GNU General Public License
17 da7e44ee Michael Hanselmann
# along with this program; if not, write to the Free Software
18 da7e44ee Michael Hanselmann
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 da7e44ee Michael Hanselmann
# 02110-1301, USA.
20 da7e44ee Michael Hanselmann
21 da7e44ee Michael Hanselmann
"""Run an executable on a list of hosts.
22 da7e44ee Michael Hanselmann
23 da7e44ee Michael Hanselmann
Script to serially run an executable on a list of hosts via ssh
24 da7e44ee Michael Hanselmann
with password auth as root. If the provided log dir does not yet
25 da7e44ee Michael Hanselmann
exist, it will try to create it.
26 da7e44ee Michael Hanselmann
27 da7e44ee Michael Hanselmann
Implementation:
28 da7e44ee Michael Hanselmann
 - the main process spawns up to batch_size children, which:
29 da7e44ee Michael Hanselmann
 - connects to the remote host via ssh as root
30 da7e44ee Michael Hanselmann
 - uploads the executable with a random name to /tmp via sftp
31 da7e44ee Michael Hanselmann
 - chmod 500s it
32 da7e44ee Michael Hanselmann
 - via ssh: chdirs into the upload directory and runs the script
33 da7e44ee Michael Hanselmann
 - deletes it
34 da7e44ee Michael Hanselmann
 - writes status messages and all output to one logfile per host
35 da7e44ee Michael Hanselmann
 - the main process gathers then the status of the children and
36 da7e44ee Michael Hanselmann
   reports the success/failure ratio
37 da7e44ee Michael Hanselmann
 - entire script can be aborted with Ctrl-C
38 da7e44ee Michael Hanselmann
39 da7e44ee Michael Hanselmann
Security considerations:
40 da7e44ee Michael Hanselmann
 - the root password for the remote hosts is stored in memory for the
41 da7e44ee Michael Hanselmann
   runtime of the script
42 da7e44ee Michael Hanselmann
 - the executable to be run on the remote host is handled the following way:
43 da7e44ee Michael Hanselmann
   - try to create a random directory with permissions 700 on the
44 da7e44ee Michael Hanselmann
     remote host, abort further processing on this host if this fails
45 da7e44ee Michael Hanselmann
   - upload the executable to a random filename in that directory
46 da7e44ee Michael Hanselmann
   - set executable permissions to 500
47 da7e44ee Michael Hanselmann
   - run the executable
48 da7e44ee Michael Hanselmann
   - delete the executable and the directory on the remote host
49 da7e44ee Michael Hanselmann
50 da7e44ee Michael Hanselmann
"""
51 da7e44ee Michael Hanselmann
52 b459a848 Andrea Spadaccini
# pylint: disable=C0103
53 da7e44ee Michael Hanselmann
# C0103: Invalid name ganeti-listrunner
54 da7e44ee Michael Hanselmann
55 da7e44ee Michael Hanselmann
import errno
56 b74c0684 Iustin Pop
import optparse
57 da7e44ee Michael Hanselmann
import getpass
58 da7e44ee Michael Hanselmann
import logging
59 da7e44ee Michael Hanselmann
import os
60 da7e44ee Michael Hanselmann
import random
61 da7e44ee Michael Hanselmann
import select
62 da7e44ee Michael Hanselmann
import socket
63 da7e44ee Michael Hanselmann
import sys
64 da7e44ee Michael Hanselmann
import time
65 da7e44ee Michael Hanselmann
import traceback
66 da7e44ee Michael Hanselmann
67 da7e44ee Michael Hanselmann
import paramiko
68 da7e44ee Michael Hanselmann
69 da7e44ee Michael Hanselmann
70 da7e44ee Michael Hanselmann
# Prefix of the scratch directory created on each remote host; UploadFiles
# appends a random suffix to it.
REMOTE_PATH_BASE = "/tmp/listrunner"

# Usage synopsis handed to optparse in ParseOptions.
USAGE = " ".join([
  "%prog -l logdir {-c command | -x /path/to/file} [-b batch_size]",
  "{-f hostfile|-h hosts} [-u username]",
  "[-p password_file | -A]",
])
75 b74c0684 Iustin Pop
76 da7e44ee Michael Hanselmann
77 da7e44ee Michael Hanselmann
def LogDirUseable(logdir):
  """Ensure log file directory is available and usable.

  Creates logdir if it does not exist yet and then checks that a file can
  be created, written to and removed inside it.

  @param logdir: path of the directory the logfiles will be written to
  @return: True if a probe file could be written and removed, False if
      that failed with an OSError/IOError
  @raise OSError: if creating the directory fails for any reason other
      than the directory already existing

  """
  try:
    os.mkdir(logdir)
  except OSError as err:
    # an already existing directory is fine, everything else is not
    if err.errno != errno.EEXIST:
      raise

  testfile = "%s/test-%s-%s.deleteme" % (logdir, random.random(),
                                         random.random())
  try:
    # plain "a": the former "aw" is not a valid mode string on Python 3 and
    # raised a ValueError there which was not caught below
    with open(testfile, "a") as logtest:
      logtest.write("log file writeability test\n")
    os.unlink(testfile)
  except (OSError, IOError):
    return False

  return True
94 da7e44ee Michael Hanselmann
95 da7e44ee Michael Hanselmann
96 da7e44ee Michael Hanselmann
def GetTimeStamp(timestamp=None):
  """Format a time tuple as an ISO8601 string.

  @param timestamp: a time.localtime()-style tuple; when omitted (None)
      the current local time is used
  @return: the time formatted as YYYY-MM-DDTHH:MM:SS

  """
  when = time.localtime() if timestamp is None else timestamp
  return time.strftime("%Y-%m-%dT%H:%M:%S", when)
108 da7e44ee Michael Hanselmann
109 da7e44ee Michael Hanselmann
110 da7e44ee Michael Hanselmann
def PingByTcp(target, port, timeout=10, live_port_needed=False, source=None):
  """Simple ping implementation using TCP connect(2).

  Try to do a TCP connect(2) from an optional source IP to the
  specified target IP and the specified target port. If the optional
  parameter live_port_needed is set to true, requires the remote end
  to accept the connection. The timeout is specified in seconds and
  defaults to 10 seconds. If the source optional argument is not
  passed, the source address selection is left to the kernel,
  otherwise we try to connect using the passed address (failures to
  bind other than EADDRNOTAVAIL will be ignored).

  @param target: address to connect to
  @param port: TCP port to connect to
  @param timeout: connect timeout in seconds
  @param live_port_needed: if False, a refused connection (ECONNREFUSED)
      also counts as success
  @param source: optional local address to bind to before connecting
  @return: True if the target is considered reachable, False otherwise

  """
  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  try:
    if source is not None:
      try:
        sock.bind((source, 0))
      except socket.error as err:
        # The error code has to be read from the exception's errno
        # attribute; the exception object itself never compares equal to
        # an errno constant.
        if err.errno == errno.EADDRNOTAVAIL:
          return False

    sock.settimeout(timeout)

    try:
      sock.connect((target, port))
    except socket.timeout:
      return False
    except socket.error as err:
      # a refused connection was still answered by the target
      return (not live_port_needed) and (err.errno == errno.ECONNREFUSED)

    return True
  finally:
    # release the descriptor on every path, not only after a successful
    # connect
    sock.close()
146 da7e44ee Michael Hanselmann
147 da7e44ee Michael Hanselmann
148 da7e44ee Michael Hanselmann
def GetHosts(hostsfile):
  """Return list of hosts from hostfile.

  Reads the hostslist file and returns a list of hosts.
  Expects the hostslist file to contain one hostname per line. Surrounding
  whitespace (including line endings) is stripped from every entry and
  empty lines are skipped, so that they do not end up as bogus hosts.

  @param hostsfile: path of the file to read
  @return: list of host names in file order

  If the file cannot be opened, an error is printed and the program exits
  with status 2.

  """
  try:
    datafile = open(hostsfile, "r")
  except IOError as msg:
    print("Failed to open hosts file %s: %s" % (hostsfile, msg))
    sys.exit(2)

  try:
    lines = datafile.readlines()
  finally:
    datafile.close()

  stripped = [line.strip() for line in lines]
  return [host for host in stripped if host]
165 da7e44ee Michael Hanselmann
166 da7e44ee Michael Hanselmann
167 da7e44ee Michael Hanselmann
def WriteLog(message, logfile):
  """Writes message, terminated by newline, to logfile.

  The message is prefixed with the current timestamp as returned by
  GetTimeStamp and appended to the file.

  @param message: text to log
  @param logfile: path of the logfile to append to

  If the logfile cannot be opened or written to, the problem and the
  message are printed and the program exits with status 1.

  """
  try:
    # plain "a": the former "aw" is not a valid mode string on Python 3.
    # A separate name is used for the handle so that the error messages
    # below still show the path.
    handle = open(logfile, "a")
  except IOError as msg:
    print("failed to open log file %s: %s" % (logfile, msg))
    print("log message was: %s" % message)
    sys.exit(1)  # no being able to log is critical

  try:
    try:
      handle.write("%s %s\n" % (GetTimeStamp(), message))
    finally:
      handle.close()
  except IOError as msg:
    print("failed to write to logfile %s: %s" % (logfile, msg))
    print("log message was: %s" % message)
    sys.exit(1)  # no being able to log is critical
183 da7e44ee Michael Hanselmann
184 da7e44ee Michael Hanselmann
185 da7e44ee Michael Hanselmann
def GetAgentKeys():
  """Fetch the keys offered by the ssh agent.

  @return: list of the keys returned by paramiko.Agent, or an empty list
      if talking to the agent raised paramiko.SSHException

  """
  try:
    agent_keys = list(paramiko.Agent().get_keys())
  except paramiko.SSHException:
    agent_keys = []
  return agent_keys
192 da7e44ee Michael Hanselmann
193 da7e44ee Michael Hanselmann
194 99a11adc Iustin Pop
def SetupSshConnection(host, username, password, use_agent, logfile):
  """Setup the ssh connection used for all later steps.

  This function sets up the ssh connection that will be used both
  for upload and remote command execution.

  On success, it will return paramiko.Transport object with an
  already logged in session. On failure, False will be returned.

  @param host: name or address of the host to connect to (port 22)
  @param username: user to log in as
  @param password: password to try after the agent keys, or None
  @param use_agent: whether keys from an ssh agent should be tried
  @param logfile: path of the logfile for status messages

  """
  # check if target is willing to talk to us at all
  if not PingByTcp(host, 22, live_port_needed=True):
    WriteLog("ERROR: FAILURE_NOT_REACHABLE", logfile)
    print("  - ERROR: host not reachable on 22/tcp")
    return False

  if use_agent:
    keys = GetAgentKeys()
  else:
    keys = []

  # authentication methods in the order in which they are tried
  attempts = [("key %d" % idx, {"pkey": key}) for idx, key in enumerate(keys)]
  if password is not None:
    attempts.append(("password", {"password": password}))

  # deal with logging out of paramiko.transport
  handler = None

  for desc, kwargs in attempts:
    transport = None
    try:
      transport = paramiko.Transport((host, 22))

      # only try to setup the logging handler once
      if not handler:
        handler = logging.StreamHandler()
        handler.setLevel(logging.ERROR)
        log = logging.getLogger(transport.get_log_channel())
        log.addHandler(handler)

      transport.connect(username=username, **kwargs) # pylint: disable=W0142
    except (socket.gaierror, socket.error, paramiko.SSHException):
      # do not leave the half-open transport of a failed attempt behind
      if transport is not None:
        transport.close()
      continue

    WriteLog("ssh connection established using %s" % desc, logfile)
    # strange ... when establishing the session and the immediately
    # setting up the channels for sftp & shell from that, it sometimes
    # fails, but waiting 1 second after session setup makes it always work
    # time.sleep(1)
    # FIXME apparently needfull to give sshd some time
    return transport

  methods = ", ".join([desc for desc, _ in attempts])
  WriteLog("ERROR: FAILURE_CONNECTION_SETUP (tried %s) " % methods, logfile)
  WriteLog("aborted", logfile)
  print("  - ERROR: connection setup failed (tried %s)" % methods)

  return False
251 da7e44ee Michael Hanselmann
252 da7e44ee Michael Hanselmann
253 da7e44ee Michael Hanselmann
def UploadFiles(connection, executable, filelist, logfile):
  """Uploads the specified files via sftp.

  Uploads the specified files to a random, freshly created directory with
  a temporary name under /tmp. All uploaded files are chmod 0400 after upload
  with the exception of executable, which is chmod 500.

  Upon success, returns the absolute path to the remote upload directory,
  but will return False upon failure.

  @param connection: transport to open the sftp channel on
  @param executable: the entry of filelist that has to be executable
  @param filelist: local paths of the files to upload
  @param logfile: path of the logfile for status messages

  """
  remote_dir = "%s.%s-%s" % (REMOTE_PATH_BASE,
                             random.random(), random.random())

  sftp = None
  try:
    try:
      sftp = paramiko.SFTPClient.from_transport(connection)
      sftp.mkdir(remote_dir, mode=0o700)
      for item in filelist:
        remote_file = "%s/%s" % (remote_dir, os.path.basename(item))
        WriteLog("uploading %s to remote %s" % (item, remote_file), logfile)
        sftp.put(item, remote_file)
        if item == executable:
          sftp.chmod(remote_file, 0o500)
        else:
          sftp.chmod(remote_file, 0o400)
    finally:
      # close the sftp channel on failure as well
      if sftp is not None:
        sftp.close()
  except IOError as err:
    WriteLog("ERROR: FAILURE_UPLOAD: %s" % err, logfile)
    return False

  return remote_dir
283 da7e44ee Michael Hanselmann
284 da7e44ee Michael Hanselmann
285 da7e44ee Michael Hanselmann
def CleanupRemoteDir(connection, upload_dir, filelist, logfile):
  """Cleans out and removes the remote work directory.

  @param connection: transport to open the sftp channel on
  @param upload_dir: remote directory the files were uploaded to
  @param filelist: local paths of the uploaded files; their basenames are
      removed from upload_dir
  @param logfile: path of the logfile for status messages
  @return: True on success, False if an IOError occurred

  """
  sftp = None
  try:
    try:
      sftp = paramiko.SFTPClient.from_transport(connection)
      for item in filelist:
        fullpath = "%s/%s" % (upload_dir, os.path.basename(item))
        WriteLog("removing remote %s" % fullpath, logfile)
        sftp.remove(fullpath)
      sftp.rmdir(upload_dir)
    finally:
      # close the sftp channel on failure as well
      if sftp is not None:
        sftp.close()
  except IOError as err:
    WriteLog("ERROR: FAILURE_CLEANUP: %s" % err, logfile)
    return False

  return True
300 da7e44ee Michael Hanselmann
301 da7e44ee Michael Hanselmann
302 da7e44ee Michael Hanselmann
def RunRemoteCommand(connection, command, logfile):
  """Execute the command via ssh on the remote host.

  The command is run with stderr redirected to stdout and everything it
  prints is written to the logfile line by line.

  @param connection: transport to open the session on
  @param command: shell command to execute
  @param logfile: path of the logfile for status messages and output
  @return: False if the command could not be started, True otherwise

  """
  session = connection.open_session()
  try:
    session.setblocking(0)

    # the following dance is needed because paramiko changed APIs:
    # from returning True/False for success to always returning None
    # and throwing an exception in case of problems.
    # And I want to support both the old and the new API.
    result = True  # being optimistic here, I know
    message = None
    try:
      if session.exec_command("%s 2>&1" % command) is False:
        result = False
    except paramiko.SSHException as err:
      # keep the exception under a separate name; Python 3 unbinds the
      # "as" target when the handler ends
      message = err
      result = False

    if not result:
      WriteLog("ERROR: FAILURE_COMMAND_EXECUTION: %s" % message, logfile)
      return False

    ### Read when data is available
    chunks = []
    while True:
      select.select([session], [], [])
      try:
        data = session.recv(1024)
      except socket.timeout as err:
        data = None
        WriteLog("FAILED: socket.timeout %s" % err, logfile)
      except socket.error as err:
        data = None
        WriteLog("FAILED: socket.error %s" % err, logfile)
      if not data:
        break
      chunks.append(data)
      select.select([], [], [], .1)

    # NOTE(review): a read error above only ends the loop; the function
    # still logs SUCCESS and returns True as before - confirm intent.
    output = b"".join(chunks)
    if not isinstance(output, str):
      # recv() handed out bytes that are not str (Python 3)
      output = output.decode("utf-8", "replace")

    WriteLog("SUCCESS: command output follows", logfile)
    for line in output.splitlines():
      WriteLog("output = %s" % line, logfile)
    WriteLog("command execution completed", logfile)
  finally:
    # also release the channel when the command could not be started
    session.close()

  return True
346 da7e44ee Michael Hanselmann
347 da7e44ee Michael Hanselmann
348 99a11adc Iustin Pop
def HostWorker(logdir, username, password, use_agent, hostname,
               executable, exec_args, command, filelist):
  """Per-host worker.

  This function does not return - it's the main code of the childs,
  which exit at the end of this function. The exit code 0 or 1 will be
  interpreted by the parent.

  @param logdir: the directory where the logfiles must be created
  @param username: SSH username
  @param password: SSH password
  @param use_agent: whether we should instead use an agent
  @param hostname: the hostname to connect to
  @param executable: the executable to upload, if not None
  @param exec_args: Additional arguments for executable
  @param command: the command to run
  @param filelist: auxiliary files to upload

  """
  # in the child/worker process
  logfile = "%s/%s.log" % (logdir, hostname)
  print("%s - starting" % hostname)
  result = 0  # optimism, I know
  try:
    connection = SetupSshConnection(hostname, username,
                                    password, use_agent, logfile)
    if connection is False:
      print("  %s: connection setup failed, skipping" % hostname)
      result = 1
    else:
      try:
        upload_dir = None
        if executable is not None:
          print("  %s: uploading files" % hostname)
          upload_dir = UploadFiles(connection, executable,
                                   filelist, logfile)

        if upload_dir is False:
          # Without the upload there is nothing sensible to run or to clean
          # up; going on would execute "cd False && ./<executable>".
          print("  %s: file upload failed, check log for details" % hostname)
          result = 1
        else:
          if executable is not None:
            command = ("cd %s && ./%s" %
                       (upload_dir, os.path.basename(executable)))
            if exec_args:
              command += " %s" % exec_args

          print("  %s: executing remote command" % hostname)
          cmd_result = RunRemoteCommand(connection, command, logfile)
          if cmd_result is True:
            print("  %s: remote command execution successful" % hostname)
          else:
            print("  %s: remote command execution failed,"
                  " check log for details" % hostname)
            result = 1

          if executable is not None:
            print("  %s: cleaning up remote work dir" % hostname)
            cln_result = CleanupRemoteDir(connection, upload_dir,
                                          filelist, logfile)
            if cln_result is False:
              print("  %s: remote work dir cleanup failed, check"
                    " log for details" % hostname)
              result = 1
      finally:
        # close the connection even if one of the steps raised
        connection.close()
  except KeyboardInterrupt:
    print("  %s: received KeyboardInterrupt, aborting" % hostname)
    WriteLog("ERROR: ABORT_KEYBOARD_INTERRUPT", logfile)
    result = 1
  except Exception as err:
    result = 1
    trace = traceback.format_exc()
    msg = "ERROR: UNHANDLED_EXECPTION_ERROR: %s\nTrace: %s" % (err, trace)
    WriteLog(msg, logfile)
    print("  %s: %s" % (hostname, msg))
  # and exit with exit code 0 or 1, so the parent can compute statistics
  sys.exit(result)
415 da7e44ee Michael Hanselmann
416 da7e44ee Michael Hanselmann
417 99a11adc Iustin Pop
def LaunchWorker(child_pids, logdir, username, password, use_agent, hostname,
                 executable, exec_args, command, filelist):
  """Fork a child process that handles one host.

  The parent only records the child in child_pids; the child runs
  HostWorker with the remaining arguments.

  @param child_pids: dictionary mapping child pid to hostname, updated
      in the parent

  All other parameters are passed on to HostWorker; trailing newlines are
  removed from hostname first.

  """
  node = hostname.rstrip("\n")
  child = os.fork()
  if child > 0:
    # we are the controller: remember which child works on which host
    child_pids[child] = node
    return

  HostWorker(logdir, username, password, use_agent, node,
             executable, exec_args, command, filelist)
433 da7e44ee Michael Hanselmann
434 da7e44ee Michael Hanselmann
435 b74c0684 Iustin Pop
def ParseOptions():
  """Parse and validate the command line.

  On invalid or conflicting options the usage and an error message are
  shown and the program exits (via optparse's error handling).

  @return: tuple of (logdir, executable, exec_args, hostfile, hostlist,
      command, use_agent, auxfiles, username, password, batch_size)

  """
  # "resolve" lets our own -h (host list) take over from optparse's
  # built-in -h for help
  optparser = optparse.OptionParser(usage="\n%s" % USAGE,
                                    conflict_handler="resolve")

  # option definitions, in the order in which they are registered
  option_table = [
    ("-l", dict(dest="logdir", default=None,
                help="directory to write logfiles to")),
    ("-x", dict(dest="executable", default=None,
                help="executable to run on remote host(s)")),
    ("-f", dict(dest="hostfile", default=None,
                help="hostlist file (one host per line)")),
    ("-h", dict(dest="hostlist", default=None, metavar="HOSTS",
                help="comma-separated list of hosts or single hostname")),
    ("-a", dict(dest="auxfiles", action="append", default=[],
                help=("optional auxiliary file to upload"
                      " (can be given multiple times)"),
                metavar="FILE")),
    ("-c", dict(dest="command", default=None,
                help="shell command to run on remote host(s)")),
    ("-b", dict(dest="batch_size", default=15, type="int",
                help=("batch-size, how many hosts to process"
                      " in parallel [15]"))),
    ("-u", dict(dest="username", default="root",
                help="username used to connect [root]")),
    ("-p", dict(dest="password", default=None,
                help=("password used to authenticate (when not"
                      " using an agent)"))),
    ("-A", dict(dest="use_agent", default=False, action="store_true",
                help="instead of password, use keys from an SSH agent")),
    ("--args", dict(dest="exec_args", default=None,
                    help="Arguments to be passed to executable (-x)")),
  ]
  for flag, settings in option_table:
    optparser.add_option(flag, **settings)

  options, leftover = optparser.parse_args()

  # sanity checks; the first one that applies is reported
  checks = [
    (options.executable and options.command,
     "Options -x and -c conflict with each other"),
    (not (options.executable or options.command),
     "One of -x and -c must be given"),
    (options.command and options.exec_args,
     "Can't specify arguments when using custom command"),
    (not options.logdir,
     "Option -l is required"),
    (options.hostfile and options.hostlist,
     "Options -f and -h conflict with each other"),
    (not (options.hostfile or options.hostlist),
     "One of -f or -h must be given"),
    (leftover,
     "This program doesn't take any arguments, passed in: %s" %
     ", ".join(leftover)),
  ]
  for failed, complaint in checks:
    if failed:
      optparser.error(complaint)

  return (options.logdir, options.executable, options.exec_args,
          options.hostfile, options.hostlist,
          options.command, options.use_agent, options.auxfiles,
          options.username, options.password, options.batch_size)
498 b74c0684 Iustin Pop
499 b74c0684 Iustin Pop
500 da7e44ee Michael Hanselmann
def main():
501 da7e44ee Michael Hanselmann
  """main."""
502 6eedd356 Michael Hanselmann
  (logdir, executable, exec_args, hostfile, hostlist,
503 b74c0684 Iustin Pop
   command, use_agent, auxfiles, username,
504 b74c0684 Iustin Pop
   password, batch_size) = ParseOptions()
505 da7e44ee Michael Hanselmann
506 da7e44ee Michael Hanselmann
  ### Unbuffered sys.stdout
507 da7e44ee Michael Hanselmann
  sys.stdout = os.fdopen(1, "w", 0)
508 da7e44ee Michael Hanselmann
509 da7e44ee Michael Hanselmann
  if LogDirUseable(logdir) is False:
510 da7e44ee Michael Hanselmann
    print "ERROR: cannot create logfiles in dir %s, aborting" % logdir
511 da7e44ee Michael Hanselmann
    sys.exit(1)
512 da7e44ee Michael Hanselmann
513 da7e44ee Michael Hanselmann
  if use_agent:
514 99a11adc Iustin Pop
    pass
515 da7e44ee Michael Hanselmann
  elif password:
516 da7e44ee Michael Hanselmann
    try:
517 da7e44ee Michael Hanselmann
      fh = file(password)
518 da7e44ee Michael Hanselmann
      pwvalue = fh.readline().strip()
519 da7e44ee Michael Hanselmann
      fh.close()
520 da7e44ee Michael Hanselmann
    except IOError, e:
521 da7e44ee Michael Hanselmann
      print "error: can not read in from password file %s: %s" % (password, e)
522 da7e44ee Michael Hanselmann
      sys.exit(1)
523 da7e44ee Michael Hanselmann
    password = pwvalue
524 da7e44ee Michael Hanselmann
  else:
525 da7e44ee Michael Hanselmann
    password = getpass.getpass("%s's password for all nodes: " % username)
526 da7e44ee Michael Hanselmann
527 da7e44ee Michael Hanselmann
  if hostfile:
528 da7e44ee Michael Hanselmann
    hosts = GetHosts(hostfile)
529 da7e44ee Michael Hanselmann
  else:
530 da7e44ee Michael Hanselmann
    if "," in hostlist:
531 da7e44ee Michael Hanselmann
      hostlist = hostlist.rstrip(",")  # commandline robustness
532 da7e44ee Michael Hanselmann
      hosts = hostlist.split(",")
533 da7e44ee Michael Hanselmann
    else:
534 da7e44ee Michael Hanselmann
      hosts = [hostlist]
535 da7e44ee Michael Hanselmann
536 da7e44ee Michael Hanselmann
  successes = failures = 0
537 da7e44ee Michael Hanselmann
538 da7e44ee Michael Hanselmann
  filelist = auxfiles[:]
539 da7e44ee Michael Hanselmann
  filelist.append(executable)
540 da7e44ee Michael Hanselmann
541 da7e44ee Michael Hanselmann
  # initial batch
542 da7e44ee Michael Hanselmann
  batch = hosts[:batch_size]
543 da7e44ee Michael Hanselmann
  hosts = hosts[batch_size:]
544 da7e44ee Michael Hanselmann
  child_pids = {}
545 da7e44ee Michael Hanselmann
  for hostname in batch:
546 99a11adc Iustin Pop
    LaunchWorker(child_pids, logdir, username, password, use_agent, hostname,
547 6eedd356 Michael Hanselmann
                 executable, exec_args, command, filelist)
548 da7e44ee Michael Hanselmann
549 da7e44ee Michael Hanselmann
  while child_pids:
550 da7e44ee Michael Hanselmann
    pid, status = os.wait()
551 da7e44ee Michael Hanselmann
    hostname = child_pids.pop(pid, "<unknown host>")
552 da7e44ee Michael Hanselmann
    print "  %s: done (in parent)" % hostname
553 da7e44ee Michael Hanselmann
    if os.WIFEXITED(status) and os.WEXITSTATUS(status) == 0:
554 da7e44ee Michael Hanselmann
      successes += 1
555 da7e44ee Michael Hanselmann
    else:
556 da7e44ee Michael Hanselmann
      failures += 1
557 da7e44ee Michael Hanselmann
    if hosts:
558 99a11adc Iustin Pop
      LaunchWorker(child_pids, logdir, username, password, use_agent,
559 6eedd356 Michael Hanselmann
                   hosts.pop(0), executable, exec_args, command, filelist)
560 da7e44ee Michael Hanselmann
561 da7e44ee Michael Hanselmann
  print
562 da7e44ee Michael Hanselmann
  print "All done, %s successful and %s failed hosts" % (successes, failures)
563 da7e44ee Michael Hanselmann
564 da7e44ee Michael Hanselmann
  sys.exit(0)
565 da7e44ee Michael Hanselmann
566 da7e44ee Michael Hanselmann
567 da7e44ee Michael Hanselmann
if __name__ == "__main__":
568 da7e44ee Michael Hanselmann
  try:
569 da7e44ee Michael Hanselmann
    main()
570 da7e44ee Michael Hanselmann
  except KeyboardInterrupt:
571 da7e44ee Michael Hanselmann
    print "Received KeyboardInterrupt, aborting"
572 da7e44ee Michael Hanselmann
    sys.exit(1)