Statistics
| Branch: | Tag: | Revision:

root / tools / ganeti-listrunner @ e1ab08db

History | View | Annotate | Download (16.9 kB)

1 da7e44ee Michael Hanselmann
#!/usr/bin/python
2 da7e44ee Michael Hanselmann
#
3 da7e44ee Michael Hanselmann
4 da7e44ee Michael Hanselmann
# Copyright (C) 2006, 2007, 2010 Google Inc.
5 da7e44ee Michael Hanselmann
#
6 da7e44ee Michael Hanselmann
# This program is free software; you can redistribute it and/or modify
7 da7e44ee Michael Hanselmann
# it under the terms of the GNU General Public License as published by
8 da7e44ee Michael Hanselmann
# the Free Software Foundation; either version 2 of the License, or
9 da7e44ee Michael Hanselmann
# (at your option) any later version.
10 da7e44ee Michael Hanselmann
#
11 da7e44ee Michael Hanselmann
# This program is distributed in the hope that it will be useful, but
12 da7e44ee Michael Hanselmann
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 da7e44ee Michael Hanselmann
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 da7e44ee Michael Hanselmann
# General Public License for more details.
15 da7e44ee Michael Hanselmann
#
16 da7e44ee Michael Hanselmann
# You should have received a copy of the GNU General Public License
17 da7e44ee Michael Hanselmann
# along with this program; if not, write to the Free Software
18 da7e44ee Michael Hanselmann
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 da7e44ee Michael Hanselmann
# 02110-1301, USA.
20 da7e44ee Michael Hanselmann
21 da7e44ee Michael Hanselmann
"""Run an executable on a list of hosts.
22 da7e44ee Michael Hanselmann
23 da7e44ee Michael Hanselmann
Script to serially run an executable on a list of hosts via ssh
24 da7e44ee Michael Hanselmann
with password auth as root. If the provided log dir does not yet
25 da7e44ee Michael Hanselmann
exist, it will try to create it.
26 da7e44ee Michael Hanselmann
27 da7e44ee Michael Hanselmann
Implementation:
28 da7e44ee Michael Hanselmann
 - the main process spawns up to batch_size children, which:
29 da7e44ee Michael Hanselmann
 - connects to the remote host via ssh as root
30 da7e44ee Michael Hanselmann
 - uploads the executable with a random name to /tmp via sftp
31 da7e44ee Michael Hanselmann
 - chmod 500s it
32 da7e44ee Michael Hanselmann
 - via ssh: chdirs into the upload directory and runs the script
33 da7e44ee Michael Hanselmann
 - deletes it
34 da7e44ee Michael Hanselmann
 - writes status messages and all output to one logfile per host
35 da7e44ee Michael Hanselmann
 - the main process then gathers the status of the children and
36 da7e44ee Michael Hanselmann
   reports the success/failure ratio
37 da7e44ee Michael Hanselmann
 - entire script can be aborted with Ctrl-C
38 da7e44ee Michael Hanselmann
39 da7e44ee Michael Hanselmann
Security considerations:
40 da7e44ee Michael Hanselmann
 - the root password for the remote hosts is stored in memory for the
41 da7e44ee Michael Hanselmann
   runtime of the script
42 da7e44ee Michael Hanselmann
 - the executable to be run on the remote host is handled the following way:
43 da7e44ee Michael Hanselmann
   - try to create a random directory with permissions 700 on the
44 da7e44ee Michael Hanselmann
     remote host, abort further processing on this host if this fails
45 da7e44ee Michael Hanselmann
   - upload the executable to a random filename in that directory
46 da7e44ee Michael Hanselmann
   - set executable permissions to 500
47 da7e44ee Michael Hanselmann
   - run the executable
48 da7e44ee Michael Hanselmann
   - delete the executable and the directory on the remote host
49 da7e44ee Michael Hanselmann
50 da7e44ee Michael Hanselmann
"""
51 da7e44ee Michael Hanselmann
52 da7e44ee Michael Hanselmann
# pylint: disable-msg=C0103
53 da7e44ee Michael Hanselmann
# C0103: Invalid name ganeti-listrunner
54 da7e44ee Michael Hanselmann
55 da7e44ee Michael Hanselmann
import errno
56 da7e44ee Michael Hanselmann
import getopt
57 da7e44ee Michael Hanselmann
import getpass
58 da7e44ee Michael Hanselmann
import logging
59 da7e44ee Michael Hanselmann
import os
60 da7e44ee Michael Hanselmann
import random
61 da7e44ee Michael Hanselmann
import select
62 da7e44ee Michael Hanselmann
import socket
63 da7e44ee Michael Hanselmann
import sys
64 da7e44ee Michael Hanselmann
import time
65 da7e44ee Michael Hanselmann
import traceback
66 da7e44ee Michael Hanselmann
67 da7e44ee Michael Hanselmann
import paramiko
68 da7e44ee Michael Hanselmann
69 da7e44ee Michael Hanselmann
70 da7e44ee Michael Hanselmann
# Common prefix of the remote work directory; UploadFiles appends two random
# numbers to it to build the actual directory name.
REMOTE_PATH_BASE = "/tmp/listrunner"
71 da7e44ee Michael Hanselmann
72 da7e44ee Michael Hanselmann
73 da7e44ee Michael Hanselmann
def LogDirUseable(logdir):
  """Ensure log file directory is available and usable.

  Creates logdir if it does not exist yet and then checks that a file can
  be created, written to and removed again inside of it.

  @param logdir: path of the directory the logfiles will be written to
  @return: True if the test file could be written and deleted, False if
      that failed with an OSError or IOError
  @raise OSError: if creating the directory fails for any reason other
      than the path already existing

  """
  testfile = "%s/test-%s-%s.deleteme" % (logdir, random.random(),
                                         random.random())
  try:
    os.mkdir(logdir)
  except OSError as err:
    # an already existing path is fine, the write test below decides
    # whether it is actually usable
    if err.errno != errno.EEXIST:
      raise

  try:
    # plain append mode; "aw" is not a valid mode string
    logtest = open(testfile, "a")
    try:
      logtest.write("log file writeability test\n")
    finally:
      # do not leak the handle if the write fails
      logtest.close()
    os.unlink(testfile)
  except (OSError, IOError):
    return False

  return True
90 da7e44ee Michael Hanselmann
91 da7e44ee Michael Hanselmann
92 da7e44ee Michael Hanselmann
def ShowHelp(executable):
  """Print short usage information.

  The text lists every option main() accepts; note that -p takes the name
  of a file holding the password, not the password itself.

  @param executable: program name to show in the usage line

  """
  print("usage: %s -l logdir [-c|-x] value [-b batch_size]"
        " [-f hostfile|-h hosts] [-a auxfile] [-u username]"
        " [-p password_file] [-A]" % executable)
  print("""        -l logdir to write logfiles to
        -x executable to run on remote host(s)
        -c shell command to run on remote host(s)
        -f hostlist file (one host per line)
        -a optional auxiliary file to upload (can be given multiple times)
        -b batch-size, how many hosts to process in parallel [15]
        -h comma-separated list of hosts or single hostname
        -u username used to connect [root]
        -p file to read the password used to authenticate from
        -A use the keys offered by the ssh agent to authenticate""")
106 da7e44ee Michael Hanselmann
107 da7e44ee Michael Hanselmann
108 da7e44ee Michael Hanselmann
def GetTimeStamp(timestamp=None):
  """Format a point in time as ISO8601 string.

  @param timestamp: time tuple as returned by time.localtime(); the
      current local time is used if it is not supplied
  @return: string of the form YYYY-MM-DDTHH:MM:SS

  """
  when = timestamp
  if when is None:
    when = time.localtime()

  return time.strftime("%Y-%m-%dT%H:%M:%S", when)
120 da7e44ee Michael Hanselmann
121 da7e44ee Michael Hanselmann
122 da7e44ee Michael Hanselmann
def PingByTcp(target, port, timeout=10, live_port_needed=False, source=None):
  """Simple ping implementation using TCP connect(2).

  Try to do a TCP connect(2) from an optional source IP to the
  specified target IP and the specified target port. If the optional
  parameter live_port_needed is set to true, requires the remote end
  to accept the connection. The timeout is specified in seconds and
  defaults to 10 seconds. If the source optional argument is not
  passed, the source address selection is left to the kernel,
  otherwise we try to connect using the passed address (failures to
  bind other than EADDRNOTAVAIL will be ignored).

  @param target: address to connect to
  @param port: TCP port to connect to
  @param timeout: socket timeout in seconds
  @param live_port_needed: if true, a refused connection counts as failure
  @param source: optional local address to bind to before connecting
  @return: True if the connection was established, or was refused while
      live_port_needed is false; False otherwise

  """
  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

  try:
    if source is not None:
      try:
        sock.bind((source, 0))
      except socket.error as err:
        # the error code has to be read from the exception object; the
        # exception itself never compares equal to an errno constant
        if err.errno == errno.EADDRNOTAVAIL:
          # the requested source address does not exist on this machine,
          # so the ping cannot be done as asked for
          return False

    sock.settimeout(timeout)

    try:
      sock.connect((target, port))
    except socket.timeout:
      return False
    except socket.error as err:
      # a refused connection still proves that the host itself is up
      return (not live_port_needed) and (err.errno == errno.ECONNREFUSED)

    return True
  finally:
    # release the socket on every path, not only after a successful connect
    sock.close()
158 da7e44ee Michael Hanselmann
159 da7e44ee Michael Hanselmann
160 da7e44ee Michael Hanselmann
def GetHosts(hostsfile):
  """Return list of hosts from hostfile.

  Reads the hostslist file and returns a list of hosts.
  Expects the hostslist file to contain one hostname per line. Leading
  and trailing whitespace is removed and empty lines are skipped, so that
  e.g. a trailing blank line does not turn into a host with an empty name.

  If the file can not be opened, a message is printed and the program
  exits with exit code 2.

  @param hostsfile: path of the file to read
  @return: list of hostnames in file order

  """
  try:
    datafile = open(hostsfile, "r")
  except IOError as msg:
    print("Failed to open hosts file %s: %s" % (hostsfile, msg))
    sys.exit(2)

  try:
    lines = datafile.readlines()
  finally:
    datafile.close()

  stripped = [line.strip() for line in lines]
  return [name for name in stripped if name]
177 da7e44ee Michael Hanselmann
178 da7e44ee Michael Hanselmann
179 da7e44ee Michael Hanselmann
def WriteLog(message, logfile):
  """Writes message, terminated by newline, to logfile.

  The message is prefixed with the current timestamp and appended to the
  file. If the file can not be opened or written to, the problem and the
  message are printed and the process exits with exit code 1.

  @param message: text to log
  @param logfile: path of the logfile to append to

  """
  try:
    # keep the path in "logfile" so that the error messages below can
    # still show it
    logfh = open(logfile, "a")
  except IOError as msg:
    print("failed to open log file %s: %s" % (logfile, msg))
    print("log message was: %s" % message)
    sys.exit(1)  # not being able to log is critical

  try:
    try:
      timestamp = GetTimeStamp()
      logfh.write("%s %s\n" % (timestamp, message))
    finally:
      # close the handle even if the write failed
      logfh.close()
  except IOError as msg:
    print("failed to write to logfile %s: %s" % (logfile, msg))
    print("log message was: %s" % message)
    sys.exit(1)  # not being able to log is critical
195 da7e44ee Michael Hanselmann
196 da7e44ee Michael Hanselmann
197 da7e44ee Michael Hanselmann
def GetAgentKeys():
  """Fetch the ssh keys offered by an agent.

  @return: list of the keys reported by paramiko.Agent; an empty list if
      paramiko raises an SSHException while asking for them

  """
  try:
    found = list(paramiko.Agent().get_keys())
  except paramiko.SSHException:
    found = []
  return found
204 da7e44ee Michael Hanselmann
205 da7e44ee Michael Hanselmann
206 da7e44ee Michael Hanselmann
def SetupSshConnection(host, username, password, keys, logfile):
  """Setup the ssh connection used for all later steps.

  This function sets up the ssh connection that will be used both
  for upload and remote command execution. The given keys are tried
  one after the other, followed by the password if one was passed; the
  first method that works wins.

  On success, it will return paramiko.Transport object with an
  already logged in session. On failure, False will be returned.

  @param host: the host to connect to (port 22)
  @param username: SSH username
  @param password: SSH password, or None to not try password auth
  @param keys: list of SSH keys to try
  @param logfile: path of the logfile status messages are written to

  """
  # check if target is willing to talk to us at all
  if not PingByTcp(host, 22, live_port_needed=True):
    WriteLog("ERROR: FAILURE_NOT_REACHABLE", logfile)
    print("  - ERROR: host not reachable on 22/tcp")
    return False

  # one set of connect() arguments and one description per method
  all_kwargs = [{"pkey": k} for k in keys]
  all_desc = ["key %d" % idx for idx, _ in enumerate(keys)]
  if password is not None:
    all_kwargs.append({"password": password})
    all_desc.append("password")

  # deal with logging out of paramiko.transport
  handler = None

  for desc, kwargs in zip(all_desc, all_kwargs):
    transport = None
    try:
      transport = paramiko.Transport((host, 22))

      # only try to setup the logging handler once
      if not handler:
        handler = logging.StreamHandler()
        handler.setLevel(logging.ERROR)
        log = logging.getLogger(transport.get_log_channel())
        log.addHandler(handler)

      transport.connect(username=username, **kwargs) # pylint: disable-msg=W0142
      WriteLog("ssh connection established using %s" % desc, logfile)
      # FIXME: setting up the sftp & shell channels immediately after the
      # session sometimes fails; waiting a second after session setup was
      # observed to help, but the sleep is currently disabled
      return transport
    except (socket.gaierror, socket.error, paramiko.SSHException):
      # do not leave the failed attempt's transport open while the next
      # method is being tried
      if transport is not None:
        transport.close()
      continue

  methods = ", ".join(all_desc)
  WriteLog("ERROR: FAILURE_CONNECTION_SETUP (tried %s) " % methods, logfile)
  WriteLog("aborted", logfile)
  print("  - ERROR: connection setup failed (tried %s)" % methods)

  return False
259 da7e44ee Michael Hanselmann
260 da7e44ee Michael Hanselmann
261 da7e44ee Michael Hanselmann
def UploadFiles(connection, executable, filelist, logfile):
  """Uploads the specified files via sftp.

  A fresh remote directory with a random name based on REMOTE_PATH_BASE
  is created with mode 0700 and every file of filelist is put into it
  under its base name. The executable is set to mode 0500 after the
  upload, all other files to mode 0400.

  @param connection: transport to open the sftp client on
  @param executable: entry of filelist which has to be made executable
  @param filelist: local paths of the files to upload
  @param logfile: path of the logfile status messages are written to
  @return: the absolute path of the remote upload directory, or False
      if an IOError occurred

  """
  suffix = (random.random(), random.random())
  remote_dir = "%s.%s-%s" % ((REMOTE_PATH_BASE, ) + suffix)

  try:
    sftp = paramiko.SFTPClient.from_transport(connection)
    sftp.mkdir(remote_dir, mode=0o700)
    for local_path in filelist:
      # the remote name is the last path component of the local one
      target = "%s/%s" % (remote_dir, local_path.rsplit("/", 1)[-1])
      WriteLog("uploading %s to remote %s" % (local_path, target), logfile)
      sftp.put(local_path, target)
      # only the executable gets the execute bit
      if local_path == executable:
        perms = 0o500
      else:
        perms = 0o400
      sftp.chmod(target, perms)
    sftp.close()
  except IOError as err:
    WriteLog("ERROR: FAILURE_UPLOAD: %s" % err, logfile)
    return False

  return remote_dir
291 da7e44ee Michael Hanselmann
292 da7e44ee Michael Hanselmann
293 da7e44ee Michael Hanselmann
def CleanupRemoteDir(connection, upload_dir, filelist, logfile):
  """Cleans out and removes the remote work directory.

  @param connection: transport to open the sftp client on
  @param upload_dir: remote directory the files were uploaded to
  @param filelist: local paths of the uploaded files; their base names
      are removed from upload_dir
  @param logfile: path of the logfile status messages are written to
  @return: True on success, False if an IOError occurred

  """
  try:
    sftp = paramiko.SFTPClient.from_transport(connection)
    for local_path in filelist:
      victim = "%s/%s" % (upload_dir, local_path.rsplit("/", 1)[-1])
      WriteLog("removing remote %s" % victim, logfile)
      sftp.remove(victim)
    # the directory itself goes last, once it is empty
    sftp.rmdir(upload_dir)
    sftp.close()
  except IOError as err:
    WriteLog("ERROR: FAILURE_CLEANUP: %s" % err, logfile)
    return False

  return True
308 da7e44ee Michael Hanselmann
309 da7e44ee Michael Hanselmann
310 da7e44ee Michael Hanselmann
def RunRemoteCommand(connection, command, logfile):
  """Execute the command via ssh on the remote host.

  The command is run with stderr redirected to stdout; everything it
  prints is collected and written to the logfile line by line.

  @param connection: transport to open the session on
  @param command: shell command to execute
  @param logfile: path of the logfile output and status are written to
  @return: False if the command could not be started, True otherwise

  """
  session = connection.open_session()
  session.setblocking(0)

  # paramiko changed APIs: exec_command used to return True/False for
  # success, now it always returns None and raises an exception in case
  # of problems. Both the old and the new API are supported here.
  started = True
  message = None
  try:
    if session.exec_command("%s 2>&1" % command) is False:
      started = False
  except paramiko.SSHException as err:
    message = err
    started = False

  if not started:
    WriteLog("ERROR: FAILURE_COMMAND_EXECUTION: %s" % message, logfile)
    return False

  # read whenever data is available, until recv reports end of data
  chunks = []
  while True:
    select.select([session], [], [])
    data = session.recv(1024)
    if not data:
      break
    chunks.append(data)
    # short pause before asking for more
    select.select([], [], [], .1)
  output = "".join(chunks)

  WriteLog("SUCCESS: command output follows", logfile)
  for line in output.split("\n"):
    WriteLog("output = %s" % line, logfile)
  WriteLog("command execution completed", logfile)
  session.close()

  return True
347 da7e44ee Michael Hanselmann
348 da7e44ee Michael Hanselmann
349 da7e44ee Michael Hanselmann
def HostWorker(logdir, username, password, keys, hostname,
               executable, command, filelist):
  """Per-host worker.

  This function does not return - it's the main code of the childs,
  which exit at the end of this function. The exit code 0 or 1 will be
  interpreted by the parent.

  If an executable is given, the files are uploaded first and the command
  is replaced by a call of the uploaded executable from within the upload
  directory; the directory is cleaned up after the run. A failed upload
  marks the host as failed and skips both execution and cleanup.

  @param logdir: the directory where the logfiles must be created
  @param username: SSH username
  @param password: SSH password
  @param keys: SSH keys
  @param hostname: the hostname to connect to
  @param executable: the executable to upload, if not None
  @param command: the command to run
  @param filelist: auxiliary files to upload

  """
  # in the child/worker process
  logfile = "%s/%s.log" % (logdir, hostname)
  print("%s - starting" % hostname)
  result = 0  # optimism, I know
  try:
    connection = SetupSshConnection(hostname, username,
                                    password, keys, logfile)
    if connection is not False:
      try:
        upload_dir = None
        uploaded = True
        if executable is not None:
          print("  %s: uploading files" % hostname)
          upload_dir = UploadFiles(connection, executable,
                                   filelist, logfile)
          if upload_dir is False:
            # without the upload directory there is nothing to run and
            # nothing which could be cleaned up by name
            print("  %s: file upload failed, check log for details" %
                  hostname)
            uploaded = False
            result = 1
          else:
            command = "cd %s && ./%s" % (upload_dir,
                                         executable.split("/").pop())
        if uploaded:
          print("  %s: executing remote command" % hostname)
          cmd_result = RunRemoteCommand(connection, command, logfile)
          if cmd_result is True:
            print("  %s: remote command execution successful" % hostname)
          else:
            print("  %s: remote command execution failed,"
                  " check log for details" % hostname)
            result = 1
          if executable is not None:
            print("  %s: cleaning up remote work dir" % hostname)
            cln_result = CleanupRemoteDir(connection, upload_dir,
                                          filelist, logfile)
            if cln_result is False:
              print("  %s: remote work dir cleanup failed, check"
                    " log for details" % hostname)
              result = 1
      finally:
        # also close the connection if one of the steps raised
        connection.close()
    else:
      print("  %s: connection setup failed, skipping" % hostname)
      result = 1
  except KeyboardInterrupt:
    print("  %s: received KeyboardInterrupt, aborting" % hostname)
    WriteLog("ERROR: ABORT_KEYBOARD_INTERRUPT", logfile)
    result = 1
  except Exception as err:
    result = 1
    trace = traceback.format_exc()
    msg = "ERROR: UNHANDLED_EXECPTION_ERROR: %s\nTrace: %s" % (err, trace)
    WriteLog(msg, logfile)
    print("  %s: %s" % (hostname, msg))
  # and exit with exit code 0 or 1, so the parent can compute statistics
  sys.exit(result)
413 da7e44ee Michael Hanselmann
414 da7e44ee Michael Hanselmann
415 da7e44ee Michael Hanselmann
def LaunchWorker(child_pids, logdir, username, password, keys, hostname,
                 executable, command, filelist):
  """Launch the per-host worker.

  Forks; the child runs HostWorker, the parent only records the child.
  Arguments are the same as for HostWorker, except for child_pids,
  which is a dictionary holding the pid-to-hostname mapping. A trailing
  newline is removed from hostname before it is used.

  """
  hostname = hostname.rstrip("\n")
  pid = os.fork()
  if pid <= 0:
    # child: do the actual work
    HostWorker(logdir, username, password, keys, hostname,
               executable, command, filelist)
  else:
    # controller just records the pids
    child_pids[pid] = hostname
431 da7e44ee Michael Hanselmann
432 da7e44ee Michael Hanselmann
433 da7e44ee Michael Hanselmann
def main():
  """Parse the command line, run the workers and print a summary.

  Up to batch_size workers are started initially; each time a child
  exits, the next remaining host (if any) is started. A child exit code
  of 0 counts as success, everything else as failure.

  The password is read from the file given with -p if there is one (also
  when the ssh agent is used via -A); it is only asked for interactively
  if neither -p nor -A were given.

  """
  try:
    optlist, _ = getopt.getopt(sys.argv[1:], "l:x:h:f:a:c:b:u:p:A")
  except getopt.GetoptError as err:
    print(str(err))
    ShowHelp(sys.argv[0])
    sys.exit(2)

  logdir = executable = hostfile = hostlist = command = None
  use_agent = False
  auxfiles = []
  username = "root"
  password_file = None
  batch_size = 15
  for opt, value in optlist:
    if opt == "-l":
      logdir = value
    elif opt == "-x":
      executable = value
    elif opt == "-f":
      hostfile = value
    elif opt == "-h":
      hostlist = value
    elif opt == "-a":
      auxfiles.append(value)
    elif opt == "-c":
      command = value
    elif opt == "-b":
      try:
        batch_size = int(value)
      except ValueError:
        print("error: batch size must be an integer, not '%s'" % value)
        ShowHelp(sys.argv[0])
        sys.exit(3)
    elif opt == "-u":
      username = value
    elif opt == "-p":
      password_file = value
    elif opt == "-A":
      use_agent = True

  if not (logdir and (executable or command) and (hostfile or hostlist)):
    print("error: missing required commandline argument(s)")
    ShowHelp(sys.argv[0])
    sys.exit(3)

  if executable and command:
    print("error: can run either a command or an executable, not both")
    ShowHelp(sys.argv[0])
    sys.exit(3)

  if hostlist and hostfile:
    print("error: specify either -f or -h arguments, not both")
    ShowHelp(sys.argv[0])
    sys.exit(3)

  if batch_size < 1:
    # with no (or a negative number of) slots no host would be processed
    # correctly by the batching below
    print("error: batch size must be at least 1")
    ShowHelp(sys.argv[0])
    sys.exit(3)

  ### Unbuffered sys.stdout
  sys.stdout = os.fdopen(1, "w", 0)

  if LogDirUseable(logdir) is False:
    print("ERROR: cannot create logfiles in dir %s, aborting" % logdir)
    sys.exit(1)

  keys = []
  password = None
  if use_agent:
    keys = GetAgentKeys()

  if password_file:
    # the option value is the name of a file, never the password itself
    try:
      fh = open(password_file)
      try:
        password = fh.readline().strip()
      finally:
        fh.close()
    except IOError as err:
      print("error: can not read in from password file %s: %s" %
            (password_file, err))
      sys.exit(1)
  elif not use_agent:
    password = getpass.getpass("%s's password for all nodes: " % username)

  if hostfile:
    hosts = GetHosts(hostfile)
  else:
    if "," in hostlist:
      hostlist = hostlist.rstrip(",")  # commandline robustness
      hosts = hostlist.split(",")
    else:
      hosts = [hostlist]

  successes = failures = 0

  filelist = auxfiles[:]
  filelist.append(executable)

  # initial batch
  batch = hosts[:batch_size]
  hosts = hosts[batch_size:]
  child_pids = {}
  for hostname in batch:
    LaunchWorker(child_pids, logdir, username, password, keys, hostname,
                 executable, command, filelist)

  # refill: every finished child makes room for one more host
  while child_pids:
    pid, status = os.wait()
    hostname = child_pids.pop(pid, "<unknown host>")
    print("  %s: done (in parent)" % hostname)
    if os.WIFEXITED(status) and os.WEXITSTATUS(status) == 0:
      successes += 1
    else:
      failures += 1
    if hosts:
      LaunchWorker(child_pids, logdir, username, password, keys,
                   hosts.pop(0), executable, command, filelist)

  print("")
  print("All done, %s successful and %s failed hosts" % (successes, failures))

  sys.exit(0)
545 da7e44ee Michael Hanselmann
546 da7e44ee Michael Hanselmann
547 da7e44ee Michael Hanselmann
if __name__ == "__main__":
  # Ctrl-C in the controlling process aborts the whole run with exit code 1
  try:
    main()
  except KeyboardInterrupt:
    print("Received KeyboardInterrupt, aborting")
    sys.exit(1)