Statistics
| Branch: | Tag: | Revision:

root / tools / ganeti-listrunner @ 1fe10404

History | View | Annotate | Download (18.7 kB)

1 da7e44ee Michael Hanselmann
#!/usr/bin/python
2 da7e44ee Michael Hanselmann
#
3 da7e44ee Michael Hanselmann
4 99a11adc Iustin Pop
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
5 da7e44ee Michael Hanselmann
#
6 da7e44ee Michael Hanselmann
# This program is free software; you can redistribute it and/or modify
7 da7e44ee Michael Hanselmann
# it under the terms of the GNU General Public License as published by
8 da7e44ee Michael Hanselmann
# the Free Software Foundation; either version 2 of the License, or
9 da7e44ee Michael Hanselmann
# (at your option) any later version.
10 da7e44ee Michael Hanselmann
#
11 da7e44ee Michael Hanselmann
# This program is distributed in the hope that it will be useful, but
12 da7e44ee Michael Hanselmann
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 da7e44ee Michael Hanselmann
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 da7e44ee Michael Hanselmann
# General Public License for more details.
15 da7e44ee Michael Hanselmann
#
16 da7e44ee Michael Hanselmann
# You should have received a copy of the GNU General Public License
17 da7e44ee Michael Hanselmann
# along with this program; if not, write to the Free Software
18 da7e44ee Michael Hanselmann
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 da7e44ee Michael Hanselmann
# 02110-1301, USA.
20 da7e44ee Michael Hanselmann
21 da7e44ee Michael Hanselmann
"""Run an executable on a list of hosts.
22 da7e44ee Michael Hanselmann
23 da7e44ee Michael Hanselmann
Script to serially run an executable on a list of hosts via ssh
24 da7e44ee Michael Hanselmann
with password auth as root. If the provided log dir does not yet
25 da7e44ee Michael Hanselmann
exist, it will try to create it.
26 da7e44ee Michael Hanselmann
27 da7e44ee Michael Hanselmann
Implementation:
28 da7e44ee Michael Hanselmann
 - the main process spawns up to batch_size children, which:
29 da7e44ee Michael Hanselmann
 - connects to the remote host via ssh as root
30 da7e44ee Michael Hanselmann
 - uploads the executable with a random name to /tmp via sftp
31 da7e44ee Michael Hanselmann
 - chmod 500s it
32 da7e44ee Michael Hanselmann
 - via ssh: chdirs into the upload directory and runs the script
33 da7e44ee Michael Hanselmann
 - deletes it
34 da7e44ee Michael Hanselmann
 - writes status messages and all output to one logfile per host
35 da7e44ee Michael Hanselmann
 - the main process gathers then the status of the children and
36 da7e44ee Michael Hanselmann
   reports the success/failure ratio
37 da7e44ee Michael Hanselmann
 - entire script can be aborted with Ctrl-C
38 da7e44ee Michael Hanselmann
39 da7e44ee Michael Hanselmann
Security considerations:
40 da7e44ee Michael Hanselmann
 - the root password for the remote hosts is stored in memory for the
41 da7e44ee Michael Hanselmann
   runtime of the script
42 da7e44ee Michael Hanselmann
 - the executable to be run on the remote host is handled the following way:
43 da7e44ee Michael Hanselmann
   - try to create a random directory with permissions 700 on the
44 da7e44ee Michael Hanselmann
     remote host, abort further processing on this host if this fails
45 da7e44ee Michael Hanselmann
   - upload the executable to a random filename in that directory
46 da7e44ee Michael Hanselmann
   - set executable permissions to 500
47 da7e44ee Michael Hanselmann
   - run the executable
48 da7e44ee Michael Hanselmann
   - delete the executable and the directory on the remote host
49 da7e44ee Michael Hanselmann
50 da7e44ee Michael Hanselmann
"""
51 da7e44ee Michael Hanselmann
52 b459a848 Andrea Spadaccini
# pylint: disable=C0103
53 da7e44ee Michael Hanselmann
# C0103: Invalid name ganeti-listrunner
54 da7e44ee Michael Hanselmann
55 da7e44ee Michael Hanselmann
import errno
56 b74c0684 Iustin Pop
import optparse
57 da7e44ee Michael Hanselmann
import getpass
58 da7e44ee Michael Hanselmann
import logging
59 da7e44ee Michael Hanselmann
import os
60 da7e44ee Michael Hanselmann
import random
61 da7e44ee Michael Hanselmann
import select
62 da7e44ee Michael Hanselmann
import socket
63 da7e44ee Michael Hanselmann
import sys
64 da7e44ee Michael Hanselmann
import time
65 da7e44ee Michael Hanselmann
import traceback
66 da7e44ee Michael Hanselmann
67 78062de9 Michael Hanselmann
try:
68 78062de9 Michael Hanselmann
  import paramiko
69 78062de9 Michael Hanselmann
except ImportError:
70 78062de9 Michael Hanselmann
  print >> sys.stderr, \
71 78062de9 Michael Hanselmann
    ("The \"paramiko\" module could not be imported. Install it from your"
72 78062de9 Michael Hanselmann
     " distribution's repository. The package is usually named"
73 78062de9 Michael Hanselmann
     " \"python-paramiko\".")
74 78062de9 Michael Hanselmann
  sys.exit(1)
75 da7e44ee Michael Hanselmann
76 da7e44ee Michael Hanselmann
77 da7e44ee Michael Hanselmann
# Path prefix of the per-run remote work directory; UploadFiles appends
# two random numbers to it to build the actual directory name.
REMOTE_PATH_BASE = "/tmp/listrunner"
78 da7e44ee Michael Hanselmann
79 b74c0684 Iustin Pop
# Usage line handed to optparse (which expands "%prog" itself)
USAGE = " ".join([
  "%prog -l logdir {-c command | -x /path/to/file} [-b batch_size]",
  "{-f hostfile|-h hosts} [-u username]",
  "[-p password_file | -A]",
  ])
82 b74c0684 Iustin Pop
83 da7e44ee Michael Hanselmann
84 da7e44ee Michael Hanselmann
def LogDirUseable(logdir):
  """Ensure log file directory is available and usable.

  Creates logdir if it does not exist yet, then checks that files can be
  created in it by writing and removing a randomly named probe file.

  @param logdir: directory the per-host logfiles will be written to
  @return: True if the probe file could be written and removed again,
      False otherwise
  @raise OSError: if logdir cannot be created for any reason other than
      it already existing

  """
  testfile = "%s/test-%s-%s.deleteme" % (logdir, random.random(),
                                         random.random())
  try:
    os.mkdir(logdir)
  except OSError as err:
    if err.errno != errno.EEXIST:
      raise

  try:
    # plain append mode; the former "aw" is not a valid mode string and
    # is rejected outright by newer Python versions
    logtest = open(testfile, "a")
    try:
      logtest.write("log file writeability test\n")
    finally:
      logtest.close()
    os.unlink(testfile)
  except (OSError, IOError):
    # best effort: do not leave a half-written probe file behind
    try:
      os.unlink(testfile)
    except OSError:
      pass
    return False

  return True
101 da7e44ee Michael Hanselmann
102 da7e44ee Michael Hanselmann
103 da7e44ee Michael Hanselmann
def GetTimeStamp(timestamp=None):
  """Format a point in time as an ISO8601 string.

  @param timestamp: time tuple as returned by time.localtime(); when
      omitted, the current local time is used
  @return: string of the form YYYY-MM-DDTHH:MM:SS

  """
  when = time.localtime() if timestamp is None else timestamp
  return time.strftime("%Y-%m-%dT%H:%M:%S", when)
115 da7e44ee Michael Hanselmann
116 da7e44ee Michael Hanselmann
117 da7e44ee Michael Hanselmann
def PingByTcp(target, port, timeout=10, live_port_needed=False, source=None):
  """Simple ping implementation using TCP connect(2).

  Try to do a TCP connect(2) from an optional source IP to the
  specified target IP and the specified target port. If the optional
  parameter live_port_needed is set to true, requires the remote end
  to accept the connection. The timeout is specified in seconds and
  defaults to 10 seconds. If the source optional argument is not
  passed, the source address selection is left to the kernel,
  otherwise we try to connect using the passed address (failures to
  bind other than EADDRNOTAVAIL will be ignored).

  @param target: address of the host to probe
  @param port: TCP port to connect to
  @param timeout: connect timeout in seconds
  @param live_port_needed: if False, a refused connection still counts
      as success
  @param source: optional local address to bind to before connecting
  @return: True if the target counts as reachable, False otherwise

  """
  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  try:
    if source is not None:
      try:
        sock.bind((source, 0))
      except socket.error as err:
        # compare the errno attribute; the exception object itself never
        # equals an errno constant
        if err.errno == errno.EADDRNOTAVAIL:
          return False

    sock.settimeout(timeout)

    try:
      sock.connect((target, port))
    except socket.timeout:
      return False
    except socket.error as err:
      # a refusal proves the host is up, which is good enough unless
      # the caller insists on a listening port
      return (not live_port_needed) and (err.errno == errno.ECONNREFUSED)

    return True
  finally:
    # always release the descriptor, also on the failure paths
    sock.close()
153 da7e44ee Michael Hanselmann
154 da7e44ee Michael Hanselmann
155 da7e44ee Michael Hanselmann
def GetHosts(hostsfile):
  """Return list of hosts from hostfile.

  Reads the hostslist file and returns a list of hosts.
  Expects the hostslist file to contain one hostname per line.
  Surrounding whitespace (including the line terminator) is removed and
  empty lines are skipped, so they are not mistaken for hostnames.

  @param hostsfile: path of the file to read
  @return: list of hostnames, in file order

  Exits the program with status 2 if the file cannot be opened.

  """
  try:
    datafile = open(hostsfile, "r")
  except IOError as msg:
    print("Failed to open hosts file %s: %s" % (hostsfile, msg))
    sys.exit(2)

  try:
    lines = datafile.readlines()
  finally:
    datafile.close()

  stripped = [line.strip() for line in lines]
  return [host for host in stripped if host]
172 da7e44ee Michael Hanselmann
173 da7e44ee Michael Hanselmann
174 da7e44ee Michael Hanselmann
def WriteLog(message, logfile):
  """Writes message, terminated by newline, to logfile.

  The message is prefixed with the current timestamp and appended to
  the file. Not being able to log is treated as fatal: on any failure
  to open or write the file, the problem and the lost message are
  printed and the process exits with status 1.

  @param message: text to log
  @param logfile: path of the logfile to append to

  """
  try:
    # plain append mode; the former "aw" is not a valid mode string
    handle = open(logfile, "a")
  except IOError as msg:
    print("failed to open log file %s: %s" % (logfile, msg))
    print("log message was: %s" % message)
    sys.exit(1)  # not being able to log is critical

  try:
    try:
      handle.write("%s %s\n" % (GetTimeStamp(), message))
    finally:
      handle.close()
  except IOError as msg:
    # logfile still names the path here, so the report is meaningful
    print("failed to write to logfile %s: %s" % (logfile, msg))
    print("log message was: %s" % message)
    sys.exit(1)  # not being able to log is critical
190 da7e44ee Michael Hanselmann
191 da7e44ee Michael Hanselmann
192 da7e44ee Michael Hanselmann
def GetAgentKeys():
  """Fetch the keys offered by an ssh agent.

  @return: list of the agent's keys, or an empty list if paramiko
      raises SSHException while querying the agent

  """
  try:
    keys = list(paramiko.Agent().get_keys())
  except paramiko.SSHException:
    keys = []
  return keys
199 da7e44ee Michael Hanselmann
200 da7e44ee Michael Hanselmann
201 99a11adc Iustin Pop
def SetupSshConnection(host, username, password, use_agent, logfile):
  """Setup the ssh connection used for all later steps.

  This function sets up the ssh connection that will be used both
  for upload and remote command execution. Every agent key (if
  use_agent is set) and finally the password (if given) are tried in
  turn until one of them authenticates.

  On success, it will return paramiko.Transport object with an
  already logged in session. On failure, False will be returned.

  @param host: the host to connect to (port 22)
  @param username: SSH username
  @param password: SSH password, or None to not try password auth
  @param use_agent: whether keys from an ssh agent should be tried
  @param logfile: path of the logfile for this host

  """
  # check if target is willing to talk to us at all
  if not PingByTcp(host, 22, live_port_needed=True):
    WriteLog("ERROR: FAILURE_NOT_REACHABLE", logfile)
    print("  - ERROR: host not reachable on 22/tcp")
    return False

  if use_agent:
    keys = GetAgentKeys()
  else:
    keys = []

  # one (description, connect() keyword arguments) pair per auth method
  attempts = [("key %d" % idx, {"pkey": key})
              for idx, key in enumerate(keys)]
  if password is not None:
    attempts.append(("password", {"password": password}))

  # deal with logging out of paramiko.transport
  handler = None

  for desc, kwargs in attempts:
    transport = None
    try:
      transport = paramiko.Transport((host, 22))

      # only try to setup the logging handler once
      if not handler:
        handler = logging.StreamHandler()
        handler.setLevel(logging.ERROR)
        log = logging.getLogger(transport.get_log_channel())
        log.addHandler(handler)

      transport.connect(username=username, **kwargs) # pylint: disable=W0142
    except (socket.gaierror, socket.error, paramiko.SSHException):
      # release the failed transport before trying the next method,
      # otherwise its socket stays open
      if transport is not None:
        transport.close()
      continue

    WriteLog("ssh connection established using %s" % desc, logfile)
    # FIXME: setting up the sftp & shell channels immediately after
    # session setup was observed to fail sometimes; a one second delay
    # here was reported to help but is currently not applied
    return transport

  methods = ", ".join([desc for desc, _ in attempts])
  WriteLog("ERROR: FAILURE_CONNECTION_SETUP (tried %s) " % methods, logfile)
  WriteLog("aborted", logfile)
  print("  - ERROR: connection setup failed (tried %s)" % methods)

  return False
258 da7e44ee Michael Hanselmann
259 da7e44ee Michael Hanselmann
260 da7e44ee Michael Hanselmann
def UploadFiles(connection, executable, filelist, logfile):
  """Uploads the specified files via sftp.

  Uploads the specified files to a random, freshly created directory with
  a temporary name under /tmp. All uploaded files are chmod 0400 after upload
  with the exception of executable, which is chmod 500.

  Upon success, returns the absolute path to the remote upload directory,
  but will return False upon failure.

  @param connection: transport the sftp client is created from
  @param executable: local path of the file that must end up executable
  @param filelist: local paths of all files to upload
  @param logfile: path of the logfile for this host

  """
  remote_dir = "%s.%s-%s" % (REMOTE_PATH_BASE,
                             random.random(), random.random())

  sftp = None
  try:
    try:
      sftp = paramiko.SFTPClient.from_transport(connection)
      sftp.mkdir(remote_dir, mode=0o700)
      for item in filelist:
        remote_file = "%s/%s" % (remote_dir, os.path.basename(item))
        WriteLog("uploading %s to remote %s" % (item, remote_file), logfile)
        sftp.put(item, remote_file)
        if item == executable:
          sftp.chmod(remote_file, 0o500)
        else:
          sftp.chmod(remote_file, 0o400)
    finally:
      # close the sftp client on the error path as well
      if sftp is not None:
        sftp.close()
  except IOError as err:
    WriteLog("ERROR: FAILURE_UPLOAD: %s" % err, logfile)
    return False

  return remote_dir
290 da7e44ee Michael Hanselmann
291 da7e44ee Michael Hanselmann
292 da7e44ee Michael Hanselmann
def CleanupRemoteDir(connection, upload_dir, filelist, logfile):
  """Cleans out and removes the remote work directory.

  Removes the uploaded copy of every file in filelist from upload_dir
  and then the directory itself.

  @param connection: transport the sftp client is created from
  @param upload_dir: remote directory the files were uploaded to
  @param filelist: local paths of the files that were uploaded
  @param logfile: path of the logfile for this host
  @return: True on success, False if an IOError occurred

  """
  sftp = None
  try:
    try:
      sftp = paramiko.SFTPClient.from_transport(connection)
      for item in filelist:
        fullpath = "%s/%s" % (upload_dir, os.path.basename(item))
        WriteLog("removing remote %s" % fullpath, logfile)
        sftp.remove(fullpath)
      sftp.rmdir(upload_dir)
    finally:
      # close the sftp client on the error path as well
      if sftp is not None:
        sftp.close()
  except IOError as err:
    WriteLog("ERROR: FAILURE_CLEANUP: %s" % err, logfile)
    return False

  return True
307 da7e44ee Michael Hanselmann
308 da7e44ee Michael Hanselmann
309 da7e44ee Michael Hanselmann
def RunRemoteCommand(connection, command, logfile):
  """Execute the command via ssh on the remote host.

  The command is run with stderr redirected into stdout and everything
  it prints is written to the logfile, one log entry per output line.

  @param connection: transport to open the session on
  @param command: shell command line to execute
  @param logfile: path of the logfile for this host
  @return: True if the command was started and its output could be read
      to the end; False if starting it failed or reading the output hit
      a socket error (output received up to then is still logged)

  """
  session = connection.open_session()
  try:
    session.setblocking(0)

    # the following dance is needed because paramiko changed APIs:
    # from returning True/False for success to always returning None
    # and throwing an exception in case of problems.
    # Both the old and the new API are supported.
    started = True
    failure = None
    try:
      if session.exec_command("%s 2>&1" % command) is False:
        started = False
    except paramiko.SSHException as err:
      started = False
      failure = err

    if not started:
      WriteLog("ERROR: FAILURE_COMMAND_EXECUTION: %s" % failure, logfile)
      return False

    ### Read when data is available
    chunks = []
    read_ok = True
    while True:
      # block until the channel has something to read (or reached EOF)
      select.select([session], [], [])
      try:
        data = session.recv(1024)
      except socket.timeout as err:
        data = None
        read_ok = False
        WriteLog("FAILED: socket.timeout %s" % err, logfile)
      except socket.error as err:
        data = None
        read_ok = False
        WriteLog("FAILED: socket.error %s" % err, logfile)
      if not data:
        break
      chunks.append(data)
      # short pause between reads
      select.select([], [], [], .1)

    # a failed read must not be reported as success
    if read_ok:
      WriteLog("SUCCESS: command output follows", logfile)
    for line in "".join(chunks).splitlines():
      WriteLog("output = %s" % line, logfile)
    if read_ok:
      WriteLog("command execution completed", logfile)

    return read_ok
  finally:
    # release the channel on every exit path
    session.close()
353 da7e44ee Michael Hanselmann
354 da7e44ee Michael Hanselmann
355 99a11adc Iustin Pop
def HostWorker(logdir, username, password, use_agent, hostname,
               executable, exec_args, command, filelist):
  """Per-host worker.

  This function does not return - it's the main code of the childs,
  which exit at the end of this function. The exit code 0 or 1 will be
  interpreted by the parent.

  @param logdir: the directory where the logfiles must be created
  @param username: SSH username
  @param password: SSH password
  @param use_agent: whether we should instead use an agent
  @param hostname: the hostname to connect to
  @param executable: the executable to upload, if not None
  @param exec_args: Additional arguments for executable
  @param command: the command to run
  @param filelist: auxiliary files to upload

  """
  # in the child/worker process
  logfile = "%s/%s.log" % (logdir, hostname)
  print("%s - starting" % hostname)
  result = 0  # optimism, I know
  try:
    connection = SetupSshConnection(hostname, username,
                                    password, use_agent, logfile)
    if connection is not False:
      try:
        upload_dir = None
        if executable is not None:
          print("  %s: uploading files" % hostname)
          upload_dir = UploadFiles(connection, executable,
                                   filelist, logfile)
          if upload_dir is False:
            # without the upload there is nothing to run, and the name
            # of the remote directory is unknown, so nothing to clean up
            print("  %s: file upload failed, check log for details" %
                  hostname)
            result = 1
          else:
            command = ("cd %s && ./%s" %
                       (upload_dir, os.path.basename(executable)))
            if exec_args:
              command += " %s" % exec_args

        if upload_dir is not False:
          print("  %s: executing remote command" % hostname)
          cmd_result = RunRemoteCommand(connection, command, logfile)
          if cmd_result is True:
            print("  %s: remote command execution successful" % hostname)
          else:
            print("  %s: remote command execution failed,"
                  " check log for details" % hostname)
            result = 1

          if executable is not None:
            print("  %s: cleaning up remote work dir" % hostname)
            cln_result = CleanupRemoteDir(connection, upload_dir,
                                          filelist, logfile)
            if cln_result is False:
              print("  %s: remote work dir cleanup failed, check"
                    " log for details" % hostname)
              result = 1
      finally:
        # close the connection even if one of the steps raised
        connection.close()
    else:
      print("  %s: connection setup failed, skipping" % hostname)
      result = 1
  except KeyboardInterrupt:
    print("  %s: received KeyboardInterrupt, aborting" % hostname)
    WriteLog("ERROR: ABORT_KEYBOARD_INTERRUPT", logfile)
    result = 1
  except Exception as err:
    result = 1
    trace = traceback.format_exc()
    msg = "ERROR: UNHANDLED_EXECPTION_ERROR: %s\nTrace: %s" % (err, trace)
    WriteLog(msg, logfile)
    print("  %s: %s" % (hostname, msg))
  # and exit with exit code 0 or 1, so the parent can compute statistics
  sys.exit(result)
422 da7e44ee Michael Hanselmann
423 da7e44ee Michael Hanselmann
424 99a11adc Iustin Pop
def LaunchWorker(child_pids, logdir, username, password, use_agent, hostname,
                 executable, exec_args, command, filelist):
  """Fork off the worker for a single host.

  The child process runs HostWorker with the given arguments; the
  parent only records the child in child_pids.

  @param child_pids: dictionary mapping child pids to hostnames, updated
      in place by the parent

  All other arguments are the same as for HostWorker.

  """
  hostname = hostname.rstrip("\n")
  pid = os.fork()
  if pid <= 0:
    # child: do the actual work
    HostWorker(logdir, username, password, use_agent, hostname,
               executable, exec_args, command, filelist)
    return
  # controller: just remember which host this child handles
  child_pids[pid] = hostname
440 da7e44ee Michael Hanselmann
441 da7e44ee Michael Hanselmann
442 b74c0684 Iustin Pop
def ParseOptions():
  """Parses the command line options.

  In case of command line errors, it will show the usage and exit the
  program.

  @return: the options in a tuple: (logdir, executable, exec_args,
      hostfile, hostlist, command, use_agent, auxfiles, username,
      password, batch_size)

  """
  # "resolve" because -h is used for the host list here, which conflicts
  # with optparse's built-in -h for help
  parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                 conflict_handler="resolve")

  parser.add_option("-l", dest="logdir", default=None,
                    help="directory to write logfiles to")
  parser.add_option("-x", dest="executable", default=None,
                    help="executable to run on remote host(s)",)
  parser.add_option("-f", dest="hostfile", default=None,
                    help="hostlist file (one host per line)")
  parser.add_option("-h", dest="hostlist", default=None, metavar="HOSTS",
                    help="comma-separated list of hosts or single hostname",)
  parser.add_option("-a", dest="auxfiles", action="append", default=[],
                    help="optional auxiliary file to upload"
                    " (can be given multiple times)",
                    metavar="FILE")
  parser.add_option("-c", dest="command", default=None,
                    help="shell command to run on remote host(s)")
  parser.add_option("-b", dest="batch_size", default=15, type="int",
                    help="batch-size, how many hosts to process"
                    " in parallel [15]")
  parser.add_option("-u", dest="username", default="root",
                    help="username used to connect [root]")
  # the value is the name of a file holding the password, not the
  # password itself (see USAGE)
  parser.add_option("-p", dest="password", default=None,
                    metavar="PASSWORD_FILE",
                    help="file to read the password used to authenticate"
                    " from (when not using an agent)")
  parser.add_option("-A", dest="use_agent", default=False, action="store_true",
                    help="instead of password, use keys from an SSH agent")
  parser.add_option("--args", dest="exec_args", default=None,
                    help="Arguments to be passed to executable (-x)")

  opts, args = parser.parse_args()

  if opts.executable and opts.command:
    parser.error("Options -x and -c conflict with each other")
  if not (opts.executable or opts.command):
    parser.error("One of -x and -c must be given")
  if opts.command and opts.exec_args:
    parser.error("Can't specify arguments when using custom command")
  if not opts.logdir:
    parser.error("Option -l is required")
  if opts.hostfile and opts.hostlist:
    parser.error("Options -f and -h conflict with each other")
  if not (opts.hostfile or opts.hostlist):
    parser.error("One of -f or -h must be given")
  if opts.batch_size < 1:
    parser.error("Batch size (-b) must be at least 1")
  if args:
    parser.error("This program doesn't take any arguments, passed in: %s" %
                 ", ".join(args))

  return (opts.logdir, opts.executable, opts.exec_args,
          opts.hostfile, opts.hostlist,
          opts.command, opts.use_agent, opts.auxfiles, opts.username,
          opts.password, opts.batch_size)
505 b74c0684 Iustin Pop
506 b74c0684 Iustin Pop
507 da7e44ee Michael Hanselmann
def main():
508 da7e44ee Michael Hanselmann
  """main."""
509 6eedd356 Michael Hanselmann
  (logdir, executable, exec_args, hostfile, hostlist,
510 b74c0684 Iustin Pop
   command, use_agent, auxfiles, username,
511 b74c0684 Iustin Pop
   password, batch_size) = ParseOptions()
512 da7e44ee Michael Hanselmann
513 da7e44ee Michael Hanselmann
  ### Unbuffered sys.stdout
514 da7e44ee Michael Hanselmann
  sys.stdout = os.fdopen(1, "w", 0)
515 da7e44ee Michael Hanselmann
516 da7e44ee Michael Hanselmann
  if LogDirUseable(logdir) is False:
517 da7e44ee Michael Hanselmann
    print "ERROR: cannot create logfiles in dir %s, aborting" % logdir
518 da7e44ee Michael Hanselmann
    sys.exit(1)
519 da7e44ee Michael Hanselmann
520 da7e44ee Michael Hanselmann
  if use_agent:
521 99a11adc Iustin Pop
    pass
522 da7e44ee Michael Hanselmann
  elif password:
523 da7e44ee Michael Hanselmann
    try:
524 da7e44ee Michael Hanselmann
      fh = file(password)
525 da7e44ee Michael Hanselmann
      pwvalue = fh.readline().strip()
526 da7e44ee Michael Hanselmann
      fh.close()
527 da7e44ee Michael Hanselmann
    except IOError, e:
528 da7e44ee Michael Hanselmann
      print "error: can not read in from password file %s: %s" % (password, e)
529 da7e44ee Michael Hanselmann
      sys.exit(1)
530 da7e44ee Michael Hanselmann
    password = pwvalue
531 da7e44ee Michael Hanselmann
  else:
532 da7e44ee Michael Hanselmann
    password = getpass.getpass("%s's password for all nodes: " % username)
533 da7e44ee Michael Hanselmann
534 da7e44ee Michael Hanselmann
  if hostfile:
535 da7e44ee Michael Hanselmann
    hosts = GetHosts(hostfile)
536 da7e44ee Michael Hanselmann
  else:
537 da7e44ee Michael Hanselmann
    if "," in hostlist:
538 da7e44ee Michael Hanselmann
      hostlist = hostlist.rstrip(",")  # commandline robustness
539 da7e44ee Michael Hanselmann
      hosts = hostlist.split(",")
540 da7e44ee Michael Hanselmann
    else:
541 da7e44ee Michael Hanselmann
      hosts = [hostlist]
542 da7e44ee Michael Hanselmann
543 da7e44ee Michael Hanselmann
  successes = failures = 0
544 da7e44ee Michael Hanselmann
545 da7e44ee Michael Hanselmann
  filelist = auxfiles[:]
546 da7e44ee Michael Hanselmann
  filelist.append(executable)
547 da7e44ee Michael Hanselmann
548 da7e44ee Michael Hanselmann
  # initial batch
549 da7e44ee Michael Hanselmann
  batch = hosts[:batch_size]
550 da7e44ee Michael Hanselmann
  hosts = hosts[batch_size:]
551 da7e44ee Michael Hanselmann
  child_pids = {}
552 da7e44ee Michael Hanselmann
  for hostname in batch:
553 99a11adc Iustin Pop
    LaunchWorker(child_pids, logdir, username, password, use_agent, hostname,
554 6eedd356 Michael Hanselmann
                 executable, exec_args, command, filelist)
555 da7e44ee Michael Hanselmann
556 da7e44ee Michael Hanselmann
  while child_pids:
557 da7e44ee Michael Hanselmann
    pid, status = os.wait()
558 da7e44ee Michael Hanselmann
    hostname = child_pids.pop(pid, "<unknown host>")
559 da7e44ee Michael Hanselmann
    print "  %s: done (in parent)" % hostname
560 da7e44ee Michael Hanselmann
    if os.WIFEXITED(status) and os.WEXITSTATUS(status) == 0:
561 da7e44ee Michael Hanselmann
      successes += 1
562 da7e44ee Michael Hanselmann
    else:
563 da7e44ee Michael Hanselmann
      failures += 1
564 da7e44ee Michael Hanselmann
    if hosts:
565 99a11adc Iustin Pop
      LaunchWorker(child_pids, logdir, username, password, use_agent,
566 6eedd356 Michael Hanselmann
                   hosts.pop(0), executable, exec_args, command, filelist)
567 da7e44ee Michael Hanselmann
568 da7e44ee Michael Hanselmann
  print
569 da7e44ee Michael Hanselmann
  print "All done, %s successful and %s failed hosts" % (successes, failures)
570 da7e44ee Michael Hanselmann
571 da7e44ee Michael Hanselmann
  sys.exit(0)
572 da7e44ee Michael Hanselmann
573 da7e44ee Michael Hanselmann
574 da7e44ee Michael Hanselmann
if __name__ == "__main__":
575 da7e44ee Michael Hanselmann
  try:
576 da7e44ee Michael Hanselmann
    main()
577 da7e44ee Michael Hanselmann
  except KeyboardInterrupt:
578 da7e44ee Michael Hanselmann
    print "Received KeyboardInterrupt, aborting"
579 da7e44ee Michael Hanselmann
    sys.exit(1)