root / tools / ganeti-listrunner @ e1ab08db
History | View | Annotate | Download (16.9 kB)
1 | da7e44ee | Michael Hanselmann | #!/usr/bin/python |
---|---|---|---|
2 | da7e44ee | Michael Hanselmann | # |
3 | da7e44ee | Michael Hanselmann | |
4 | da7e44ee | Michael Hanselmann | # Copyright (C) 2006, 2007, 2010 Google Inc. |
5 | da7e44ee | Michael Hanselmann | # |
6 | da7e44ee | Michael Hanselmann | # This program is free software; you can redistribute it and/or modify |
7 | da7e44ee | Michael Hanselmann | # it under the terms of the GNU General Public License as published by |
8 | da7e44ee | Michael Hanselmann | # the Free Software Foundation; either version 2 of the License, or |
9 | da7e44ee | Michael Hanselmann | # (at your option) any later version. |
10 | da7e44ee | Michael Hanselmann | # |
11 | da7e44ee | Michael Hanselmann | # This program is distributed in the hope that it will be useful, but |
12 | da7e44ee | Michael Hanselmann | # WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | da7e44ee | Michael Hanselmann | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | da7e44ee | Michael Hanselmann | # General Public License for more details. |
15 | da7e44ee | Michael Hanselmann | # |
16 | da7e44ee | Michael Hanselmann | # You should have received a copy of the GNU General Public License |
17 | da7e44ee | Michael Hanselmann | # along with this program; if not, write to the Free Software |
18 | da7e44ee | Michael Hanselmann | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
19 | da7e44ee | Michael Hanselmann | # 02110-1301, USA. |
20 | da7e44ee | Michael Hanselmann | |
21 | da7e44ee | Michael Hanselmann | """Run an executable on a list of hosts. |
22 | da7e44ee | Michael Hanselmann | |
23 | da7e44ee | Michael Hanselmann | Script to serially run an executable on a list of hosts via ssh |
24 | da7e44ee | Michael Hanselmann | with password auth as root. If the provided log dir does not yet |
25 | da7e44ee | Michael Hanselmann | exist, it will try to create it. |
26 | da7e44ee | Michael Hanselmann | |
27 | da7e44ee | Michael Hanselmann | Implementation: |
28 | da7e44ee | Michael Hanselmann | - the main process spawns up to batch_size children, which: |
29 | da7e44ee | Michael Hanselmann | - connects to the remote host via ssh as root |
30 | da7e44ee | Michael Hanselmann | - uploads the executable with a random name to /tmp via sftp |
31 | da7e44ee | Michael Hanselmann | - chmod 500s it |
32 | da7e44ee | Michael Hanselmann | - via ssh: chdirs into the upload directory and runs the script |
33 | da7e44ee | Michael Hanselmann | - deletes it |
34 | da7e44ee | Michael Hanselmann | - writes status messages and all output to one logfile per host |
35 | da7e44ee | Michael Hanselmann | - the main process gathers then the status of the children and |
36 | da7e44ee | Michael Hanselmann | reports the success/failure ratio |
37 | da7e44ee | Michael Hanselmann | - entire script can be aborted with Ctrl-C |
38 | da7e44ee | Michael Hanselmann | |
39 | da7e44ee | Michael Hanselmann | Security considerations: |
40 | da7e44ee | Michael Hanselmann | - the root password for the remote hosts is stored in memory for the |
41 | da7e44ee | Michael Hanselmann | runtime of the script |
42 | da7e44ee | Michael Hanselmann | - the executable to be run on the remote host is handled the following way: |
43 | da7e44ee | Michael Hanselmann | - try to create a random directory with permissions 700 on the |
44 | da7e44ee | Michael Hanselmann | remote host, abort further processing on this host if this fails |
45 | da7e44ee | Michael Hanselmann | - upload the executable to a random filename in that directory |
46 | da7e44ee | Michael Hanselmann | - set executable permissions to 500 |
47 | da7e44ee | Michael Hanselmann | - run the executable |
48 | da7e44ee | Michael Hanselmann | - delete the executable and the directory on the remote host |
49 | da7e44ee | Michael Hanselmann | |
50 | da7e44ee | Michael Hanselmann | """ |
51 | da7e44ee | Michael Hanselmann | |
52 | da7e44ee | Michael Hanselmann | # pylint: disable-msg=C0103 |
53 | da7e44ee | Michael Hanselmann | # C0103: Invalid name ganeti-listrunner |
54 | da7e44ee | Michael Hanselmann | |
55 | da7e44ee | Michael Hanselmann | import errno |
56 | da7e44ee | Michael Hanselmann | import getopt |
57 | da7e44ee | Michael Hanselmann | import getpass |
58 | da7e44ee | Michael Hanselmann | import logging |
59 | da7e44ee | Michael Hanselmann | import os |
60 | da7e44ee | Michael Hanselmann | import random |
61 | da7e44ee | Michael Hanselmann | import select |
62 | da7e44ee | Michael Hanselmann | import socket |
63 | da7e44ee | Michael Hanselmann | import sys |
64 | da7e44ee | Michael Hanselmann | import time |
65 | da7e44ee | Michael Hanselmann | import traceback |
66 | da7e44ee | Michael Hanselmann | |
67 | da7e44ee | Michael Hanselmann | import paramiko |
68 | da7e44ee | Michael Hanselmann | |
69 | da7e44ee | Michael Hanselmann | |
70 | da7e44ee | Michael Hanselmann | REMOTE_PATH_BASE = "/tmp/listrunner" |
71 | da7e44ee | Michael Hanselmann | |
72 | da7e44ee | Michael Hanselmann | |
def LogDirUseable(logdir):
  """Ensure log file directory is available and usable.

  Creates C{logdir} if it does not exist yet, then checks that a file can
  be created, written and removed inside it.

  @param logdir: path of the directory the log files will be written to
  @return: True if a test file could be written and removed again,
      False otherwise
  @raise OSError: if the directory cannot be created for any reason other
      than it already existing

  """
  testfile = "%s/test-%s-%s.deleteme" % (logdir, random.random(),
                                         random.random())
  try:
    os.mkdir(logdir)
  except OSError as err:
    # an already existing directory is fine, everything else is fatal
    if err.errno != errno.EEXIST:
      raise

  try:
    # plain append mode; the combined "aw" mode string is not valid
    logtest = open(testfile, "a")
    try:
      logtest.write("log file writeability test\n")
    finally:
      # do not leak the handle if the write fails
      logtest.close()
    os.unlink(testfile)
  except (OSError, IOError):
    return False

  return True
90 | da7e44ee | Michael Hanselmann | |
91 | da7e44ee | Michael Hanselmann | |
def ShowHelp(executable):
  """Print short usage information.

  @param executable: program name to show in the usage line

  """
  # the usage line and the option list cover every option main() parses,
  # including -a and -A; -p takes a file name, not the password itself
  print("usage: %s -l logdir [-c|-x] value [-b batch_size]"
        " [-f hostfile|-h hosts] [-a auxfile] [-u username]"
        " [-p password_file] [-A]" % executable)
  print("""  -l  logdir to write logfiles to
  -x  executable to run on remote host(s)
  -c  shell command to run on remote host(s)
  -f  hostlist file (one host per line)
  -a  optional auxiliary file to upload (can be given multiple times)
  -b  batch-size, how many hosts to process in parallel [15]
  -h  comma-separated list of hosts or single hostname
  -u  username used to connect [root]
  -p  file containing the password used to authenticate
  -A  authenticate using the keys offered by the ssh agent""")
106 | da7e44ee | Michael Hanselmann | |
107 | da7e44ee | Michael Hanselmann | |
def GetTimeStamp(timestamp=None):
  """Format a point in time as an ISO8601 string.

  @param timestamp: a time tuple as returned by time.localtime(); the
      current local time is used when it is not supplied
  @return: the timestamp formatted as YYYY-MM-DDTHH:MM:SS

  """
  when = time.localtime() if timestamp is None else timestamp
  return time.strftime("%Y-%m-%dT%H:%M:%S", when)
120 | da7e44ee | Michael Hanselmann | |
121 | da7e44ee | Michael Hanselmann | |
def PingByTcp(target, port, timeout=10, live_port_needed=False, source=None):
  """Simple ping implementation using TCP connect(2).

  Try to do a TCP connect(2) from an optional source IP to the
  specified target IP and the specified target port. If the optional
  parameter live_port_needed is set to true, requires the remote end
  to accept the connection. The timeout is specified in seconds and
  defaults to 10 seconds. If the source optional argument is not
  passed, the source address selection is left to the kernel,
  otherwise we try to connect using the passed address (failures to
  bind other than EADDRNOTAVAIL will be ignored).

  @param target: address or name of the host to contact
  @param port: TCP port to connect to
  @param timeout: connect timeout in seconds
  @param live_port_needed: if false, a refused connection also counts
      as the host being reachable
  @param source: optional local address to bind to before connecting
  @return: True if the target is considered reachable, False otherwise

  """
  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

  # the socket is closed on every exit path, not only on success
  try:
    if source is not None:
      try:
        sock.bind((source, 0))
      except socket.error as err:
        # compare the errno of the exception, not the exception object
        if err.errno == errno.EADDRNOTAVAIL:
          # the requested source address does not exist on this host
          return False

    sock.settimeout(timeout)

    try:
      sock.connect((target, port))
    except socket.timeout:
      return False
    except socket.error as err:
      # a refused connection still proves that the host itself is up
      return (not live_port_needed) and (err.errno == errno.ECONNREFUSED)

    return True
  finally:
    sock.close()
158 | da7e44ee | Michael Hanselmann | |
159 | da7e44ee | Michael Hanselmann | |
def GetHosts(hostsfile):
  """Return list of hosts from hostfile.

  Reads the hostslist file and returns a list of hosts.
  Expects the hostslist file to contain one hostname per line. Surrounding
  whitespace (including line endings) is removed and blank lines are
  skipped, so that no worker is started for an empty hostname.

  Prints an error and exits with status 2 if the file cannot be opened.

  @param hostsfile: path of the file listing the hosts
  @return: list of hostnames

  """
  try:
    datafile = open(hostsfile, "r")
  except IOError as msg:
    print("Failed to open hosts file %s: %s" % (hostsfile, msg))
    sys.exit(2)

  try:
    lines = datafile.readlines()
  finally:
    # close the file even if reading fails
    datafile.close()

  stripped = [line.strip() for line in lines]
  return [host for host in stripped if host]
177 | da7e44ee | Michael Hanselmann | |
178 | da7e44ee | Michael Hanselmann | |
def WriteLog(message, logfile):
  """Writes message, terminated by newline, to logfile.

  The message is prefixed with the current timestamp and appended to the
  file. If the file cannot be opened or written, an error is printed and
  the process exits with status 1.

  @param message: the text to log
  @param logfile: path of the log file to append to

  """
  try:
    # keep the path in "logfile" so error messages can still show it
    handle = open(logfile, "a")
  except IOError as msg:
    print("failed to open log file %s: %s" % (logfile, msg))
    print("log message was: %s" % message)
    sys.exit(1) # no being able to log is critical

  try:
    try:
      timestamp = GetTimeStamp()
      handle.write("%s %s\n" % (timestamp, message))
    finally:
      # close the handle even if the write fails
      handle.close()
  except IOError as msg:
    print("failed to write to logfile %s: %s" % (logfile, msg))
    print("log message was: %s" % message)
    sys.exit(1) # no being able to log is critical
195 | da7e44ee | Michael Hanselmann | |
196 | da7e44ee | Michael Hanselmann | |
def GetAgentKeys():
  """Fetch the keys offered by a running ssh agent.

  @return: list of the keys reported by the agent; an empty list if
      talking to the agent raises paramiko.SSHException

  """
  try:
    agent_keys = paramiko.Agent().get_keys()
  except paramiko.SSHException:
    return []
  return list(agent_keys)
204 | da7e44ee | Michael Hanselmann | |
205 | da7e44ee | Michael Hanselmann | |
def SetupSshConnection(host, username, password, keys, logfile):
  """Setup the ssh connection used for all later steps.

  This function sets up the ssh connection that will be used both
  for upload and remote command execution. Every key in C{keys} is tried
  in order, followed by the password if one was given.

  On success, it will return paramiko.Transport object with an
  already logged in session. On failure, False will be returned.

  @param host: the host to connect to (port 22)
  @param username: SSH username
  @param password: SSH password, or None to skip password authentication
  @param keys: SSH keys to try
  @param logfile: path of the log file for this host

  """
  # check if target is willing to talk to us at all
  if not PingByTcp(host, 22, live_port_needed=True):
    WriteLog("ERROR: FAILURE_NOT_REACHABLE", logfile)
    print(" - ERROR: host not reachable on 22/tcp")
    return False

  all_kwargs = [{"pkey": k} for k in keys]
  all_desc = ["key %d" % d for d in range(len(keys))]
  if password is not None:
    all_kwargs.append({"password": password})
    all_desc.append("password")

  # deal with logging out of paramiko.transport
  handler = None

  for desc, kwargs in zip(all_desc, all_kwargs):
    transport = None
    try:
      transport = paramiko.Transport((host, 22))

      # only try to setup the logging handler once
      if not handler:
        handler = logging.StreamHandler()
        handler.setLevel(logging.ERROR)
        log = logging.getLogger(transport.get_log_channel())
        log.addHandler(handler)

      transport.connect(username=username, **kwargs) # pylint: disable-msg=W0142
      WriteLog("ssh connection established using %s" % desc, logfile)
      # strange ... when establishing the session and then immediately
      # setting up the channels for sftp & shell from that, it sometimes
      # fails, but waiting 1 second after session setup makes it always work
      # time.sleep(1)
      # FIXME apparently needed to give sshd some time
      return transport
    except (socket.gaierror, socket.error, paramiko.SSHException):
      # release the failed transport before trying the next method
      if transport is not None:
        transport.close()
      continue

  methods = ", ".join(all_desc)
  WriteLog("ERROR: FAILURE_CONNECTION_SETUP (tried %s) " % methods, logfile)
  WriteLog("aborted", logfile)
  print(" - ERROR: connection setup failed (tried %s)" % methods)

  return False
259 | da7e44ee | Michael Hanselmann | |
260 | da7e44ee | Michael Hanselmann | |
def UploadFiles(connection, executable, filelist, logfile):
  """Uploads the specified files via sftp.

  Uploads the specified files to a random, freshly created directory with
  a temporary name under /tmp. All uploaded files are chmod 0400 after upload
  with the exception of executable, which is chmod 500.

  Upon success, returns the absolute path to the remote upload directory,
  but will return False upon failure.

  @param connection: the logged in paramiko.Transport to use
  @param executable: the entry of filelist that must be made executable
  @param filelist: local paths of the files to upload
  @param logfile: path of the log file for this host

  """
  remote_dir = "%s.%s-%s" % (REMOTE_PATH_BASE,
                             random.random(), random.random())

  sftp = None
  try:
    try:
      sftp = paramiko.SFTPClient.from_transport(connection)
      sftp.mkdir(remote_dir, mode=0o700)
      for item in filelist:
        remote_file = "%s/%s" % (remote_dir, item.split("/").pop())
        WriteLog("uploading %s to remote %s" % (item, remote_file), logfile)
        sftp.put(item, remote_file)
        if item == executable:
          sftp.chmod(remote_file, 0o500)
        else:
          sftp.chmod(remote_file, 0o400)
    finally:
      # close the sftp channel on failure too
      if sftp is not None:
        sftp.close()
  except IOError as err:
    WriteLog("ERROR: FAILURE_UPLOAD: %s" % err, logfile)
    return False

  return remote_dir
291 | da7e44ee | Michael Hanselmann | |
292 | da7e44ee | Michael Hanselmann | |
def CleanupRemoteDir(connection, upload_dir, filelist, logfile):
  """Cleans out and removes the remote work directory.

  Removes the remote copy of every file in filelist from upload_dir and
  then removes the directory itself.

  @param connection: the logged in paramiko.Transport to use
  @param upload_dir: absolute path of the remote work directory
  @param filelist: local paths of the files that were uploaded
  @param logfile: path of the log file for this host
  @return: True on success, False if an IOError occurred

  """
  sftp = None
  try:
    try:
      sftp = paramiko.SFTPClient.from_transport(connection)
      for item in filelist:
        fullpath = "%s/%s" % (upload_dir, item.split("/").pop())
        WriteLog("removing remote %s" % fullpath, logfile)
        sftp.remove(fullpath)
      sftp.rmdir(upload_dir)
    finally:
      # close the sftp channel on failure too
      if sftp is not None:
        sftp.close()
  except IOError as err:
    WriteLog("ERROR: FAILURE_CLEANUP: %s" % err, logfile)
    return False

  return True
308 | da7e44ee | Michael Hanselmann | |
309 | da7e44ee | Michael Hanselmann | |
def RunRemoteCommand(connection, command, logfile):
  """Execute the command via ssh on the remote host.

  The command is run with stderr redirected to stdout and everything it
  prints is written to the log file line by line once it has finished.

  @param connection: the logged in paramiko.Transport to use
  @param command: the shell command to run
  @param logfile: path of the log file for this host
  @return: True if the command could be started and its output was read,
      False if starting it failed; the remote exit status is not checked

  """
  session = connection.open_session()
  # the session is closed on every exit path, including a failed start
  try:
    session.setblocking(0)

    # the following dance is needed because paramiko changed APIs:
    # from returning True/False for success to always returning None
    # and throwing an exception in case of problems.
    # And I want to support both the old and the new API.
    result = True # being optimistic here, I know
    message = None
    try:
      if session.exec_command("%s 2>&1" % command) is False:
        result = False
    except paramiko.SSHException as err:
      message = err
      result = False

    if not result:
      WriteLog("ERROR: FAILURE_COMMAND_EXECUTION: %s" % message, logfile)
      return False

    ### Read when data is available
    chunks = []
    while True:
      # block until the channel is readable
      select.select([session], [], [])
      data = session.recv(1024)
      if not data:
        # an empty read marks the end of the stream
        break
      chunks.append(data)
      # short pause before polling again
      select.select([], [], [], .1)

    # join once at the end; decode only where recv() returned bytes that
    # are not already a str (Python 3)
    output = b"".join(chunks)
    if not isinstance(output, str):
      output = output.decode("utf-8", "replace")

    WriteLog("SUCCESS: command output follows", logfile)
    for line in output.split("\n"):
      WriteLog("output = %s" % line, logfile)
    WriteLog("command execution completed", logfile)

    return True
  finally:
    session.close()
347 | da7e44ee | Michael Hanselmann | |
348 | da7e44ee | Michael Hanselmann | |
def HostWorker(logdir, username, password, keys, hostname,
               executable, command, filelist):
  """Per-host worker.

  This function does not return - it's the main code of the children,
  which exit at the end of this function. The exit code 0 or 1 will be
  interpreted by the parent.

  If an executable is given it is uploaded together with the other files
  and run from the upload directory; the remote directory is cleaned up
  afterwards. A failed upload marks the host as failed and skips both
  command execution and cleanup.

  @param logdir: the directory where the logfiles must be created
  @param username: SSH username
  @param password: SSH password
  @param keys: SSH keys
  @param hostname: the hostname to connect to
  @param executable: the executable to upload, if not None
  @param command: the command to run
  @param filelist: auxiliary files to upload

  """
  # in the child/worker process
  logfile = "%s/%s.log" % (logdir, hostname)
  print("%s - starting" % hostname)
  result = 0 # optimism, I know
  try:
    connection = SetupSshConnection(hostname, username,
                                    password, keys, logfile)
    if connection is not False:
      # the connection is closed even if one of the steps below raises
      try:
        upload_dir = None
        if executable is not None:
          print(" %s: uploading files" % hostname)
          upload_dir = UploadFiles(connection, executable,
                                   filelist, logfile)
          if upload_dir is False:
            # without the upload there is nothing to run or to clean up
            print(" %s: file upload failed, check log for details" %
                  hostname)
            result = 1
          else:
            command = "cd %s && ./%s" % (upload_dir,
                                         executable.split("/").pop())
        if result == 0:
          print(" %s: executing remote command" % hostname)
          cmd_result = RunRemoteCommand(connection, command, logfile)
          if cmd_result is True:
            print(" %s: remote command execution successful" % hostname)
          else:
            print(" %s: remote command execution failed,"
                  " check log for details" % hostname)
            result = 1
          if executable is not None:
            print(" %s: cleaning up remote work dir" % hostname)
            cln_result = CleanupRemoteDir(connection, upload_dir,
                                          filelist, logfile)
            if cln_result is False:
              print(" %s: remote work dir cleanup failed, check"
                    " log for details" % hostname)
              result = 1
      finally:
        connection.close()
    else:
      print(" %s: connection setup failed, skipping" % hostname)
      result = 1
  except KeyboardInterrupt:
    print(" %s: received KeyboardInterrupt, aborting" % hostname)
    WriteLog("ERROR: ABORT_KEYBOARD_INTERRUPT", logfile)
    result = 1
  except Exception as err:
    result = 1
    trace = traceback.format_exc()
    msg = "ERROR: UNHANDLED_EXECPTION_ERROR: %s\nTrace: %s" % (err, trace)
    WriteLog(msg, logfile)
    print(" %s: %s" % (hostname, msg))
  # and exit with exit code 0 or 1, so the parent can compute statistics
  sys.exit(result)
413 | da7e44ee | Michael Hanselmann | |
414 | da7e44ee | Michael Hanselmann | |
def LaunchWorker(child_pids, logdir, username, password, keys, hostname,
                 executable, command, filelist):
  """Fork a child process that runs the per-host worker.

  Takes the same arguments as HostWorker plus child_pids, a dictionary
  in which the parent records the pid-to-hostname mapping of the children
  it started.

  """
  hostname = hostname.rstrip("\n")
  worker_pid = os.fork()
  if worker_pid <= 0:
    # child: do the actual work for this host
    HostWorker(logdir, username, password, keys, hostname,
               executable, command, filelist)
    return
  # controller: just remember which child handles which host
  child_pids[worker_pid] = hostname
431 | da7e44ee | Michael Hanselmann | |
432 | da7e44ee | Michael Hanselmann | |
def main():
  """Parse the command line and run the job on all hosts in batches.

  Recognised options (see the getopt string below):
    -l DIR    log directory (required)
    -x FILE   executable to run (exactly one of -x / -c is required)
    -c CMD    command to run
    -f FILE   host file, handed to GetHosts() (exactly one of -f / -h)
    -h LIST   comma-separated host list
    -a FILE   auxiliary file, may be given several times
    -b N      number of workers kept running at once (default 15)
    -u USER   user name (default "root")
    -p FILE   file whose first line is the password
    -A        use the keys returned by GetAgentKeys() instead of a password

  If neither -A nor -p is given the password is asked for interactively.

  The function never returns normally; it always ends in sys.exit():
    2  the command line could not be parsed by getopt
    3  missing, conflicting or invalid arguments
    1  log directory not useable or password file not readable
    0  all workers were waited for (even if some of them failed)

  """
  try:
    optlist, _ = getopt.getopt(sys.argv[1:], "l:x:h:f:a:c:b:u:p:A")
  except getopt.GetoptError:
    # sys.exc_info() is used instead of binding the exception in the except
    # clause so that this handler is valid syntax on old and new interpreters
    print(str(sys.exc_info()[1]))
    ShowHelp(sys.argv[0])
    sys.exit(2)

  logdir = executable = hostfile = hostlist = command = None
  use_agent = False
  auxfiles = []
  username = "root"
  password = None
  batch_size = 15
  for (opt, value) in optlist:
    if opt == "-l":
      logdir = value
    elif opt == "-x":
      executable = value
    elif opt == "-f":
      hostfile = value
    elif opt == "-h":
      hostlist = value
    elif opt == "-a":
      auxfiles.append(value)
    elif opt == "-c":
      command = value
    elif opt == "-b":
      # A non-numeric value used to end in a traceback; zero or a negative
      # value made the batch slicing below start no (or the wrong) workers
      try:
        batch_size = int(value)
      except ValueError:
        batch_size = None
      if batch_size is None or batch_size < 1:
        print("error: batch size (-b) must be a positive integer, got %r" %
              value)
        ShowHelp(sys.argv[0])
        sys.exit(3)
    elif opt == "-u":
      username = value
    elif opt == "-p":
      password = value
    elif opt == "-A":
      use_agent = True

  if not (logdir and (executable or command) and (hostfile or hostlist)):
    print("error: missing required commandline argument(s)")
    ShowHelp(sys.argv[0])
    sys.exit(3)

  if executable and command:
    print("error: can run either a command or an executable, not both")
    ShowHelp(sys.argv[0])
    sys.exit(3)

  if hostlist and hostfile:
    print("error: specify either -f or -h arguments, not both")
    ShowHelp(sys.argv[0])
    sys.exit(3)

  cmdline_hosts = None
  if not hostfile:
    # Commandline robustness: tolerate blanks around names as well as
    # leading, trailing or doubled commas, and refuse a list that contains
    # no host name at all. Done here so that the user is told before the
    # log directory is touched or a password is asked for.
    cmdline_hosts = [name.strip() for name in hostlist.split(",")]
    cmdline_hosts = [name for name in cmdline_hosts if name]
    if not cmdline_hosts:
      print("error: no host names found in -h argument")
      ShowHelp(sys.argv[0])
      sys.exit(3)

  ### Unbuffered sys.stdout
  sys.stdout = os.fdopen(1, "w", 0)

  # Only an explicit False aborts here; any other return value is accepted
  if LogDirUseable(logdir) is False:
    print("ERROR: cannot create logfiles in dir %s, aborting" % logdir)
    sys.exit(1)

  keys = []
  if use_agent:
    # NOTE(review): if -p is given together with -A, "password" still holds
    # the *path* of the password file at this point and is passed on to the
    # workers unchanged -- confirm whether that is intended
    keys = GetAgentKeys()
  elif password:
    # -p names a file; only its first line (stripped) is the password
    try:
      fh = open(password)
      try:
        pwvalue = fh.readline().strip()
      finally:
        # Close the handle even if reading fails
        fh.close()
    except IOError:
      print("error: can not read in from password file %s: %s" %
            (password, sys.exc_info()[1]))
      sys.exit(1)
    password = pwvalue
  else:
    password = getpass.getpass("%s's password for all nodes: " % username)

  if hostfile:
    hosts = GetHosts(hostfile)
  else:
    hosts = cmdline_hosts

  successes = failures = 0

  # NOTE(review): when -c is used "executable" is None and is appended here
  # as well; presumably the workers ignore the file list in that case --
  # verify against LaunchWorker
  filelist = auxfiles[:]
  filelist.append(executable)

  # initial batch
  batch = hosts[:batch_size]
  hosts = hosts[batch_size:]
  child_pids = {}
  for hostname in batch:
    LaunchWorker(child_pids, logdir, username, password, keys, hostname,
                 executable, command, filelist)

  # Every time a child terminates, account for it and start the next
  # pending host, so that the number of children never exceeds batch_size
  while child_pids:
    pid, status = os.wait()
    hostname = child_pids.pop(pid, "<unknown host>")
    print(" %s: done (in parent)" % hostname)
    if os.WIFEXITED(status) and os.WEXITSTATUS(status) == 0:
      successes += 1
    else:
      failures += 1
    if hosts:
      LaunchWorker(child_pids, logdir, username, password, keys,
                   hosts.pop(0), executable, command, filelist)

  print("All done, %s successful and %s failed hosts" % (successes, failures))

  # The exit status does not reflect failed hosts; see the summary line
  sys.exit(0)
545 | da7e44ee | Michael Hanselmann | |
546 | da7e44ee | Michael Hanselmann | |
if __name__ == "__main__":
  # Script entry point: an interrupt from the keyboard is reported with a
  # one-line message and exit status 1 instead of a traceback
  try:
    main()
  except KeyboardInterrupt:
    print("Received KeyboardInterrupt, aborting")
    sys.exit(1)