root / tools / ganeti-listrunner @ 18397489
History | View | Annotate | Download (18.7 kB)
1 | da7e44ee | Michael Hanselmann | #!/usr/bin/python |
---|---|---|---|
2 | da7e44ee | Michael Hanselmann | # |
3 | da7e44ee | Michael Hanselmann | |
4 | 99a11adc | Iustin Pop | # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. |
5 | da7e44ee | Michael Hanselmann | # |
6 | da7e44ee | Michael Hanselmann | # This program is free software; you can redistribute it and/or modify |
7 | da7e44ee | Michael Hanselmann | # it under the terms of the GNU General Public License as published by |
8 | da7e44ee | Michael Hanselmann | # the Free Software Foundation; either version 2 of the License, or |
9 | da7e44ee | Michael Hanselmann | # (at your option) any later version. |
10 | da7e44ee | Michael Hanselmann | # |
11 | da7e44ee | Michael Hanselmann | # This program is distributed in the hope that it will be useful, but |
12 | da7e44ee | Michael Hanselmann | # WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | da7e44ee | Michael Hanselmann | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | da7e44ee | Michael Hanselmann | # General Public License for more details. |
15 | da7e44ee | Michael Hanselmann | # |
16 | da7e44ee | Michael Hanselmann | # You should have received a copy of the GNU General Public License |
17 | da7e44ee | Michael Hanselmann | # along with this program; if not, write to the Free Software |
18 | da7e44ee | Michael Hanselmann | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
19 | da7e44ee | Michael Hanselmann | # 02110-1301, USA. |
20 | da7e44ee | Michael Hanselmann | |
21 | da7e44ee | Michael Hanselmann | """Run an executable on a list of hosts. |
22 | da7e44ee | Michael Hanselmann | |
23 | da7e44ee | Michael Hanselmann | Script to serially run an executable on a list of hosts via ssh |
24 | da7e44ee | Michael Hanselmann | with password auth as root. If the provided log dir does not yet |
25 | da7e44ee | Michael Hanselmann | exist, it will try to create it. |
26 | da7e44ee | Michael Hanselmann | |
27 | da7e44ee | Michael Hanselmann | Implementation: |
28 | da7e44ee | Michael Hanselmann | - the main process spawns up to batch_size children, which: |
29 | da7e44ee | Michael Hanselmann | - connects to the remote host via ssh as root |
30 | da7e44ee | Michael Hanselmann | - uploads the executable with a random name to /tmp via sftp |
31 | da7e44ee | Michael Hanselmann | - chmod 500s it |
32 | da7e44ee | Michael Hanselmann | - via ssh: chdirs into the upload directory and runs the script |
33 | da7e44ee | Michael Hanselmann | - deletes it |
34 | da7e44ee | Michael Hanselmann | - writes status messages and all output to one logfile per host |
35 | da7e44ee | Michael Hanselmann | - the main process gathers then the status of the children and |
36 | da7e44ee | Michael Hanselmann | reports the success/failure ratio |
37 | da7e44ee | Michael Hanselmann | - entire script can be aborted with Ctrl-C |
38 | da7e44ee | Michael Hanselmann | |
39 | da7e44ee | Michael Hanselmann | Security considerations: |
40 | da7e44ee | Michael Hanselmann | - the root password for the remote hosts is stored in memory for the |
41 | da7e44ee | Michael Hanselmann | runtime of the script |
42 | da7e44ee | Michael Hanselmann | - the executable to be run on the remote host is handled the following way: |
43 | da7e44ee | Michael Hanselmann | - try to create a random directory with permissions 700 on the |
44 | da7e44ee | Michael Hanselmann | remote host, abort further processing on this host if this fails |
45 | da7e44ee | Michael Hanselmann | - upload the executable to a random filename in that directory |
46 | da7e44ee | Michael Hanselmann | - set executable permissions to 500 |
47 | da7e44ee | Michael Hanselmann | - run the executable |
48 | da7e44ee | Michael Hanselmann | - delete the executable and the directory on the remote host |
49 | da7e44ee | Michael Hanselmann | |
50 | da7e44ee | Michael Hanselmann | """ |
51 | da7e44ee | Michael Hanselmann | |
52 | b459a848 | Andrea Spadaccini | # pylint: disable=C0103 |
53 | da7e44ee | Michael Hanselmann | # C0103: Invalid name ganeti-listrunner |
54 | da7e44ee | Michael Hanselmann | |
55 | da7e44ee | Michael Hanselmann | import errno |
56 | b74c0684 | Iustin Pop | import optparse |
57 | da7e44ee | Michael Hanselmann | import getpass |
58 | da7e44ee | Michael Hanselmann | import logging |
59 | da7e44ee | Michael Hanselmann | import os |
60 | da7e44ee | Michael Hanselmann | import random |
61 | da7e44ee | Michael Hanselmann | import select |
62 | da7e44ee | Michael Hanselmann | import socket |
63 | da7e44ee | Michael Hanselmann | import sys |
64 | da7e44ee | Michael Hanselmann | import time |
65 | da7e44ee | Michael Hanselmann | import traceback |
66 | da7e44ee | Michael Hanselmann | |
# paramiko is not part of the standard library: if it cannot be imported,
# print an installation hint on stderr and exit with status 1 instead of
# showing a bare traceback.
try:
  import paramiko
except ImportError:
  print >> sys.stderr, \
    ("The \"paramiko\" module could not be imported. Install it from your"
     " distribution's repository. The package is usually named"
     " \"python-paramiko\".")
  sys.exit(1)
75 | da7e44ee | Michael Hanselmann | |
76 | da7e44ee | Michael Hanselmann | |
# Path prefix of the remote work directory; UploadFiles appends two random
# numbers to it to build the per-run directory name.
REMOTE_PATH_BASE = "/tmp/listrunner"

# Usage text handed to optparse.OptionParser in ParseOptions ("%prog" is
# expanded by optparse to the program name).
USAGE = ("%prog -l logdir {-c command | -x /path/to/file} [-b batch_size]"
         " {-f hostfile|-h hosts} [-u username]"
         " [-p password_file | -A]")
82 | b74c0684 | Iustin Pop | |
83 | da7e44ee | Michael Hanselmann | |
def LogDirUseable(logdir):
  """Ensure log file directory is available and usable.

  Creates C{logdir} if it does not exist yet, then checks that a file can
  be created, written and removed inside it.

  @param logdir: path of the directory that will hold the log files
  @rtype: bool
  @return: True if a test file could be written and deleted, False if
      that failed
  @raise OSError: if creating the directory fails for any reason other
      than the path already existing

  """
  try:
    os.mkdir(logdir)
  except OSError as err:
    # an already existing path is fine here; whether it is usable is
    # decided by the write test below
    if err.errno != errno.EEXIST:
      raise

  testfile = "%s/test-%s-%s.deleteme" % (logdir, random.random(),
                                         random.random())
  try:
    # plain append mode; the former "aw" is not a valid mode string and
    # is rejected outright by Python 3
    logtest = open(testfile, "a")
    try:
      logtest.write("log file writeability test\n")
    finally:
      # never leak the handle, even if the write fails
      logtest.close()
    os.unlink(testfile)
  except (OSError, IOError):
    return False

  return True
101 | da7e44ee | Michael Hanselmann | |
102 | da7e44ee | Michael Hanselmann | |
def GetTimeStamp(timestamp=None):
  """Format a time tuple as an ISO8601 timestamp string.

  @param timestamp: time tuple as returned by time.localtime(); when
      None, the current local time is used
  @rtype: str
  @return: the timestamp formatted as "YYYY-MM-DDTHH:MM:SS"

  """
  moment = time.localtime() if timestamp is None else timestamp
  return time.strftime("%Y-%m-%dT%H:%M:%S", moment)
115 | da7e44ee | Michael Hanselmann | |
116 | da7e44ee | Michael Hanselmann | |
def PingByTcp(target, port, timeout=10, live_port_needed=False, source=None):
  """Simple ping implementation using TCP connect(2).

  Try to do a TCP connect(2) from an optional source IP to the
  specified target IP and the specified target port. If the optional
  parameter live_port_needed is set to true, requires the remote end
  to accept the connection. The timeout is specified in seconds and
  defaults to 10 seconds. If the source optional argument is not
  passed, the source address selection is left to the kernel,
  otherwise we try to connect using the passed address (failures to
  bind other than EADDRNOTAVAIL will be ignored).

  @param target: address of the host to probe
  @param port: TCP port to connect to
  @param timeout: connect timeout in seconds
  @param live_port_needed: if False, a refused connection also counts
      as success (the host answered, only the port is closed)
  @param source: optional local address to bind to before connecting
  @rtype: bool
  @return: True if the target is considered reachable, False otherwise

  """
  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  try:
    if source is not None:
      try:
        sock.bind((source, 0))
      except socket.error as err:
        # the requested source address does not exist on this machine,
        # so the probe cannot be done as asked; any other bind failure
        # is ignored on purpose
        if err.errno == errno.EADDRNOTAVAIL:
          return False

    sock.settimeout(timeout)

    try:
      sock.connect((target, port))
    except socket.timeout:
      return False
    except socket.error as err:
      # compare the numeric error code, not the exception object itself
      return (not live_port_needed) and (err.errno == errno.ECONNREFUSED)

    return True
  finally:
    # release the descriptor on every path, not only after a successful
    # connect
    sock.close()
153 | da7e44ee | Michael Hanselmann | |
154 | da7e44ee | Michael Hanselmann | |
def GetHosts(hostsfile):
  """Return list of hosts from hostfile.

  Reads the hostslist file and returns a list of hosts.
  Expects the hostslist file to contain one hostname per line. Lines
  consisting only of whitespace are skipped, as an empty hostname would
  otherwise be handed to the workers as a target.

  If the file cannot be opened, an error is printed and the program
  exits with status 2.

  @param hostsfile: path of the file listing the hosts
  @rtype: list
  @return: the non-blank lines of the file, each still carrying its
      trailing newline

  """
  try:
    datafile = open(hostsfile, "r")
  except IOError as msg:
    print("Failed to open hosts file %s: %s" % (hostsfile, msg))
    sys.exit(2)

  try:
    return [line for line in datafile.readlines() if line.strip()]
  finally:
    # close the file even if reading it fails
    datafile.close()
172 | da7e44ee | Michael Hanselmann | |
173 | da7e44ee | Michael Hanselmann | |
def WriteLog(message, logfile):
  """Writes message, terminated by newline, to logfile.

  The message is prefixed with the current timestamp and appended to the
  file. If the file cannot be opened or written, the problem and the
  message are printed and the program exits with status 1.

  @param message: text to log
  @param logfile: path of the log file to append to

  """
  try:
    # plain append mode; the former "aw" is not a valid mode string
    logfd = open(logfile, "a")
  except IOError as msg:
    print("failed to open log file %s: %s" % (logfile, msg))
    print("log message was: %s" % message)
    sys.exit(1) # no being able to log is critical

  try:
    try:
      logfd.write("%s %s\n" % (GetTimeStamp(), message))
    finally:
      logfd.close()
  except IOError as msg:
    # logfile is still the path here, so the message names the file
    # rather than showing a file object
    print("failed to write to logfile %s: %s" % (logfile, msg))
    print("log message was: %s" % message)
    sys.exit(1) # no being able to log is critical
190 | da7e44ee | Michael Hanselmann | |
191 | da7e44ee | Michael Hanselmann | |
def GetAgentKeys():
  """Fetch the keys offered by an ssh agent.

  @rtype: list
  @return: the keys reported by paramiko's agent object, or an empty
      list if paramiko raises an SSHException while asking for them

  """
  try:
    keys = list(paramiko.Agent().get_keys())
  except paramiko.SSHException:
    keys = []
  return keys
199 | da7e44ee | Michael Hanselmann | |
200 | da7e44ee | Michael Hanselmann | |
def SetupSshConnection(host, username, password, use_agent, logfile):
  """Setup the ssh connection used for all later steps.

  This function sets up the ssh connection that will be used both
  for upload and remote command execution. Every agent key (if
  use_agent is set) and then the password (if given) is tried in turn,
  each on a fresh transport.

  On success, it will return paramiko.Transport object with an
  already logged in session. On failure, False will be returned.

  @param host: the host to connect to (port 22)
  @param username: SSH username
  @param password: SSH password, or None to not try password auth
  @param use_agent: whether keys from an ssh agent should be tried
  @param logfile: path of the per-host log file

  """
  # check if target is willing to talk to us at all
  if not PingByTcp(host, 22, live_port_needed=True):
    WriteLog("ERROR: FAILURE_NOT_REACHABLE", logfile)
    print(" - ERROR: host not reachable on 22/tcp")
    return False

  # list of (description, transport.connect() keyword arguments)
  attempts = []
  if use_agent:
    for idx, key in enumerate(GetAgentKeys()):
      attempts.append(("key %d" % idx, {"pkey": key}))
  if password is not None:
    attempts.append(("password", {"password": password}))

  # deal with logging out of paramiko.transport
  handler = None

  for desc, kwargs in attempts:
    transport = None
    try:
      transport = paramiko.Transport((host, 22))

      # only try to setup the logging handler once
      if not handler:
        handler = logging.StreamHandler()
        handler.setLevel(logging.ERROR)
        log = logging.getLogger(transport.get_log_channel())
        log.addHandler(handler)

      transport.connect(username=username, **kwargs) # pylint: disable=W0142
    except (socket.gaierror, socket.error, paramiko.SSHException):
      # this method did not work; drop the half-open transport before
      # trying the next one so that it does not linger
      if transport is not None:
        transport.close()
      continue

    WriteLog("ssh connection established using %s" % desc, logfile)
    # FIXME: setting up the sftp & shell channels right after session
    # setup was observed to fail sometimes; a one second sleep here was
    # the (currently disabled) workaround
    return transport

  methods = ", ".join(desc for (desc, _) in attempts)
  WriteLog("ERROR: FAILURE_CONNECTION_SETUP (tried %s) " % methods, logfile)
  WriteLog("aborted", logfile)
  print(" - ERROR: connection setup failed (tried %s)" % methods)

  return False
258 | da7e44ee | Michael Hanselmann | |
259 | da7e44ee | Michael Hanselmann | |
def UploadFiles(connection, executable, filelist, logfile):
  """Uploads the specified files via sftp.

  Uploads the specified files to a freshly created directory (mode 0700)
  whose name is REMOTE_PATH_BASE plus a random suffix. All uploaded files
  are chmod 0400 after upload with the exception of executable, which is
  chmod 0500. Files keep their base name on the remote side.

  @param connection: logged-in transport to open the sftp channel on
  @param executable: entry of filelist that must be made executable
  @param filelist: local paths of all files to upload
  @param logfile: path of the per-host log file
  @return: the absolute path to the remote upload directory upon
      success, False if an IOError occurred

  """
  remote_dir = "%s.%s-%s" % (REMOTE_PATH_BASE,
                             random.random(), random.random())

  sftp = None
  try:
    try:
      sftp = paramiko.SFTPClient.from_transport(connection)
      sftp.mkdir(remote_dir, mode=0o700)
      for item in filelist:
        remote_file = "%s/%s" % (remote_dir, os.path.basename(item))
        WriteLog("uploading %s to remote %s" % (item, remote_file), logfile)
        sftp.put(item, remote_file)
        if item == executable:
          sftp.chmod(remote_file, 0o500)
        else:
          sftp.chmod(remote_file, 0o400)
    finally:
      # close the sftp channel on failure as well
      if sftp is not None:
        sftp.close()
  except IOError as err:
    WriteLog("ERROR: FAILURE_UPLOAD: %s" % err, logfile)
    return False

  return remote_dir
290 | da7e44ee | Michael Hanselmann | |
291 | da7e44ee | Michael Hanselmann | |
def CleanupRemoteDir(connection, upload_dir, filelist, logfile):
  """Cleans out and removes the remote work directory.

  Removes the base name of every entry of filelist from upload_dir and
  then removes upload_dir itself.

  @param connection: logged-in transport to open the sftp channel on
  @param upload_dir: remote directory to clean up
  @param filelist: local paths of the files that were uploaded
  @param logfile: path of the per-host log file
  @rtype: bool
  @return: True on success, False if an IOError occurred

  """
  sftp = None
  try:
    try:
      sftp = paramiko.SFTPClient.from_transport(connection)
      for item in filelist:
        fullpath = "%s/%s" % (upload_dir, os.path.basename(item))
        WriteLog("removing remote %s" % fullpath, logfile)
        sftp.remove(fullpath)
      sftp.rmdir(upload_dir)
    finally:
      # close the sftp channel on failure as well
      if sftp is not None:
        sftp.close()
  except IOError as err:
    WriteLog("ERROR: FAILURE_CLEANUP: %s" % err, logfile)
    return False

  return True
307 | da7e44ee | Michael Hanselmann | |
308 | da7e44ee | Michael Hanselmann | |
def RunRemoteCommand(connection, command, logfile):
  """Execute the command via ssh on the remote host.

  The command is run with stderr redirected into stdout; all output is
  collected and written line by line to the log file.

  @param connection: logged-in transport to open the session on
  @param command: shell command to execute
  @param logfile: path of the per-host log file
  @rtype: bool
  @return: False if the command could not be started, True otherwise
      (the exit status of the command is not evaluated, and a socket
      error while reading only ends the reading)

  """
  session = connection.open_session()
  try:
    session.setblocking(0)

    # the following dance is needed because paramiko changed APIs:
    # from returning True/False for success to always returning None
    # and throwing an exception in case of problems.
    # And I want to support both the old and the new API.
    result = True # being optimistic here, I know
    message = None
    try:
      if session.exec_command("%s 2>&1" % command) is False:
        result = False
    except paramiko.SSHException as err:
      message = err
      result = False

    if not result:
      WriteLog("ERROR: FAILURE_COMMAND_EXECUTION: %s" % message, logfile)
      return False

    ### Read when data is available
    chunks = []
    while True:
      # block until the session has something to read (or was closed)
      select.select([session], [], [])
      try:
        data = session.recv(1024)
      except socket.timeout as err:
        data = None
        WriteLog("FAILED: socket.timeout %s" % err, logfile)
      except socket.error as err:
        data = None
        WriteLog("FAILED: socket.error %s" % err, logfile)
      if not data:
        break
      chunks.append(data)
      # short pause between reads
      select.select([], [], [], .1)
    output = "".join(chunks)

    WriteLog("SUCCESS: command output follows", logfile)
    for line in output.splitlines():
      WriteLog("output = %s" % line, logfile)
    WriteLog("command execution completed", logfile)

    return True
  finally:
    # also release the session when the command could not be started
    session.close()
353 | da7e44ee | Michael Hanselmann | |
354 | da7e44ee | Michael Hanselmann | |
def HostWorker(logdir, username, password, use_agent, hostname,
               executable, exec_args, command, filelist):
  """Per-host worker.

  This function does not return - it's the main code of the childs,
  which exit at the end of this function. The exit code 0 or 1 will be
  interpreted by the parent.

  If an executable is given, the files are uploaded first and the
  command is replaced by an invocation of the uploaded executable; when
  the upload fails, nothing is executed and the worker exits with 1.

  @param logdir: the directory where the logfiles must be created
  @param username: SSH username
  @param password: SSH password
  @param use_agent: whether we should instead use an agent
  @param hostname: the hostname to connect to
  @param executable: the executable to upload, if not None
  @param exec_args: Additional arguments for executable
  @param command: the command to run
  @param filelist: auxiliary files to upload

  """
  # in the child/worker process
  logfile = "%s/%s.log" % (logdir, hostname)
  print("%s - starting" % hostname)
  result = 0 # optimism, I know
  try:
    connection = SetupSshConnection(hostname, username,
                                    password, use_agent, logfile)
    if connection is False:
      print(" %s: connection setup failed, skipping" % hostname)
      result = 1
    else:
      try:
        # None: nothing to upload, False: upload failed
        upload_dir = None
        if executable is not None:
          print(" %s: uploading files" % hostname)
          upload_dir = UploadFiles(connection, executable,
                                   filelist, logfile)

        if upload_dir is False:
          # without the upload there is nothing sensible to execute or
          # to clean up; UploadFiles already logged the reason
          print(" %s: file upload failed, check log for details" % hostname)
          result = 1
        else:
          if upload_dir is not None:
            command = ("cd %s && ./%s" %
                       (upload_dir, os.path.basename(executable)))
            if exec_args:
              command += " %s" % exec_args
          print(" %s: executing remote command" % hostname)
          cmd_result = RunRemoteCommand(connection, command, logfile)
          if cmd_result is True:
            print(" %s: remote command execution successful" % hostname)
          else:
            print(" %s: remote command execution failed,"
                  " check log for details" % hostname)
            result = 1
          if upload_dir is not None:
            print(" %s: cleaning up remote work dir" % hostname)
            cln_result = CleanupRemoteDir(connection, upload_dir,
                                          filelist, logfile)
            if cln_result is False:
              print(" %s: remote work dir cleanup failed, check"
                    " log for details" % hostname)
              result = 1
      finally:
        # close the connection even if one of the steps raised
        connection.close()
  except KeyboardInterrupt:
    print(" %s: received KeyboardInterrupt, aborting" % hostname)
    WriteLog("ERROR: ABORT_KEYBOARD_INTERRUPT", logfile)
    result = 1
  except Exception as err:
    result = 1
    trace = traceback.format_exc()
    msg = "ERROR: UNHANDLED_EXECPTION_ERROR: %s\nTrace: %s" % (err, trace)
    WriteLog(msg, logfile)
    print(" %s: %s" % (hostname, msg))
  # and exit with exit code 0 or 1, so the parent can compute statistics
  sys.exit(result)
422 | da7e44ee | Michael Hanselmann | |
423 | da7e44ee | Michael Hanselmann | |
def LaunchWorker(child_pids, logdir, username, password, use_agent, hostname,
                 executable, exec_args, command, filelist):
  """Fork a child process that runs HostWorker for one host.

  Takes the same arguments as HostWorker plus child_pids, the
  dictionary in which the parent records which pid handles which
  hostname. Trailing newlines are removed from hostname first.

  """
  hostname = hostname.rstrip("\n")
  child = os.fork()
  if child > 0:
    # parent: only remember which child works on which host
    child_pids[child] = hostname
    return

  # child: do the actual work
  HostWorker(logdir, username, password, use_agent, hostname,
             executable, exec_args, command, filelist)
440 | da7e44ee | Michael Hanselmann | |
441 | da7e44ee | Michael Hanselmann | |
def ParseOptions():
  """Parses and validates the command line options.

  In case of command line errors, it will show the usage and exit the
  program.

  @rtype: tuple
  @return: the options as a tuple of (logdir, executable, exec_args,
      hostfile, hostlist, command, use_agent, auxfiles, username,
      password, batch_size); C{password} is the raw value given to C{-p}
      (or None) and C{batch_size} is guaranteed to be at least 1

  """
  # "-h" selects the host list in this tool, which clashes with optparse's
  # built-in "-h" help switch; with "resolve" our definition wins and the
  # help text stays reachable through "--help"
  parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                 conflict_handler="resolve")

  parser.add_option("-l", dest="logdir", default=None,
                    help="directory to write logfiles to")
  parser.add_option("-x", dest="executable", default=None,
                    help="executable to run on remote host(s)")
  parser.add_option("-f", dest="hostfile", default=None,
                    help="hostlist file (one host per line)")
  parser.add_option("-h", dest="hostlist", default=None, metavar="HOSTS",
                    help="comma-separated list of hosts or single hostname")
  parser.add_option("-a", dest="auxfiles", action="append", default=[],
                    help="optional auxiliary file to upload"
                    " (can be given multiple times)",
                    metavar="FILE")
  parser.add_option("-c", dest="command", default=None,
                    help="shell command to run on remote host(s)")
  parser.add_option("-b", dest="batch_size", default=15, type="int",
                    help="batch-size, how many hosts to process"
                    " in parallel [15]")
  parser.add_option("-u", dest="username", default="root",
                    help="username used to connect [root]")
  # The value is a file name: the caller reads the password from the first
  # line of that file
  parser.add_option("-p", dest="password", default=None,
                    help="file containing the password used to authenticate"
                    " (when not using an agent)")
  parser.add_option("-A", dest="use_agent", default=False, action="store_true",
                    help="instead of password, use keys from an SSH agent")
  parser.add_option("--args", dest="exec_args", default=None,
                    help="Arguments to be passed to executable (-x)")

  opts, args = parser.parse_args()

  if opts.executable and opts.command:
    parser.error("Options -x and -c conflict with each other")
  if not (opts.executable or opts.command):
    parser.error("One of -x and -c must be given")
  if opts.command and opts.exec_args:
    parser.error("Can't specify arguments when using custom command")
  if not opts.logdir:
    parser.error("Option -l is required")
  if opts.hostfile and opts.hostlist:
    parser.error("Options -f and -h conflict with each other")
  if not (opts.hostfile or opts.hostlist):
    parser.error("One of -f or -h must be given")
  # A batch size below one would either start no worker at all or slice the
  # host list from the wrong end, so reject it up front
  if opts.batch_size < 1:
    parser.error("Batch size (-b) must be at least 1")
  if args:
    parser.error("This program doesn't take any arguments, passed in: %s" %
                 ", ".join(args))

  return (opts.logdir, opts.executable, opts.exec_args,
          opts.hostfile, opts.hostlist,
          opts.command, opts.use_agent, opts.auxfiles, opts.username,
          opts.password, opts.batch_size)
505 | b74c0684 | Iustin Pop | |
506 | b74c0684 | Iustin Pop | |
507 | da7e44ee | Michael Hanselmann | def main(): |
508 | da7e44ee | Michael Hanselmann | """main.""" |
509 | 6eedd356 | Michael Hanselmann | (logdir, executable, exec_args, hostfile, hostlist, |
510 | b74c0684 | Iustin Pop | command, use_agent, auxfiles, username, |
511 | b74c0684 | Iustin Pop | password, batch_size) = ParseOptions() |
512 | da7e44ee | Michael Hanselmann | |
513 | da7e44ee | Michael Hanselmann | ### Unbuffered sys.stdout |
514 | da7e44ee | Michael Hanselmann | sys.stdout = os.fdopen(1, "w", 0) |
515 | da7e44ee | Michael Hanselmann | |
516 | da7e44ee | Michael Hanselmann | if LogDirUseable(logdir) is False: |
517 | da7e44ee | Michael Hanselmann | print "ERROR: cannot create logfiles in dir %s, aborting" % logdir |
518 | da7e44ee | Michael Hanselmann | sys.exit(1) |
519 | da7e44ee | Michael Hanselmann | |
520 | da7e44ee | Michael Hanselmann | if use_agent: |
521 | 99a11adc | Iustin Pop | pass |
522 | da7e44ee | Michael Hanselmann | elif password: |
523 | da7e44ee | Michael Hanselmann | try: |
524 | da7e44ee | Michael Hanselmann | fh = file(password) |
525 | da7e44ee | Michael Hanselmann | pwvalue = fh.readline().strip() |
526 | da7e44ee | Michael Hanselmann | fh.close() |
527 | da7e44ee | Michael Hanselmann | except IOError, e: |
528 | da7e44ee | Michael Hanselmann | print "error: can not read in from password file %s: %s" % (password, e) |
529 | da7e44ee | Michael Hanselmann | sys.exit(1) |
530 | da7e44ee | Michael Hanselmann | password = pwvalue |
531 | da7e44ee | Michael Hanselmann | else: |
532 | da7e44ee | Michael Hanselmann | password = getpass.getpass("%s's password for all nodes: " % username) |
533 | da7e44ee | Michael Hanselmann | |
534 | da7e44ee | Michael Hanselmann | if hostfile: |
535 | da7e44ee | Michael Hanselmann | hosts = GetHosts(hostfile) |
536 | da7e44ee | Michael Hanselmann | else: |
537 | da7e44ee | Michael Hanselmann | if "," in hostlist: |
538 | da7e44ee | Michael Hanselmann | hostlist = hostlist.rstrip(",") # commandline robustness |
539 | da7e44ee | Michael Hanselmann | hosts = hostlist.split(",") |
540 | da7e44ee | Michael Hanselmann | else: |
541 | da7e44ee | Michael Hanselmann | hosts = [hostlist] |
542 | da7e44ee | Michael Hanselmann | |
543 | da7e44ee | Michael Hanselmann | successes = failures = 0 |
544 | da7e44ee | Michael Hanselmann | |
545 | da7e44ee | Michael Hanselmann | filelist = auxfiles[:] |
546 | da7e44ee | Michael Hanselmann | filelist.append(executable) |
547 | da7e44ee | Michael Hanselmann | |
548 | da7e44ee | Michael Hanselmann | # initial batch |
549 | da7e44ee | Michael Hanselmann | batch = hosts[:batch_size] |
550 | da7e44ee | Michael Hanselmann | hosts = hosts[batch_size:] |
551 | da7e44ee | Michael Hanselmann | child_pids = {} |
552 | da7e44ee | Michael Hanselmann | for hostname in batch: |
553 | 99a11adc | Iustin Pop | LaunchWorker(child_pids, logdir, username, password, use_agent, hostname, |
554 | 6eedd356 | Michael Hanselmann | executable, exec_args, command, filelist) |
555 | da7e44ee | Michael Hanselmann | |
556 | da7e44ee | Michael Hanselmann | while child_pids: |
557 | da7e44ee | Michael Hanselmann | pid, status = os.wait() |
558 | da7e44ee | Michael Hanselmann | hostname = child_pids.pop(pid, "<unknown host>") |
559 | da7e44ee | Michael Hanselmann | print " %s: done (in parent)" % hostname |
560 | da7e44ee | Michael Hanselmann | if os.WIFEXITED(status) and os.WEXITSTATUS(status) == 0: |
561 | da7e44ee | Michael Hanselmann | successes += 1 |
562 | da7e44ee | Michael Hanselmann | else: |
563 | da7e44ee | Michael Hanselmann | failures += 1 |
564 | da7e44ee | Michael Hanselmann | if hosts: |
565 | 99a11adc | Iustin Pop | LaunchWorker(child_pids, logdir, username, password, use_agent, |
566 | 6eedd356 | Michael Hanselmann | hosts.pop(0), executable, exec_args, command, filelist) |
567 | da7e44ee | Michael Hanselmann | |
568 | da7e44ee | Michael Hanselmann | |
569 | da7e44ee | Michael Hanselmann | print "All done, %s successful and %s failed hosts" % (successes, failures) |
570 | da7e44ee | Michael Hanselmann | |
571 | da7e44ee | Michael Hanselmann | sys.exit(0) |
572 | da7e44ee | Michael Hanselmann | |
573 | da7e44ee | Michael Hanselmann | |
574 | da7e44ee | Michael Hanselmann | if __name__ == "__main__": |
575 | da7e44ee | Michael Hanselmann | try: |
576 | da7e44ee | Michael Hanselmann | main() |
577 | da7e44ee | Michael Hanselmann | except KeyboardInterrupt: |
578 | da7e44ee | Michael Hanselmann | print "Received KeyboardInterrupt, aborting" |
579 | da7e44ee | Michael Hanselmann | sys.exit(1) |