root / synthbench / selfish / bench_worst_best.c
History | View | Annotate | Download (5.4 kB)
1 | 0:839f52ef7657 | louridas | /****************************************************************************/
|
---|---|---|---|
2 | 0:839f52ef7657 | louridas | /* SELFISHDETOUR:selfishdetour-info */
|
3 | 0:839f52ef7657 | louridas | /* This file is part of Selfish Detour: OS detour(noise) benchmark
|
4 | 0:839f52ef7657 | louridas | */
|
5 | 0:839f52ef7657 | louridas | /* SELFISHDETOUR:selfishdetour-info */
|
6 | 0:839f52ef7657 | louridas | /* */
|
7 | 0:839f52ef7657 | louridas | /* SELFISHDETOUR:selfishdetour-fillin */
|
8 | 0:839f52ef7657 | louridas | /* $Id: bench_worst_best.c,v 1.10 2007/01/03 21:16:49 kazutomo Exp $
|
9 | 0:839f52ef7657 | louridas | * SelfishDetour_Version: 1.0.7
|
10 | 0:839f52ef7657 | louridas | * SelfishDetour_Heredity: FOSS_ORIG
|
11 | 0:839f52ef7657 | louridas | * SelfishDetour_License: GPL
|
12 | 0:839f52ef7657 | louridas | */
|
13 | 0:839f52ef7657 | louridas | /* SELFISHDETOUR:selfishdetour-fillin */
|
14 | 0:839f52ef7657 | louridas | /* */
|
15 | 0:839f52ef7657 | louridas | /* SELFISHDETOUR:selfishdetour-gpl */
|
16 | 0:839f52ef7657 | louridas | /* Copyright: Argonne National Laboratory, Department of Energy,
|
17 | 0:839f52ef7657 | louridas | * and University of Chicago. 2004, 2005, 2006, 2007
|
18 | 0:839f52ef7657 | louridas | * SelfishDetour License: GPL
|
19 | 0:839f52ef7657 | louridas | *
|
20 | 0:839f52ef7657 | louridas | * This software is free. See the file license-files/license.GPL
|
21 | 0:839f52ef7657 | louridas | * for complete details on your rights to copy, modify, and use this
|
22 | 0:839f52ef7657 | louridas | * software.
|
23 | 0:839f52ef7657 | louridas | */
|
24 | 0:839f52ef7657 | louridas | /* SELFISHDETOUR:selfishdetour-gpl */
|
25 | 0:839f52ef7657 | louridas | /****************************************************************************/
|
26 | 0:839f52ef7657 | louridas | /*
|
27 | 0:839f52ef7657 | louridas | best-worst benchmark
|
28 | 0:839f52ef7657 | louridas | */
|
29 | 0:839f52ef7657 | louridas | #include <stdlib.h> |
30 | 0:839f52ef7657 | louridas | #include <sys/time.h> |
31 | 0:839f52ef7657 | louridas | #include <signal.h> |
32 | 0:839f52ef7657 | louridas | #include <unistd.h> |
33 | 0:839f52ef7657 | louridas | #include <stdio.h> |
34 | 0:839f52ef7657 | louridas | #include <time.h> |
35 | 0:839f52ef7657 | louridas | #include <errno.h> |
36 | 0:839f52ef7657 | louridas | #include <stdarg.h> |
37 | 0:839f52ef7657 | louridas | #include <values.h> |
38 | 0:839f52ef7657 | louridas | #include <limits.h> |
39 | 0:839f52ef7657 | louridas | |
40 | 0:839f52ef7657 | louridas | #include "mpi.h" |
41 | 0:839f52ef7657 | louridas | |
42 | 0:839f52ef7657 | louridas | #include "selfish_detour.h" |
43 | 0:839f52ef7657 | louridas | |
44 | 0:839f52ef7657 | louridas | |
/* Optional compute window (in usec) that benchloop() spins for before each
   step; the call is skipped while this is 0. */
#define COMP_WINDOW_USEC (0)
/* Wall-clock length (in seconds) of the sizing pass in benchloop(). */
#define TIMEOUT_SEC (10)

/* printf() wrapper that prints only on MPI rank 0.  The format attribute
   lets the compiler check every call's arguments against its format
   string, as it does for printf() itself. */
static int rank0_printf( const char* fmt, ...)
    __attribute__( (format(printf, 1, 2)) );

/* Busy-loop iterations per second: written by calibrate_busy_loop_cnt(),
   read by busy_loop(). */
static double busy_loop_cnt_per_sec;
53 | 0:839f52ef7657 | louridas | |
54 | 0:839f52ef7657 | louridas | /*
|
55 | 0:839f52ef7657 | louridas | busy_loop_cnt_per_sec is adjusted before main
|
56 | 0:839f52ef7657 | louridas | */
|
57 | 0:839f52ef7657 | louridas | void calibrate_busy_loop_cnt() __attribute__( (constructor) );
|
58 | 0:839f52ef7657 | louridas | void calibrate_busy_loop_cnt()
|
59 | 0:839f52ef7657 | louridas | { |
60 | 0:839f52ef7657 | louridas | unsigned long long s,b; |
61 | 0:839f52ef7657 | louridas | int time_to_calibrate = 5; |
62 | 0:839f52ef7657 | louridas | unsigned long long delay_ticks = (unsigned long long)time_to_calibrate*1000*1000*TICKS_PER_US; |
63 | 0:839f52ef7657 | louridas | double x;
|
64 | 0:839f52ef7657 | louridas | double fixed_work;
|
65 | 0:839f52ef7657 | louridas | |
66 | 0:839f52ef7657 | louridas | fixed_work = MAXDOUBLE; |
67 | 0:839f52ef7657 | louridas | x = 0.0; |
68 | 0:839f52ef7657 | louridas | |
69 | 0:839f52ef7657 | louridas | s = rdtsc(); |
70 | 0:839f52ef7657 | louridas | b = s; |
71 | 0:839f52ef7657 | louridas | |
72 | 0:839f52ef7657 | louridas | while ( (b-s)<delay_ticks && x<fixed_work ) {
|
73 | 0:839f52ef7657 | louridas | x++; |
74 | 0:839f52ef7657 | louridas | b = rdtsc(); |
75 | 0:839f52ef7657 | louridas | } |
76 | 0:839f52ef7657 | louridas | |
77 | 0:839f52ef7657 | louridas | busy_loop_cnt_per_sec = x/(double)time_to_calibrate;
|
78 | 0:839f52ef7657 | louridas | } |
79 | 0:839f52ef7657 | louridas | |
80 | 0:839f52ef7657 | louridas | |
81 | 0:839f52ef7657 | louridas | static void busy_loop(unsigned wait_usec) |
82 | 0:839f52ef7657 | louridas | { |
83 | 0:839f52ef7657 | louridas | unsigned long long delay_ticks = ULONG_LONG_MAX; |
84 | 0:839f52ef7657 | louridas | unsigned long long s,b; |
85 | 0:839f52ef7657 | louridas | double x;
|
86 | 0:839f52ef7657 | louridas | double fixed_work = busy_loop_cnt_per_sec*wait_usec/1000.0/1000.0; |
87 | 0:839f52ef7657 | louridas | |
88 | 0:839f52ef7657 | louridas | x = 0.0; |
89 | 0:839f52ef7657 | louridas | |
90 | 0:839f52ef7657 | louridas | s = rdtsc(); |
91 | 0:839f52ef7657 | louridas | b = s; |
92 | 0:839f52ef7657 | louridas | |
93 | 0:839f52ef7657 | louridas | /* busy waiting loop itself is exactly same as the loop in the
|
94 | 0:839f52ef7657 | louridas | calibration function */
|
95 | 0:839f52ef7657 | louridas | while ( (b-s)<delay_ticks && x<fixed_work ) {
|
96 | 0:839f52ef7657 | louridas | x++; |
97 | 0:839f52ef7657 | louridas | b = rdtsc(); |
98 | 0:839f52ef7657 | louridas | } |
99 | 0:839f52ef7657 | louridas | |
100 | 0:839f52ef7657 | louridas | } |
101 | 0:839f52ef7657 | louridas | |
102 | 0:839f52ef7657 | louridas | |
103 | 0:839f52ef7657 | louridas | static void myfunc(double *in, double *inout, int *len, MPI_Datatype *dptr) { |
104 | 0:839f52ef7657 | louridas | int i;
|
105 | 0:839f52ef7657 | louridas | for (i=0; i<*len; i++) { |
106 | 0:839f52ef7657 | louridas | *inout = *in + *inout; |
107 | 0:839f52ef7657 | louridas | in++; inout++; |
108 | 0:839f52ef7657 | louridas | } |
109 | 0:839f52ef7657 | louridas | } |
110 | 0:839f52ef7657 | louridas | |
111 | 0:839f52ef7657 | louridas | |
112 | 0:839f52ef7657 | louridas | static double benchloop(int mode, int* n_iters) |
113 | 0:839f52ef7657 | louridas | { |
114 | 0:839f52ef7657 | louridas | int i;
|
115 | 0:839f52ef7657 | louridas | MPI_Op myOp; |
116 | 0:839f52ef7657 | louridas | double in=1.0, out; /* for AllReduce */ |
117 | 0:839f52ef7657 | louridas | double t1,t2;
|
118 | 0:839f52ef7657 | louridas | int cnt,n;
|
119 | 0:839f52ef7657 | louridas | int timeouted_in, timeouted_out;
|
120 | 0:839f52ef7657 | louridas | int size;
|
121 | 0:839f52ef7657 | louridas | MPI_Op_create( (MPI_User_function *) myfunc, 1, &myOp );
|
122 | 0:839f52ef7657 | louridas | |
123 | 0:839f52ef7657 | louridas | MPI_Barrier(MPI_COMM_WORLD); |
124 | 0:839f52ef7657 | louridas | MPI_Comm_size(MPI_COMM_WORLD, &size); |
125 | 0:839f52ef7657 | louridas | |
126 | 0:839f52ef7657 | louridas | |
127 | 0:839f52ef7657 | louridas | selfish_detour_init(); |
128 | 0:839f52ef7657 | louridas | |
129 | 0:839f52ef7657 | louridas | t1 = MPI_Wtime(); |
130 | 0:839f52ef7657 | louridas | cnt = 0;
|
131 | 0:839f52ef7657 | louridas | for(i=0; ;i++) { |
132 | 0:839f52ef7657 | louridas | if( COMP_WINDOW_USEC > 0 ) |
133 | 0:839f52ef7657 | louridas | busy_loop(COMP_WINDOW_USEC); |
134 | 0:839f52ef7657 | louridas | |
135 | 0:839f52ef7657 | louridas | switch( mode ) {
|
136 | 0:839f52ef7657 | louridas | case 0: |
137 | 0:839f52ef7657 | louridas | busy_loop(100); /* when no noise, it will be 100 usec busyloop. */ |
138 | 0:839f52ef7657 | louridas | break;
|
139 | 0:839f52ef7657 | louridas | case 1: |
140 | 0:839f52ef7657 | louridas | MPI_Allreduce(&in, &out, 1, MPI_DOUBLE, myOp, MPI_COMM_WORLD);
|
141 | 0:839f52ef7657 | louridas | break;
|
142 | 0:839f52ef7657 | louridas | } |
143 | 0:839f52ef7657 | louridas | |
144 | 0:839f52ef7657 | louridas | if( (MPI_Wtime()-t1) > (double)TIMEOUT_SEC ) { |
145 | 0:839f52ef7657 | louridas | timeouted_in = 1;
|
146 | 0:839f52ef7657 | louridas | } else {
|
147 | 0:839f52ef7657 | louridas | timeouted_in = 0;
|
148 | 0:839f52ef7657 | louridas | } |
149 | 0:839f52ef7657 | louridas | |
150 | 0:839f52ef7657 | louridas | MPI_Allreduce(&timeouted_in, &timeouted_out, 1,
|
151 | 0:839f52ef7657 | louridas | MPI_INT, MPI_SUM, MPI_COMM_WORLD); |
152 | 0:839f52ef7657 | louridas | if( timeouted_out == size ) {
|
153 | 0:839f52ef7657 | louridas | break;
|
154 | 0:839f52ef7657 | louridas | } |
155 | 0:839f52ef7657 | louridas | } |
156 | 0:839f52ef7657 | louridas | t2 = MPI_Wtime(); |
157 | 0:839f52ef7657 | louridas | selfish_detour_finalize(); |
158 | 0:839f52ef7657 | louridas | |
159 | 0:839f52ef7657 | louridas | |
160 | 0:839f52ef7657 | louridas | *n_iters = n = i*2; /* the reason why i is doubled is that the |
161 | 0:839f52ef7657 | louridas | above calibration loop does allreduce twice
|
162 | 0:839f52ef7657 | louridas | when mode is 1. this is a kind of
|
163 | 0:839f52ef7657 | louridas | approximation. */
|
164 | 0:839f52ef7657 | louridas | |
165 | 0:839f52ef7657 | louridas | rank0_printf("# N_ITERS=%d\n",n);
|
166 | 0:839f52ef7657 | louridas | |
167 | 0:839f52ef7657 | louridas | selfish_detour_init(); |
168 | 0:839f52ef7657 | louridas | |
169 | 0:839f52ef7657 | louridas | t1 = MPI_Wtime(); |
170 | 0:839f52ef7657 | louridas | for(i=0; i<n; i++) { |
171 | 0:839f52ef7657 | louridas | if( COMP_WINDOW_USEC > 0 ) |
172 | 0:839f52ef7657 | louridas | busy_loop(COMP_WINDOW_USEC); |
173 | 0:839f52ef7657 | louridas | switch( mode ) {
|
174 | 0:839f52ef7657 | louridas | case 0: |
175 | 0:839f52ef7657 | louridas | busy_loop(100);
|
176 | 0:839f52ef7657 | louridas | break;
|
177 | 0:839f52ef7657 | louridas | case 1: |
178 | 0:839f52ef7657 | louridas | MPI_Allreduce(&in, &out, 1, MPI_DOUBLE, myOp, MPI_COMM_WORLD);
|
179 | 0:839f52ef7657 | louridas | break;
|
180 | 0:839f52ef7657 | louridas | } |
181 | 0:839f52ef7657 | louridas | } |
182 | 0:839f52ef7657 | louridas | t2 = MPI_Wtime(); |
183 | 0:839f52ef7657 | louridas | selfish_detour_finalize(); |
184 | 0:839f52ef7657 | louridas | |
185 | 0:839f52ef7657 | louridas | return (t2-t1);
|
186 | 0:839f52ef7657 | louridas | } |
187 | 0:839f52ef7657 | louridas | |
188 | 0:839f52ef7657 | louridas | |
189 | 0:839f52ef7657 | louridas | static void testloop(int mode) |
190 | 0:839f52ef7657 | louridas | { |
191 | 0:839f52ef7657 | louridas | double elapsed_sec;
|
192 | 0:839f52ef7657 | louridas | unsigned n_iters;
|
193 | 0:839f52ef7657 | louridas | int size;
|
194 | 0:839f52ef7657 | louridas | |
195 | 0:839f52ef7657 | louridas | MPI_Comm_size(MPI_COMM_WORLD, &size); |
196 | 0:839f52ef7657 | louridas | |
197 | 0:839f52ef7657 | louridas | |
198 | 0:839f52ef7657 | louridas | elapsed_sec = benchloop(mode, &n_iters); |
199 | 0:839f52ef7657 | louridas | rank0_printf("# e=%f\n", elapsed_sec);
|
200 | 0:839f52ef7657 | louridas | |
201 | 0:839f52ef7657 | louridas | rank0_printf("# %s ", (mode==0)?"BEST":"WORST" ); |
202 | 0:839f52ef7657 | louridas | rank0_printf("N=%d ", size);
|
203 | 0:839f52ef7657 | louridas | rank0_printf("TIME_PER_ITERATION=%f usec ",
|
204 | 0:839f52ef7657 | louridas | (double)elapsed_sec*1000.0*1000.0/(double)n_iters); |
205 | 0:839f52ef7657 | louridas | if( get_detour_noise_interval_usec() == 0 ) { |
206 | 0:839f52ef7657 | louridas | rank0_printf("NO_NOISE ");
|
207 | 0:839f52ef7657 | louridas | } else {
|
208 | 0:839f52ef7657 | louridas | rank0_printf("I=%f D=%f ",
|
209 | 0:839f52ef7657 | louridas | get_detour_noise_interval_usec(), |
210 | 0:839f52ef7657 | louridas | get_detour_noise_duration_usec() ); |
211 | 0:839f52ef7657 | louridas | if( get_detour_noise_counter() > 0 ) { |
212 | 0:839f52ef7657 | louridas | rank0_printf("CNT=%d ",
|
213 | 0:839f52ef7657 | louridas | get_detour_noise_counter() ); |
214 | 0:839f52ef7657 | louridas | } else {
|
215 | 0:839f52ef7657 | louridas | rank0_printf("P=%f ",
|
216 | 0:839f52ef7657 | louridas | get_detour_noise_probability() ); |
217 | 0:839f52ef7657 | louridas | } |
218 | 0:839f52ef7657 | louridas | if( get_detour_noise_sync()>0 ) { |
219 | 0:839f52ef7657 | louridas | rank0_printf("SYNC ");
|
220 | 0:839f52ef7657 | louridas | } |
221 | 0:839f52ef7657 | louridas | } |
222 | 0:839f52ef7657 | louridas | rank0_printf("\n");
|
223 | 0:839f52ef7657 | louridas | |
224 | 0:839f52ef7657 | louridas | } |
225 | 0:839f52ef7657 | louridas | |
/*
  Entry point: initialise MPI, print a banner with the build date and time,
  run the mode 0 test followed by the mode 1 test, then shut MPI down.
*/
int main(int argc, char* argv[])
{
    int mode;

    MPI_Init(&argc, &argv);

    rank0_printf("# [bench_worst_best] v1.2 %s %s\n",
                 __DATE__, __TIME__);

    /* testloop() labels mode 0 as BEST and mode 1 as WORST */
    for (mode = 0; mode <= 1; mode++) {
        testloop( mode );
    }

    rank0_printf("done.\n");

    MPI_Finalize();

    return 0;
}
242 | 0:839f52ef7657 | louridas | |
243 | 0:839f52ef7657 | louridas | static int rank0_printf(const char* fmt, ...) |
244 | 0:839f52ef7657 | louridas | { |
245 | 0:839f52ef7657 | louridas | int rank;
|
246 | 0:839f52ef7657 | louridas | int ret=0; |
247 | 0:839f52ef7657 | louridas | MPI_Comm_rank(MPI_COMM_WORLD, &rank); |
248 | 0:839f52ef7657 | louridas | if( rank==0 ) { |
249 | 0:839f52ef7657 | louridas | va_list ap; |
250 | 0:839f52ef7657 | louridas | va_start(ap,fmt); |
251 | 0:839f52ef7657 | louridas | ret = vprintf(fmt, ap); |
252 | 0:839f52ef7657 | louridas | va_end(ap); |
253 | 0:839f52ef7657 | louridas | } |
254 | 0:839f52ef7657 | louridas | return ret;
|
255 | 0:839f52ef7657 | louridas | } |