5 #include <mach/mach_time.h>
10 #pragma mark * local ( static ) function prototypes *
/*
 * Forward declaration of the file-local timing helper. It takes no arguments
 * and returns a double; main() subtracts two readings and prints the
 * difference as seconds. The definition appears further down in this file.
 */
13 static double CurrentTime(
void);
17 #pragma mark * exported function implementations *
/*
 * main -- command-line timing driver.
 *
 * Reads a vector length from argv[1] and three double coefficients (a, b, c)
 * from argv[2..4], allocates six double arrays of that length, and prints one
 * report line per measured section: malloc, memset, the BLAS calls
 * cblas_dcopy / cblas_dscal / cblas_daxpy, a series of per-element
 * expressions of increasing size, and free.
 *
 * Every report prints the elapsed time (stop - start) followed by either a
 * per-element time or an MFLOPS figure.
 *
 * NOTE(review): in this view several sections show no loop header around the
 * "y[i] = ..." statements and no assignment to stop before it is printed;
 * braces and a return statement are not visible either. Confirm against the
 * complete file before relying on the section comments below.
 */
20 int main(
int argc,
char* argv[])
/*
 * NOTE(review): argv[1..4] are converted in these initialisers, ahead of the
 * usage message further down; no argc check is visible in this view.
 * atoi/atof give no error indication for malformed input.
 */
22 const int vlen=atoi(argv[1]),stride=1;
/* NOTE(review): m and ntimes are not referenced in the visible code. */
23 register int i,m,ntimes;
/* NOTE(review): xi and yi are not referenced in the visible code. */
24 register double a=atof(argv[2]),b=atof(argv[3]),
c=atof(argv[4]),xi,yi;
/*
 * Six array pointers plus the timing scalars.
 * NOTE(review): v and w are only allocated, and diff is never referenced,
 * in the visible code.
 */
25 double *x,*y,*
z,*
u,*
v,*
w,start,stop,diff;
/* Usage text (no trailing newline); the condition guarding it is not visible here. */
32 printf(
"usage: zerotest vlen da db dc");
/*
 * Section: allocate the six arrays, vlen doubles each.
 * NOTE(review): none of the malloc results is checked in the visible code.
 */
35 start = CurrentTime();
36 x = malloc(
sizeof(
double)*vlen);
37 y = malloc(
sizeof(
double)*vlen);
38 z = malloc(
sizeof(
double)*vlen);
39 u = malloc(
sizeof(
double)*vlen);
40 v = malloc(
sizeof(
double)*vlen);
41 w = malloc(
sizeof(
double)*vlen);
/*
 * NOTE(review): the per-double figure divides by 3.0*vlen although six
 * arrays are allocated above; the free report at the end uses 6.0*vlen.
 */
43 printf(
"%10.2e s %10.2e seconds per double; malloc\n",(stop-start),(stop-start)/(3.0*vlen));
/* Section: zero-fill x, y and z. u, v and w are not cleared here. */
45 start = CurrentTime();
46 memset(x,0,vlen*
sizeof(
double));
47 memset(y,0,vlen*
sizeof(
double));
48 memset(
z,0,vlen*
sizeof(
double));
50 printf(
"%10.2e s %10.2e seconds per double; memset\n",(stop-start),(stop-start)/(3.0*vlen));
/* Section labelled "x[i]=c"; the timed statements are not visible in this view. */
52 start = CurrentTime();
60 printf(
"%10.2e s %10.2e seconds per double; x[i]=c \n",(stop-start),(stop-start)/(3.0*vlen));
/* Section labelled "y[i]=x[i]"; the timed statements are not visible in this view. */
62 start = CurrentTime();
68 printf(
"%10.2e s %10.2e seconds per double; y[i]=x[i]\n",(stop-start),(stop-start)/(
float)(vlen));
/* Section: the same copy done through cblas_dcopy, using stride 1 for both arrays. */
70 start = CurrentTime();
71 cblas_dcopy(vlen,x,stride,y,stride);
73 printf(
"%10.2e s %10.2e seconds per double; dcopy y[i]=x[i]\n",(stop-start),(stop-start)/(
float)(vlen));
/*
 * Section labelled "y[i]=a*x[i]"; the timed statements are not visible.
 * NOTE(review): the label says MFLOPS, but the value printed is
 * (elapsed / vlen) / 1e6 -- a time, not a rate. The later MFLOPS lines
 * print (vlen*K / elapsed) / 1e6 instead.
 */
75 start = CurrentTime();
81 printf(
"%10.2e s %10.2f MFLOPS; y[i]=a*x[i]\n",(stop-start),((stop-start)/(
float)(vlen))/1.0e6);
/*
 * Section: scaling through cblas_dscal.
 * NOTE(review): the call is passed only x, while the label mentions y --
 * confirm the label. The MFLOPS value has the same time-instead-of-rate
 * form as the previous report.
 */
83 start = CurrentTime();
84 cblas_dscal(vlen,a,x,stride);
86 printf(
"%10.2e s %10.2f MFLOPS; dscal y[i]=a*x[i]\n",(stop-start),((stop-start)/(
float)(vlen))/1.0e6);
/*
 * Section labelled "y[i] += a*x[i]"; the timed statements are not visible.
 * From here on MFLOPS is computed as (vlen*K / elapsed) / 1e6, where K is
 * the operation count per element (2 here).
 * NOTE(review): vlen*K is evaluated in int before the division, so it can
 * overflow for large vlen, most easily in the K=25 reports.
 */
88 start = CurrentTime();
92 printf(
"%10.2e s %10.2f MFLOPS; y[i] += a*x[i]\n",(stop-start),(vlen*2/(stop-start))/1.0e6);
/* Section: the same update through cblas_daxpy with stride 1. */
94 start = CurrentTime();
95 cblas_daxpy(vlen,a,x,stride,y,stride);
97 printf(
"%10.2e s %10.2f MFLOPS; daxpy y[i] += a*x[i]\n",(stop-start),(vlen*2/(stop-start))/1.0e6);
/*
 * A second report labelled "y[i] += a*x[i]"; its start reading and timed
 * statements are not visible in this view.
 */
101 stop = CurrentTime();
102 printf(
"%10.2e s %10.2f MFLOPS; y[i] += a*x[i]\n",(stop-start),(vlen*2/(stop-start))/1.0e6);
/* Section labelled "y[i] = c + a*x[i]"; the timed statements are not visible. */
104 start = CurrentTime();
107 stop = CurrentTime();
108 printf(
"%10.2e s %10.2f MFLOPS; y[i] = c + a*x[i]\n",(stop-start),(vlen*2/(stop-start))/1.0e6);
/*
 * Family 1: expressions that read x and y and write y. Each section adds
 * terms to the previous expression; the multiplier of vlen in the printf
 * (4, 7, 10, 13, 17, 25) is the number of multiplies and adds per element.
 * The enclosing loop headers are not visible in this view.
 */
110 start = CurrentTime();
112 y[i] =
c + a*x[i] + b*y[i];
113 stop = CurrentTime();
114 printf(
"%10.2e s %10.2f MFLOPS; y[i] = c + a*x[i] + b*y[i]\n",(stop-start),(vlen*4/(stop-start))/1.0e6);
116 start = CurrentTime();
118 y[i] =
c + a*x[i] + b*y[i]+
c*x[i]*y[i];
119 stop = CurrentTime();
120 printf(
"%10.2e s %10.2f MFLOPS y[i] = c + a*x[i] + b*y[i] + c*x[i]*y[i]\n",(stop-start),(vlen*7/(stop-start))/1.0e6);
122 start = CurrentTime();
124 y[i] =
c + a*x[i] + b*y[i]+
c*x[i]*y[i] + a*x[i]*x[i];
125 stop = CurrentTime();
126 printf(
"%10.2e s %10.2f MFLOPS; y[i] = c + a*x[i] + b*y[i] + c*x[i]*y[i] + a*x[i]*x[i]\n",(stop-start),(vlen*10/(stop-start))/1.0e6);
128 start = CurrentTime();
130 y[i] =
c + a*x[i] + b*y[i]+
c*x[i]*y[i] + a*x[i]*x[i] + b*y[i]*y[i];
131 stop = CurrentTime();
132 printf(
"%10.2e s %10.2f MFLOPS; y[i] = c + a*x[i] + b*y[i] + c*x[i]*y[i] + a*x[i]*x[i] + b*y[i]*y[i]\n",(stop-start),(vlen*13/(stop-start))/1.0e6);
134 start = CurrentTime();
136 y[i] =
c + a*x[i] + b*y[i] +
c*x[i]*y[i] + a*x[i]*x[i] + b*y[i]*y[i] +
c*x[i]*x[i]*x[i];
137 stop = CurrentTime();
138 printf(
"%10.2e s %10.2f MFLOPS; y[i] = c + a*x[i] + b*y[i] + c*x[i]*y[i] + a*x[i]*x[i] + b*y[i]*y[i] + c*x[i]*x[i]*x[i]\n",(stop-start),(vlen*17/(stop-start))/1.0e6);
/* The last term of this expression reads z as well as x and y. */
140 start = CurrentTime();
142 y[i] =
c + a*x[i] + b*y[i] +
c*x[i]*y[i] + a*x[i]*x[i] + b*y[i]*y[i] +
c*x[i]*x[i]*x[i] + a*x[i]*y[i]*y[i] + b*x[i]*
z[i]*x[i];
143 stop = CurrentTime();
144 printf(
"%10.2e s %10.2f MFLOPS; y[i] = c + a*x[i] + b*y[i] + c*x[i]*y[i] + a*x[i]*x[i] + b*y[i]*y[i] + c*x[i]*x[i]*x[i] + a*x[i]*y[i]*y[i] + b*x[i]*z[i]*x[i]\n",(stop-start),(vlen*25/(stop-start))/1.0e6);
/*
 * Family 2: the same sequence with z taking the place of y on the
 * right-hand side, so y is written but not read.
 */
148 start = CurrentTime();
150 y[i] =
c + a*x[i] + b*
z[i];
151 stop = CurrentTime();
152 printf(
"%10.2e s %10.2f MFLOPS; y[i] = c + a*x[i] + b*z[i]\n",(stop-start),(vlen*4/(stop-start))/1.0e6);
154 start = CurrentTime();
156 y[i] =
c + a*x[i] + b*
z[i]+
c*x[i]*
z[i];
157 stop = CurrentTime();
158 printf(
"%10.2e s %10.2f MFLOPS y[i] = c + a*x[i] + b*z[i] + c*x[i]*z[i]\n",(stop-start),(vlen*7/(stop-start))/1.0e6);
160 start = CurrentTime();
162 y[i] =
c + a*x[i] + b*
z[i]+
c*x[i]*
z[i] + a*x[i]*x[i];
163 stop = CurrentTime();
164 printf(
"%10.2e s %10.2f MFLOPS; y[i] = c + a*x[i] + b*z[i] + c*x[i]*z[i] + a*x[i]*x[i]\n",(stop-start),(vlen*10/(stop-start))/1.0e6);
166 start = CurrentTime();
168 y[i] =
c + a*x[i] + b*
z[i]+
c*x[i]*
z[i] + a*x[i]*x[i] + b*
z[i]*
z[i];
169 stop = CurrentTime();
170 printf(
"%10.2e s %10.2f MFLOPS; y[i] = c + a*x[i] + b*z[i] + c*x[i]*z[i] + a*x[i]*x[i] + b*z[i]*z[i]\n",(stop-start),(vlen*13/(stop-start))/1.0e6);
172 start = CurrentTime();
174 y[i] =
c + a*x[i] + b*
z[i] +
c*x[i]*
z[i] + a*x[i]*x[i] + b*
z[i]*
z[i] +
c*x[i]*x[i]*x[i];
175 stop = CurrentTime();
176 printf(
"%10.2e s %10.2f MFLOPS; y[i] = c + a*x[i] + b*z[i] + c*x[i]*z[i] + a*x[i]*x[i] + b*z[i]*z[i] + c*x[i]*x[i]*x[i]\n",(stop-start),(vlen*17/(stop-start))/1.0e6);
178 start = CurrentTime();
180 y[i] =
c + a*x[i] + b*
z[i] +
c*x[i]*
z[i] + a*x[i]*x[i] + b*
z[i]*
z[i] +
c*x[i]*x[i]*x[i] + a*x[i]*
z[i]*
z[i] + b*x[i]*
z[i]*x[i];
181 stop = CurrentTime();
182 printf(
"%10.2e s %10.2f MFLOPS; y[i] = c + a*x[i] + b*z[i] + c*x[i]*z[i] + a*x[i]*x[i] + b*z[i]*z[i] + c*x[i]*x[i]*x[i] + a*x[i]*z[i]*z[i] + b*x[i]*z[i]*x[i]\n",(stop-start),(vlen*25/(stop-start))/1.0e6);
/*
 * Family 3: the same sequence with u replacing x in some factors, so three
 * input arrays (x, z, u) are read. This family starts at the 7-operation
 * expression.
 * NOTE(review): no visible code stores to u after malloc (the memset
 * section covers only x, y and z) -- confirm u is initialised before use.
 */
186 start = CurrentTime();
188 y[i] =
c + a*
u[i] + b*
z[i]+
c*x[i]*
z[i];
189 stop = CurrentTime();
190 printf(
"%10.2e s %10.2f MFLOPS y[i] = c + a*u[i] + b*z[i] + c*x[i]*z[i]\n",(stop-start),(vlen*7/(stop-start))/1.0e6);
192 start = CurrentTime();
194 y[i] =
c + a*
u[i] + b*
z[i]+
c*x[i]*
z[i] + a*
u[i]*x[i];
195 stop = CurrentTime();
196 printf(
"%10.2e s %10.2f MFLOPS; y[i] = c + a*u[i] + b*z[i] + c*x[i]*z[i] + a*u[i]*x[i]\n",(stop-start),(vlen*10/(stop-start))/1.0e6);
198 start = CurrentTime();
200 y[i] =
c + a*
u[i] + b*
z[i]+
c*x[i]*
z[i] + a*
u[i]*x[i] + b*
z[i]*
z[i];
201 stop = CurrentTime();
202 printf(
"%10.2e s %10.2f MFLOPS; y[i] = c + a*u[i] + b*z[i] + c*x[i]*z[i] + a*u[i]*x[i] + b*z[i]*z[i]\n",(stop-start),(vlen*13/(stop-start))/1.0e6);
204 start = CurrentTime();
206 y[i] =
c + a*
u[i] + b*
z[i] +
c*x[i]*
z[i] + a*
u[i]*x[i] + b*
z[i]*
z[i] +
c*
u[i]*x[i]*
u[i];
207 stop = CurrentTime();
208 printf(
"%10.2e s %10.2f MFLOPS; y[i] = c + a*u[i] + b*z[i] + c*x[i]*z[i] + a*u[i]*x[i] + b*z[i]*z[i] + c*u[i]*x[i]*u[i]\n",(stop-start),(vlen*17/(stop-start))/1.0e6);
210 start = CurrentTime();
212 y[i] =
c + a*
u[i] + b*
z[i] +
c*x[i]*
z[i] + a*
u[i]*x[i] + b*
z[i]*
z[i] +
c*
u[i]*x[i]*
u[i] + a*x[i]*
z[i]*
z[i] + b*
u[i]*
z[i]*x[i];
213 stop = CurrentTime();
214 printf(
"%10.2e s %10.2f MFLOPS; y[i] = c + a*u[i] + b*z[i] + c*x[i]*z[i] + a*u[i]*x[i] + b*z[i]*z[i] + c*u[i]*x[i]*u[i] + a*x[i]*z[i]*z[i] + b*u[i]*z[i]*x[i]\n",(stop-start),(vlen*25/(stop-start))/1.0e6);
/*
 * Section labelled "free": the statements between the two readings are not
 * visible in this view. The per-double figure divides by 6.0*vlen.
 */
217 start = CurrentTime();
224 stop = CurrentTime();
225 printf(
"%10.2e s %10.2e seconds per double; free; \n",(stop-start),(stop-start)/(6.0*vlen));
231 #pragma mark * local ( static ) function implementations *
239 static double CurrentTime(
void)
241 static double scale = 0.0;
244 mach_timebase_info_data_t info;
245 mach_timebase_info(&info);
246 scale = info.numer / info.denom * 1e-9;
249 return mach_absolute_time() * scale;