00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include <stdlib.h>
00030 #include <string.h>
00031 #include "WKFUtils.h"
00032 #include "WKFThreads.h"
00033 #include "utilities.h"
00034
00035
00036
00037
00038
00039
00040
// RESTRICT decorates the pointer parameters of the stream kernels below.
// The qualifier is currently switched off, so RESTRICT expands to nothing;
// change the condition to 0 to make it expand to the C99 'restrict' keyword.
#if 1
#define RESTRICT
#else
#define RESTRICT restrict
#endif
00046
00047
00048
00049
00050
00051
// VECTORIZEME is written immediately in front of each kernel loop.
// It is currently switched off and expands to nothing; change the condition
// to 0 to make it expand to _Pragma("vector always").
#if 1
#define VECTORIZEME
#else
#define VECTORIZEME _Pragma("vector always")
#endif
00057
00058
00059
00060
00061
00062
// Give the three double-precision work arrays their starting contents:
// every element of a becomes 1.0, of b 2.0 and of c 0.0.
//   a, b, c - arrays holding at least N doubles each
//   N       - number of elements to fill; nothing is written when N <= 0
void dstream_init(double * RESTRICT a, double * RESTRICT b,
                  double * RESTRICT c, int N) {
  const double a_fill = 1.0;
  const double b_fill = 2.0;
  const double c_fill = 0.0;

  VECTORIZEME
  for (int i = 0; i < N; ++i) {
    a[i] = a_fill;
    b[i] = b_fill;
    c[i] = c_fill;
  }
}
00073
// Copy kernel (double precision): a[i] = b[i] for i in [0, N).
//   a      - destination array, at least N doubles
//   b      - source array, at least N doubles
//   N      - number of elements to process
//   mbsize - receives the amount of data counted for this pass, in units of
//            1024*1024 bytes: two arrays of N doubles (one read, one written)
void dstream_copy(double * RESTRICT a, const double * RESTRICT b,
                  int N, double *mbsize) {
  const double bytes_per_mb = 1024.0 * 1024.0;

  VECTORIZEME
  for (int i = 0; i < N; ++i) {
    a[i] = b[i];
  }

  *mbsize = (2L * sizeof(*a) * N) / bytes_per_mb;
}
00083
// Scale kernel (double precision): a[i] = scalar * b[i] for i in [0, N).
//   a      - destination array, at least N doubles
//   b      - source array, at least N doubles
//   scalar - multiplier applied to every source element
//   N      - number of elements to process
//   mbsize - receives the amount of data counted for this pass, in units of
//            1024*1024 bytes: two arrays of N doubles (one read, one written)
void dstream_scale(double * RESTRICT a, const double * RESTRICT b,
                   double scalar, int N, double *mbsize) {
  const double bytes_per_mb = 1024.0 * 1024.0;

  VECTORIZEME
  for (int i = 0; i < N; ++i) {
    a[i] = scalar * b[i];
  }

  *mbsize = (2L * sizeof(*a) * N) / bytes_per_mb;
}
00093
// Add kernel (double precision): a[i] = b[i] + c[i] for i in [0, N).
//   a      - destination array, at least N doubles
//   b, c   - source arrays, at least N doubles each
//   N      - number of elements to process
//   mbsize - receives the amount of data counted for this pass, in units of
//            1024*1024 bytes: three arrays of N doubles (two read, one written)
void dstream_add(double * RESTRICT a, const double * RESTRICT b,
                 const double * RESTRICT c, int N, double *mbsize) {
  const double bytes_per_mb = 1024.0 * 1024.0;

  VECTORIZEME
  for (int i = 0; i < N; ++i) {
    a[i] = b[i] + c[i];
  }

  *mbsize = (3L * sizeof(*a) * N) / bytes_per_mb;
}
00103
// Triad kernel (double precision): a[i] = b[i] + scalar * c[i] for i in [0, N).
//   a      - destination array, at least N doubles
//   b, c   - source arrays, at least N doubles each
//   scalar - multiplier applied to every element of c
//   N      - number of elements to process
//   mbsize - receives the amount of data counted for this pass, in units of
//            1024*1024 bytes: three arrays of N doubles (two read, one written)
void dstream_triad(double * RESTRICT a, const double * RESTRICT b,
                   const double * RESTRICT c, double scalar, int N,
                   double *mbsize) {
  const double bytes_per_mb = 1024.0 * 1024.0;

  VECTORIZEME
  for (int i = 0; i < N; ++i) {
    a[i] = b[i] + scalar * c[i];
  }

  *mbsize = (3L * sizeof(*a) * N) / bytes_per_mb;
}
00114
00115
00116
00117
00118
00119
00120
// Give the three single-precision work arrays their starting contents:
// every element of a becomes 1.0f, of b 2.0f and of c 0.0f.
//   a, b, c - arrays holding at least N floats each
//   N       - number of elements to fill; nothing is written when N <= 0
void fstream_init(float * RESTRICT a, float * RESTRICT b,
                  float * RESTRICT c, int N) {
  const float a_fill = 1.0f;
  const float b_fill = 2.0f;
  const float c_fill = 0.0f;

  VECTORIZEME
  for (int i = 0; i < N; ++i) {
    a[i] = a_fill;
    b[i] = b_fill;
    c[i] = c_fill;
  }
}
00131
// Copy kernel (single precision): a[i] = b[i] for i in [0, N).
//   a      - destination array, at least N floats
//   b      - source array, at least N floats
//   N      - number of elements to process
//   mbsize - receives the amount of data counted for this pass, in units of
//            1024*1024 bytes: two arrays of N floats (one read, one written)
void fstream_copy(float * RESTRICT a, const float * RESTRICT b,
                  int N, double *mbsize) {
  const double bytes_per_mb = 1024.0 * 1024.0;

  VECTORIZEME
  for (int i = 0; i < N; ++i) {
    a[i] = b[i];
  }

  *mbsize = (2L * sizeof(*a) * N) / bytes_per_mb;
}
00141
// Scale kernel (single precision): a[i] = scalar * b[i] for i in [0, N).
//   a      - destination array, at least N floats
//   b      - source array, at least N floats
//   scalar - multiplier applied to every source element
//   N      - number of elements to process
//   mbsize - receives the amount of data counted for this pass, in units of
//            1024*1024 bytes: two arrays of N floats (one read, one written)
void fstream_scale(float * RESTRICT a, const float * RESTRICT b,
                   float scalar, int N, double *mbsize) {
  const double bytes_per_mb = 1024.0 * 1024.0;

  VECTORIZEME
  for (int i = 0; i < N; ++i) {
    a[i] = scalar * b[i];
  }

  *mbsize = (2L * sizeof(*a) * N) / bytes_per_mb;
}
00151
// Add kernel (single precision): a[i] = b[i] + c[i] for i in [0, N).
//   a      - destination array, at least N floats
//   b, c   - source arrays, at least N floats each
//   N      - number of elements to process
//   mbsize - receives the amount of data counted for this pass, in units of
//            1024*1024 bytes: three arrays of N floats (two read, one written)
void fstream_add(float * RESTRICT a, const float * RESTRICT b,
                 const float * RESTRICT c, int N, double *mbsize) {
  const double bytes_per_mb = 1024.0 * 1024.0;

  VECTORIZEME
  for (int i = 0; i < N; ++i) {
    a[i] = b[i] + c[i];
  }

  *mbsize = (3L * sizeof(*a) * N) / bytes_per_mb;
}
00161
// Triad kernel (single precision): a[i] = b[i] + scalar * c[i] for i in [0, N).
//   a      - destination array, at least N floats
//   b, c   - source arrays, at least N floats each
//   scalar - multiplier applied to every element of c
//   N      - number of elements to process
//   mbsize - receives the amount of data counted for this pass, in units of
//            1024*1024 bytes: three arrays of N floats (two read, one written)
void fstream_triad(float * RESTRICT a, const float * RESTRICT b,
                   const float * RESTRICT c, float scalar, int N,
                   double *mbsize) {
  const double bytes_per_mb = 1024.0 * 1024.0;

  VECTORIZEME
  for (int i = 0; i < N; ++i) {
    a[i] = b[i] + scalar * c[i];
  }

  *mbsize = (3L * sizeof(*a) * N) / bytes_per_mb;
}
00172
00173
00174
00175
00176
00177 int stream_bench(int N, double *time, double *mbsec) {
00178 double *da, *db, *dc;
00179 float *fa, *fb, *fc;
00180 wkf_timerhandle timer;
00181 int rc = 0;
00182
00183 timer = wkf_timer_create();
00184
00185
00186
00187
00188 da = (double *) malloc(N * sizeof(double));
00189 db = (double *) malloc(N * sizeof(double));
00190 dc = (double *) malloc(N * sizeof(double));
00191
00192 if ((da != NULL) && (db != NULL) && (dc != NULL)) {
00193 double mbsz;
00194
00195 dstream_init(da, db, dc, N);
00196
00197 wkf_timer_start(timer);
00198 dstream_copy(da, db, N, &mbsz);
00199 wkf_timer_stop(timer);
00200 time[0] = wkf_timer_time(timer);
00201 mbsec[0] = mbsz / time[0];
00202
00203 wkf_timer_start(timer);
00204 dstream_scale(da, db, 2.0, N, &mbsz);
00205 wkf_timer_stop(timer);
00206 time[1] = wkf_timer_time(timer);
00207 mbsec[1] = mbsz / time[1];
00208
00209 wkf_timer_start(timer);
00210 dstream_add(da, db, dc, N, &mbsz);
00211 wkf_timer_stop(timer);
00212 time[2] = wkf_timer_time(timer);
00213 mbsec[2] = mbsz / time[2];
00214
00215 wkf_timer_start(timer);
00216 dstream_triad(da, db, dc, 2.0, N, &mbsz);
00217 wkf_timer_stop(timer);
00218 time[3] = wkf_timer_time(timer);
00219 mbsec[3] = mbsz / time[3];
00220 } else {
00221 rc = -1;
00222 }
00223
00224 if (da)
00225 free(da);
00226 if (db)
00227 free(db);
00228 if (dc)
00229 free(dc);
00230
00231 if (rc) {
00232 wkf_timer_destroy(timer);
00233 return rc;
00234 }
00235
00236
00237
00238
00239 fa = (float *) malloc(N * sizeof(float));
00240 fb = (float *) malloc(N * sizeof(float));
00241 fc = (float *) malloc(N * sizeof(float));
00242
00243 if ((fa != NULL) && (fb != NULL) && (fc != NULL)) {
00244 double mbsz;
00245
00246 fstream_init(fa, fb, fc, N);
00247
00248 wkf_timer_start(timer);
00249 fstream_copy(fa, fb, N, &mbsz);
00250 wkf_timer_stop(timer);
00251 time[4] = wkf_timer_time(timer);
00252 mbsec[4] = mbsz / time[4];
00253
00254 wkf_timer_start(timer);
00255 fstream_scale(fa, fb, 2.0, N, &mbsz);
00256 wkf_timer_stop(timer);
00257 time[5] = wkf_timer_time(timer);
00258 mbsec[5] = mbsz / time[5];
00259
00260 wkf_timer_start(timer);
00261 fstream_add(fa, fb, fc, N, &mbsz);
00262 wkf_timer_stop(timer);
00263 time[6] = wkf_timer_time(timer);
00264 mbsec[6] = mbsz / time[6];
00265
00266 wkf_timer_start(timer);
00267 fstream_triad(fa, fb, fc, 2.0, N, &mbsz);
00268 wkf_timer_stop(timer);
00269 time[7] = wkf_timer_time(timer);
00270 mbsec[7] = mbsz / time[7];
00271 } else {
00272 rc = -1;
00273 }
00274
00275 if (fa)
00276 free(fa);
00277 if (fb)
00278 free(fb);
00279 if (fc)
00280 free(fc);
00281
00282 wkf_timer_destroy(timer);
00283
00284 return rc;
00285 }
00286
00287
00288
00289 void vmdbench_minmax_1fv(int sz, int reps, double &runtime, double &bwmbsec) {
00290 int i;
00291 float minf=0, maxf=0;
00292
00293
00294 float *fv = (float *) malloc(sz * sizeof(float));
00295 for (i=0; i<sz; i++) {
00296 fv[i] = (float) i;
00297 }
00298
00299 wkf_timerhandle timer;
00300 timer = wkf_timer_create();
00301 wkf_timer_start(timer);
00302 int r;
00303 for (r=0; r<reps; r++)
00304 minmax_1fv_aligned(fv, sz, &minf, &maxf);
00305 wkf_timer_stop(timer);
00306 runtime = wkf_timer_time(timer);
00307
00308
00309
00310
00311 bwmbsec = (reps * sz * sizeof(float) / (1024.0 * 1024.0)) / runtime;
00312
00313
00314 free(fv);
00315 wkf_timer_destroy(timer);
00316 }
00317
00318
00319 void vmdbench_minmaxmean_1fv(int sz, int reps,
00320 double &runtime, double &bwmbsec) {
00321 int i;
00322 float minf=0, maxf=0, meanf=0;
00323
00324
00325 float *fv = (float *) malloc(sz * sizeof(float));
00326 for (i=0; i<sz; i++) {
00327 fv[i] = (float) i;
00328 }
00329
00330 wkf_timerhandle timer;
00331 timer = wkf_timer_create();
00332 wkf_timer_start(timer);
00333 int r;
00334 for (r=0; r<reps; r++)
00335 minmaxmean_1fv_aligned(fv, sz, &minf, &maxf, &meanf);
00336 wkf_timer_stop(timer);
00337 runtime = wkf_timer_time(timer);
00338
00339
00340
00341
00342 bwmbsec = (reps * sz * sizeof(float) / (1024.0 * 1024.0)) / runtime;
00343
00344
00345 free(fv);
00346 wkf_timer_destroy(timer);
00347 }
00348
00349
00350 void vmdbench_minmax_3fv(int sz, int reps, double &runtime, double &bwmbsec) {
00351 int i;
00352 float minfv[3] = { 0 }, maxfv[3] = { 0 };
00353
00354
00355 float *fv = (float *) malloc(3L * sz * sizeof(float));
00356 for (i=0; i<sz * 3L; i++) {
00357 fv[i] = (float) i;
00358 }
00359
00360 wkf_timerhandle timer;
00361 timer = wkf_timer_create();
00362 wkf_timer_start(timer);
00363 int r;
00364 for (r=0; r<reps; r++)
00365 minmax_3fv_aligned(fv, sz, minfv, maxfv);
00366 wkf_timer_stop(timer);
00367 runtime = wkf_timer_time(timer);
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377 bwmbsec = (reps * 3L * sz * sizeof(float) / (1024.0 * 1024.0)) / runtime;
00378
00379
00380 free(fv);
00381 wkf_timer_destroy(timer);
00382 }
00383
00384
00385 void vmdbench_analyze_selection(int sz, int reps,
00386 double &runtime, double &bwmbsec) {
00387 int i;
00388 int first=0, last=-1, selected=0;
00389 int *on = (int *) calloc(1, sz * sizeof(int));
00390
00391
00392 int lane=0;
00393 for (i=sz/2; i<(sz-7); i+=8) {
00394 on[i+lane] = 1;
00395 lane = (lane+1) & 0x7;
00396 }
00397
00398 wkf_timerhandle timer;
00399 timer = wkf_timer_create();
00400 wkf_timer_start(timer);
00401 int r;
00402 for (r=0; r<reps; r++)
00403 analyze_selection_aligned(sz, on, &first, &last, &selected);
00404 wkf_timer_stop(timer);
00405 runtime = wkf_timer_time(timer);
00406
00407
00408
00409 bwmbsec = (reps * sz * sizeof(int) / (1024.0 * 1024.0)) / runtime;
00410
00411
00412 free(on);
00413 wkf_timer_destroy(timer);
00414 }
00415
00416
00417
00418
00419
00420