#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cuda.h>
#include "Inform.h"
#include "WKFThreads.h"
#include "WKFUtils.h"
#include "CUDAKernels.h"
#include "Measure.h"
Go to the source code of this file.
Compounds | |
struct | busbwthrparms |
struct | globmembwthrparms |
struct | latthrparms |
struct | maddthrparms |
Defines | |
#define | RESTRICT __restrict__ |
#define | CUERR |
#define | FMADD16 |
#define | GRIDSIZEX 6144 |
#define | BLOCKSIZEX 64 |
#define | GLOOPS 2000 |
#define | FMADD16COUNT 32 |
#define | FLOPSPERFMADD16 32 |
#define | FLOPSPERLOOP (FMADD16COUNT * FLOPSPERFMADD16) |
#define | BWITER 500 |
#define | LATENCYITER 50000 |
Typedefs | |
typedef float4 | datatype |
Functions | |
__global__ void | madd_kernel (float *doutput) |
int | cudamaddgflops (int cudadev, double *gflops, int testloops) |
void * | cudamaddthread (void *voidparms) |
int | vmd_cuda_madd_gflops (int numdevs, int *devlist, double *gflops, int testloops) |
int | cudabusbw (int cudadev, double *hdmbsec, double *hdlatusec, double *phdmbsec, double *phdlatusec, double *dhmbsec, double *dhlatusec, double *pdhmbsec, double *pdhlatusec) |
void * | cudabusbwthread (void *voidparms) |
int | vmd_cuda_bus_bw (int numdevs, int *devlist, double *hdmbsec, double *hdlatusec, double *phdmbsec, double *phdlatusec, double *dhmbsec, double *dhlatusec, double *pdhmbsec, double *pdhlatusec) |
template<class T> __global__ void | gpuglobmemcpybw (T *dest, const T *src) |
template<class T> __global__ void | gpuglobmemsetbw (T *dest, const T val) |
int | cudaglobmembw (int cudadev, double *gpumemsetgbsec, double *gpumemcpygbsec) |
void * | cudaglobmembwthread (void *voidparms) |
int | vmd_cuda_globmem_bw (int numdevs, int *devlist, double *memsetgbsec, double *memcpygbsec) |
void * | vmddevpoollatencythread (void *voidparms) |
void * | vmddevpooltilelatencythread (void *voidparms) |
__global__ void | nopkernel (float *ddata) |
__global__ void | voidkernel (void) |
void * | vmddevpoolcudatilelatencythread (void *voidparms) |
int | vmd_cuda_devpool_latency (wkf_threadpool_t *devpool, int tilesize, double *kernlaunchlatency, double *barlatency, double *cyclelatency, double *tilelatency, double *kernellatency) |
void * | vmddevpoolcudalatencythread (void *voidparms) |
int | vmd_cuda_measure_latencies (wkf_threadpool_t *devpool) |
int | gpu_ooc_bench (wkf_threadpool_t *devpool, int nfiles, const char **trjfileset, const AtomSel *sel, int first, int last, int step) |
Definition in file CUDABench.cu.
|
BLOCKSIZEX: Definition at line 88 of file CUDABench.cu. Referenced by cudamaddgflops. |
|
BWITER: Definition at line 277 of file CUDABench.cu. Referenced by cudabusbw. |
|
CUERR: Value:

    { cudaError_t err; \
      if ((err = cudaGetLastError()) != cudaSuccess) { \
        printf("CUDA error: %s, %s line %d\n", cudaGetErrorString(err), __FILE__, __LINE__); \
        return -1; }}

Definition at line 54 of file CUDABench.cu. Referenced by cudabusbw, cudaglobmembw, cudamaddgflops, Msmpot_cuda_compute_latcut, Msmpot_cuda_compute_shortrng, Msmpot_cuda_setup_latcut, and Msmpot_cuda_setup_shortrng. |
|
FLOPSPERFMADD16: Definition at line 91 of file CUDABench.cu. |
|
FLOPSPERLOOP: Definition at line 94 of file CUDABench.cu. Referenced by cudamaddgflops. |
|
FMADD16: Value:

    tmp0 = tmp0*tmp4+tmp7; \
    tmp1 = tmp1*tmp5+tmp0; \
    tmp2 = tmp2*tmp6+tmp1; \
    tmp3 = tmp3*tmp7+tmp2; \
    tmp4 = tmp4*tmp0+tmp3; \
    tmp5 = tmp5*tmp1+tmp4; \
    tmp6 = tmp6*tmp2+tmp5; \
    tmp7 = tmp7*tmp3+tmp6; \
    tmp8 = tmp8*tmp12+tmp15; \
    tmp9 = tmp9*tmp13+tmp8; \
    tmp10 = tmp10*tmp14+tmp9; \
    tmp11 = tmp11*tmp15+tmp10; \
    tmp12 = tmp12*tmp8+tmp11; \
    tmp13 = tmp13*tmp9+tmp12; \
    tmp14 = tmp14*tmp10+tmp13; \
    tmp15 = tmp15*tmp11+tmp14;

Definition at line 68 of file CUDABench.cu. Referenced by madd_kernel. |
|
FMADD16COUNT: Definition at line 90 of file CUDABench.cu. |
|
GLOOPS: Definition at line 89 of file CUDABench.cu. Referenced by cudamaddgflops, and madd_kernel. |
|
GRIDSIZEX: Definition at line 87 of file CUDABench.cu. Referenced by VolMapCreateDistance::compute_frame, VolMapCreateOccupancy::compute_frame, VolMapCreateInterp::compute_frame, VolMapCreateDensity::compute_frame, VolMapCreateMask::compute_frame, and cudamaddgflops. |
|
LATENCYITER: Definition at line 278 of file CUDABench.cu. Referenced by cudabusbw. |
|
RESTRICT: Definition at line 37 of file CUDABench.cu. |
|
datatype (typedef): Definition at line 536 of file CUDABench.cu. Referenced by cudaglobmembw. |
|
cudabusbw(): Definition at line 280 of file CUDABench.cu. References BWITER, CUERR, LATENCYITER, NULL, wkf_timer_create, wkf_timer_destroy, wkf_timer_start, wkf_timer_stop, wkf_timer_time, and wkf_timerhandle. Referenced by cudabusbwthread. |
|
cudabusbwthread(): Definition at line 445 of file CUDABench.cu. References cudabusbw, busbwthrparms::deviceid, busbwthrparms::dhlatusec, busbwthrparms::dhmbsec, busbwthrparms::hdlatusec, busbwthrparms::hdmbsec, NULL, busbwthrparms::pdhlatusec, busbwthrparms::pdhmbsec, busbwthrparms::phdlatusec, and busbwthrparms::phdmbsec. Referenced by vmd_cuda_bus_bw. |
|
cudaglobmembw(): Definition at line 538 of file CUDABench.cu. References CUERR, datatype, and make_float4. Referenced by cudaglobmembwthread. |
|
cudaglobmembwthread(): Definition at line 634 of file CUDABench.cu. References cudaglobmembw, globmembwthrparms::deviceid, globmembwthrparms::memcpygbsec, globmembwthrparms::memsetgbsec, and NULL. Referenced by vmd_cuda_globmem_bw. |
|
cudamaddgflops(): Definition at line 147 of file CUDABench.cu. References BLOCKSIZEX, CUERR, FLOPSPERLOOP, GLOOPS, GRIDSIZEX, NULL, wkf_timer_create, wkf_timer_destroy, wkf_timer_start, wkf_timer_stop, wkf_timer_time, and wkf_timerhandle. Referenced by cudamaddthread. |
|
cudamaddthread(): Definition at line 213 of file CUDABench.cu. References cudamaddgflops, maddthrparms::deviceid, maddthrparms::gflops, NULL, and maddthrparms::testloops. Referenced by vmd_cuda_madd_gflops. |
|
gpu_ooc_bench(): Definition at line 1213 of file CUDABench.cu. References wkf_tasktile_struct::end, fio_fd, NULL, AtomSel::num_atoms, wkf_tasktile_struct::start, wkf_threadpool_create, wkf_threadpool_destroy, wkf_threadpool_launch, wkf_threadpool_sched_dynamic, wkf_timer_create, wkf_timer_start, wkf_timer_stop, wkf_timer_time, and wkf_timerhandle. |
|
gpuglobmemcpybw(): Definition at line 525 of file CUDABench.cu. |
|
gpuglobmemsetbw(): Definition at line 531 of file CUDABench.cu. |
|
madd_kernel(): Definition at line 96 of file CUDABench.cu. |
|
nopkernel(): Definition at line 715 of file CUDABench.cu. References NULL. |
|
|
|
vmd_cuda_globmem_bw(): Definition at line 640 of file CUDABench.cu. References cudaglobmembwthread, globmembwthrparms::deviceid, globmembwthrparms::memcpygbsec, globmembwthrparms::memsetgbsec, NULL, wkf_thread_create, wkf_thread_join, and wkf_thread_t. |
|
vmd_cuda_madd_gflops(): Definition at line 219 of file CUDABench.cu. References cudamaddthread, maddthrparms::deviceid, maddthrparms::gflops, NULL, maddthrparms::testloops, wkf_thread_create, wkf_thread_join, and wkf_thread_t. |
|
vmd_cuda_measure_latencies(): Definition at line 909 of file CUDABench.cu. References latthrparms::deviceid, latthrparms::testloops, vmddevpoolcudalatencythread, wkf_threadpool_get_workercount, and wkf_threadpool_launch. |
|
vmddevpoolcudalatencythread(): Definition at line 854 of file CUDABench.cu. References latthrparms::deviceid, latthrparms::kernlatency, NULL, latthrparms::testloops, wkf_threadpool_worker_getdata, wkf_threadpool_worker_getid, wkf_timer_create, wkf_timer_destroy, wkf_timer_start, wkf_timer_stop, wkf_timer_time, and wkf_timerhandle. Referenced by vmd_cuda_measure_latencies. |
|
vmddevpoolcudatilelatencythread(): Definition at line 729 of file CUDABench.cu. References NULL, WKF_SCHED_DONE, wkf_threadpool_next_tile, wkf_threadpool_worker_devscaletile, wkf_threadpool_worker_getdata, and wkf_threadpool_worker_getid. Referenced by vmd_cuda_devpool_latency. |
|
vmddevpoollatencythread(): Definition at line 693 of file CUDABench.cu. References NULL. Referenced by vmd_cuda_devpool_latency. |
|
vmddevpooltilelatencythread(): Definition at line 697 of file CUDABench.cu. References NULL, WKF_SCHED_DONE, wkf_threadpool_next_tile, wkf_threadpool_worker_getdata, and wkf_threadpool_worker_getid. Referenced by vmd_cuda_devpool_latency. |
|
voidkernel(): Definition at line 725 of file CUDABench.cu. |