Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

CUDABench.cu File Reference

Short benchmark kernels to measure GPU performance. More...

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cuda.h>
#include "Inform.h"
#include "WKFThreads.h"
#include "WKFUtils.h"
#include "CUDAKernels.h"
#include "Measure.h"

Go to the source code of this file.

Compounds

struct  busbwthrparms
struct  globmembwthrparms
struct  latthrparms
struct  maddthrparms

Defines

#define RESTRICT   __restrict__
#define CUERR
#define FMADD16
#define GRIDSIZEX   6144
#define BLOCKSIZEX   64
#define GLOOPS   2000
#define FMADD16COUNT   32
#define FLOPSPERFMADD16   32
#define FLOPSPERLOOP   (FMADD16COUNT * FLOPSPERFMADD16)
#define BWITER   500
#define LATENCYITER   50000

Typedefs

typedef float4 datatype

Functions

__global__ void madd_kernel (float *doutput)
int cudamaddgflops (int cudadev, double *gflops, int testloops)
void * cudamaddthread (void *voidparms)
int vmd_cuda_madd_gflops (int numdevs, int *devlist, double *gflops, int testloops)
int cudabusbw (int cudadev, double *hdmbsec, double *hdlatusec, double *phdmbsec, double *phdlatusec, double *dhmbsec, double *dhlatusec, double *pdhmbsec, double *pdhlatusec)
void * cudabusbwthread (void *voidparms)
int vmd_cuda_bus_bw (int numdevs, int *devlist, double *hdmbsec, double *hdlatusec, double *phdmbsec, double *phdlatusec, double *dhmbsec, double *dhlatusec, double *pdhmbsec, double *pdhlatusec)
template<class T> __global__ void gpuglobmemcpybw (T *dest, const T *src)
template<class T> __global__ void gpuglobmemsetbw (T *dest, const T val)
int cudaglobmembw (int cudadev, double *gpumemsetgbsec, double *gpumemcpygbsec)
void * cudaglobmembwthread (void *voidparms)
int vmd_cuda_globmem_bw (int numdevs, int *devlist, double *memsetgbsec, double *memcpygbsec)
void * vmddevpoollatencythread (void *voidparms)
void * vmddevpooltilelatencythread (void *voidparms)
__global__ void nopkernel (float *ddata)
__global__ void voidkernel (void)
void * vmddevpoolcudatilelatencythread (void *voidparms)
int vmd_cuda_devpool_latency (wkf_threadpool_t *devpool, int tilesize, double *kernlaunchlatency, double *barlatency, double *cyclelatency, double *tilelatency, double *kernellatency)
void * vmddevpoolcudalatencythread (void *voidparms)
int vmd_cuda_measure_latencies (wkf_threadpool_t *devpool)
int gpu_ooc_bench (wkf_threadpool_t *devpool, int nfiles, const char **trjfileset, const AtomSel *sel, int first, int last, int step)


Detailed Description

Short benchmark kernels to measure GPU performance.

Definition in file CUDABench.cu.


Define Documentation

#define BLOCKSIZEX   64
 

Definition at line 88 of file CUDABench.cu.

Referenced by cudamaddgflops.

#define BWITER   500
 

Definition at line 277 of file CUDABench.cu.

Referenced by cudabusbw.

#define CUERR
 

Value:

{ cudaError_t err; \
  if ((err = cudaGetLastError()) != cudaSuccess) { \
  printf("CUDA error: %s, %s line %d\n", cudaGetErrorString(err), __FILE__, __LINE__); \
  return -1; }}

Definition at line 54 of file CUDABench.cu.

Referenced by cudabusbw, cudaglobmembw, cudamaddgflops, Msmpot_cuda_compute_latcut, Msmpot_cuda_compute_shortrng, Msmpot_cuda_setup_latcut, and Msmpot_cuda_setup_shortrng.

#define FLOPSPERFMADD16   32
 

Definition at line 91 of file CUDABench.cu.

#define FLOPSPERLOOP   (FMADD16COUNT * FLOPSPERFMADD16)
 

Definition at line 94 of file CUDABench.cu.

Referenced by cudamaddgflops.

#define FMADD16
 

Value:

tmp0  = tmp0*tmp4+tmp7;     \
    tmp1  = tmp1*tmp5+tmp0;     \
    tmp2  = tmp2*tmp6+tmp1;     \
    tmp3  = tmp3*tmp7+tmp2;     \
    tmp4  = tmp4*tmp0+tmp3;     \
    tmp5  = tmp5*tmp1+tmp4;     \
    tmp6  = tmp6*tmp2+tmp5;     \
    tmp7  = tmp7*tmp3+tmp6;     \
    tmp8  = tmp8*tmp12+tmp15;   \
    tmp9  = tmp9*tmp13+tmp8;    \
    tmp10 = tmp10*tmp14+tmp9;   \
    tmp11 = tmp11*tmp15+tmp10;  \
    tmp12 = tmp12*tmp8+tmp11;   \
    tmp13 = tmp13*tmp9+tmp12;   \
    tmp14 = tmp14*tmp10+tmp13;  \
    tmp15 = tmp15*tmp11+tmp14;

Definition at line 68 of file CUDABench.cu.

Referenced by madd_kernel.

#define FMADD16COUNT   32
 

Definition at line 90 of file CUDABench.cu.

#define GLOOPS   2000
 

Definition at line 89 of file CUDABench.cu.

Referenced by cudamaddgflops, and madd_kernel.

#define GRIDSIZEX   6144
 

Definition at line 87 of file CUDABench.cu.

Referenced by VolMapCreateDistance::compute_frame, VolMapCreateOccupancy::compute_frame, VolMapCreateInterp::compute_frame, VolMapCreateDensity::compute_frame, VolMapCreateMask::compute_frame, and cudamaddgflops.

#define LATENCYITER   50000
 

Definition at line 278 of file CUDABench.cu.

Referenced by cudabusbw.

#define RESTRICT   __restrict__
 

Definition at line 37 of file CUDABench.cu.


Typedef Documentation

typedef float4 datatype
 

Definition at line 536 of file CUDABench.cu.

Referenced by cudaglobmembw.


Function Documentation

int cudabusbw (int cudadev,
               double *hdmbsec,
               double *hdlatusec,
               double *phdmbsec,
               double *phdlatusec,
               double *dhmbsec,
               double *dhlatusec,
               double *pdhmbsec,
               double *pdhlatusec) [static]
 

Definition at line 280 of file CUDABench.cu.

References BWITER, CUERR, LATENCYITER, NULL, wkf_timer_create, wkf_timer_destroy, wkf_timer_start, wkf_timer_stop, wkf_timer_time, and wkf_timerhandle.

Referenced by cudabusbwthread.

void* cudabusbwthread (void *voidparms) [static]
 

Definition at line 445 of file CUDABench.cu.

References cudabusbw, busbwthrparms::deviceid, busbwthrparms::dhlatusec, busbwthrparms::dhmbsec, busbwthrparms::hdlatusec, busbwthrparms::hdmbsec, NULL, busbwthrparms::pdhlatusec, busbwthrparms::pdhmbsec, busbwthrparms::phdlatusec, and busbwthrparms::phdmbsec.

Referenced by vmd_cuda_bus_bw.

int cudaglobmembw (int cudadev,
                   double *gpumemsetgbsec,
                   double *gpumemcpygbsec) [static]
 

Definition at line 538 of file CUDABench.cu.

References CUERR, datatype, and make_float4.

Referenced by cudaglobmembwthread.

void* cudaglobmembwthread (void *voidparms) [static]
 

Definition at line 634 of file CUDABench.cu.

References cudaglobmembw, globmembwthrparms::deviceid, globmembwthrparms::memcpygbsec, globmembwthrparms::memsetgbsec, and NULL.

Referenced by vmd_cuda_globmem_bw.

int cudamaddgflops (int cudadev,
                    double *gflops,
                    int testloops) [static]
 

Definition at line 147 of file CUDABench.cu.

References BLOCKSIZEX, CUERR, FLOPSPERLOOP, GLOOPS, GRIDSIZEX, NULL, wkf_timer_create, wkf_timer_destroy, wkf_timer_start, wkf_timer_stop, wkf_timer_time, and wkf_timerhandle.

Referenced by cudamaddthread.

void* cudamaddthread (void *voidparms) [static]
 

Definition at line 213 of file CUDABench.cu.

References cudamaddgflops, maddthrparms::deviceid, maddthrparms::gflops, NULL, and maddthrparms::testloops.

Referenced by vmd_cuda_madd_gflops.

int gpu_ooc_bench (wkf_threadpool_t *devpool,
                   int nfiles,
                   const char **trjfileset,
                   const AtomSel *sel,
                   int first,
                   int last,
                   int step)
 

Definition at line 1213 of file CUDABench.cu.

References wkf_tasktile_struct::end, fio_fd, NULL, AtomSel::num_atoms, wkf_tasktile_struct::start, wkf_threadpool_create, wkf_threadpool_destroy, wkf_threadpool_launch, wkf_threadpool_sched_dynamic, wkf_timer_create, wkf_timer_start, wkf_timer_stop, wkf_timer_time, and wkf_timerhandle.

template<class T>
__global__ void gpuglobmemcpybw (T *dest,
                                 const T *src)
 

Definition at line 525 of file CUDABench.cu.

template<class T>
__global__ void gpuglobmemsetbw (T *dest,
                                 const T val)
 

Definition at line 531 of file CUDABench.cu.

__global__ void madd_kernel (float *doutput) [static]
 

Definition at line 96 of file CUDABench.cu.

References FMADD16, and GLOOPS.

__global__ void nopkernel (float *ddata) [static]
 

Definition at line 715 of file CUDABench.cu.

References NULL.

int vmd_cuda_bus_bw (int numdevs,
                     int *devlist,
                     double *hdmbsec,
                     double *hdlatusec,
                     double *phdmbsec,
                     double *phdlatusec,
                     double *dhmbsec,
                     double *dhlatusec,
                     double *pdhmbsec,
                     double *pdhlatusec)
 

Definition at line 455 of file CUDABench.cu.

References cudabusbwthread, busbwthrparms::deviceid, busbwthrparms::dhlatusec, busbwthrparms::dhmbsec, busbwthrparms::hdlatusec, busbwthrparms::hdmbsec, NULL, busbwthrparms::pdhlatusec, busbwthrparms::pdhmbsec, busbwthrparms::phdlatusec, busbwthrparms::phdmbsec, wkf_thread_create, wkf_thread_join, and wkf_thread_t.

int vmd_cuda_devpool_latency (wkf_threadpool_t *devpool,
                              int tilesize,
                              double *kernlaunchlatency,
                              double *barlatency,
                              double *cyclelatency,
                              double *tilelatency,
                              double *kernellatency)
 

Definition at line 761 of file CUDABench.cu.

References wkf_tasktile_struct::end, NULL, wkf_tasktile_struct::start, vmd_cuda_measure_latencies, vmddevpoolcudatilelatencythread, vmddevpoollatencythread, vmddevpooltilelatencythread, wkf_threadpool_launch, wkf_threadpool_sched_dynamic, wkf_threadpool_wait, wkf_timer_create, wkf_timer_destroy, wkf_timer_start, wkf_timer_stop, wkf_timer_time, and wkf_timerhandle.

int vmd_cuda_globmem_bw (int numdevs,
                         int *devlist,
                         double *memsetgbsec,
                         double *memcpygbsec)
 

Definition at line 640 of file CUDABench.cu.

References cudaglobmembwthread, globmembwthrparms::deviceid, globmembwthrparms::memcpygbsec, globmembwthrparms::memsetgbsec, NULL, wkf_thread_create, wkf_thread_join, and wkf_thread_t.

int vmd_cuda_madd_gflops (int numdevs,
                          int *devlist,
                          double *gflops,
                          int testloops)
 

Definition at line 219 of file CUDABench.cu.

References cudamaddthread, maddthrparms::deviceid, maddthrparms::gflops, NULL, maddthrparms::testloops, wkf_thread_create, wkf_thread_join, and wkf_thread_t.

int vmd_cuda_measure_latencies (wkf_threadpool_t *devpool)
 

Definition at line 909 of file CUDABench.cu.

References latthrparms::deviceid, latthrparms::testloops, vmddevpoolcudalatencythread, wkf_threadpool_get_workercount, and wkf_threadpool_launch.

void* vmddevpoolcudalatencythread (void *voidparms) [static]
 

Definition at line 854 of file CUDABench.cu.

References latthrparms::deviceid, latthrparms::kernlatency, NULL, latthrparms::testloops, wkf_threadpool_worker_getdata, wkf_threadpool_worker_getid, wkf_timer_create, wkf_timer_destroy, wkf_timer_start, wkf_timer_stop, wkf_timer_time, and wkf_timerhandle.

Referenced by vmd_cuda_measure_latencies.

void* vmddevpoolcudatilelatencythread (void *voidparms) [static]
 

Definition at line 729 of file CUDABench.cu.

References NULL, WKF_SCHED_DONE, wkf_threadpool_next_tile, wkf_threadpool_worker_devscaletile, wkf_threadpool_worker_getdata, and wkf_threadpool_worker_getid.

Referenced by vmd_cuda_devpool_latency.

void* vmddevpoollatencythread (void *voidparms) [static]
 

Definition at line 693 of file CUDABench.cu.

References NULL.

Referenced by vmd_cuda_devpool_latency.

void* vmddevpooltilelatencythread (void *voidparms) [static]
 

Definition at line 697 of file CUDABench.cu.

References NULL, WKF_SCHED_DONE, wkf_threadpool_next_tile, wkf_threadpool_worker_getdata, and wkf_threadpool_worker_getid.

Referenced by vmd_cuda_devpool_latency.

__global__ void voidkernel (void) [static]
 

Definition at line 725 of file CUDABench.cu.


Generated on Fri Nov 8 02:45:56 2024 for VMD (current) by doxygen 1.2.14, written by Dimitri van Heesch, © 1997-2002