00001
00002
00003
00004
00005
00006
00007
00008
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063 #ifndef WKF_THREADS_INC
00064 #define WKF_THREADS_INC 1
00065
00066 #ifdef __cplusplus
00067 extern "C" {
00068 #endif
00069
00070
/*
 * Thread API selection.
 *
 * USEPOSIXTHREADS and USEUITHREADS are mutually exclusive.  When the
 * includer has chosen neither, a default is picked from the platform
 * macros tested below.
 */
#if defined(USEPOSIXTHREADS) && defined(USEUITHREADS)
#error You may only define USEPOSIXTHREADS or USEUITHREADS, but not both
#endif

/* Platforms that default to the POSIX threads API */
#if !defined(USEUITHREADS) && !defined(USEPOSIXTHREADS) && \
    (defined(_AIX)   || defined(__APPLE__) || defined(_CRAY)   || \
     defined(__hpux) || defined(__irix)    || defined(__linux) || \
     defined(__osf__) || defined(__PARAGON__))
#define USEPOSIXTHREADS
#endif

/* SunOS builds default to the UI threads API (<thread.h>) */
#if defined(SunOS) && !defined(USEPOSIXTHREADS) && !defined(USEUITHREADS)
#define USEUITHREADS
#endif
00088
00089
00090
00091
00092
/*
 * CPU capability constants.
 * NOTE(review): presumably CPU_SMTDEPTH_UNKNOWN is meant for
 * wkf_cpu_caps_t.smtdepth and the remaining values are bits for
 * wkf_cpu_caps_t.flags -- confirm against the implementation.
 */
#define CPU_SMTDEPTH_UNKNOWN  0
#define CPU_UNKNOWN           0x00000001

/* x86 feature bits; each flag occupies a distinct bit */
#define CPU_HT                0x00000010
#define CPU_HYPERVISOR        0x00000020
#define CPU_SSE2              0x00000100
#define CPU_SSE4_1            0x00000200
#define CPU_F16C              0x00000400
#define CPU_FMA               0x00000800
#define CPU_AVX               0x00001000
#define CPU_AVX2              0x00002000
#define CPU_AVX512F           0x00010000
#define CPU_AVX512CD          0x00020000
#define CPU_AVX512ER          0x00040000
#define CPU_AVX512PF          0x00080000

/* Convenience mask: all four AVX-512 subsets listed above */
#define CPU_KNL               \
  (CPU_AVX512F | CPU_AVX512CD | CPU_AVX512ER | CPU_AVX512PF)
00111
00112
/*
 * ARM64 feature bits.
 *
 * These reuse bit positions that the x86 flags also use (e.g. 0x10 and
 * 0x20), so a flags word presumably has to be interpreted according to
 * the architecture it was collected on -- confirm with the implementation.
 *
 * Every flag must own a distinct bit.  CPU_ARM64_FP sits at 0x40 so that
 * it no longer aliases CPU_ARM64_HPFP (0x80); with a shared bit, testing
 * for one feature would also report the other.
 */
#define CPU_ARM64_CPUID       0x00000010
#define CPU_ARM64_CRC32       0x00000020
#define CPU_ARM64_FP          0x00000040
#define CPU_ARM64_HPFP        0x00000080
#define CPU_ARM64_AES         0x00000100
#define CPU_ARM64_ATOMICS     0x00000200
#define CPU_ARM64_ASIMD       0x00000400
#define CPU_ARM64_ASIMDDP     0x00000800
#define CPU_ARM64_ASIMDHP     0x00001000
#define CPU_ARM64_ASIMDRDM    0x00002000
#define CPU_ARM64_ASIMDFHM    0x00004000
#define CPU_ARM64_SVE         0x00008000
#define CPU_ARM64_SHA512      0x00010000
#define CPU_ARM64_SHA1        0x00020000
#define CPU_ARM64_SHA2        0x00040000
#define CPU_ARM64_SHA3        0x00080000
00129
/*
 * CPU capability record filled in through wkf_cpu_capability_flags().
 */
typedef struct wkf_cpu_caps_struct {
  unsigned int flags;  /* bitmask; presumably built from the CPU_* flags */
  int smtdepth;        /* presumably CPU_SMTDEPTH_UNKNOWN when not known */
} wkf_cpu_caps_t;
00134
00135
00136 #ifdef WKFTHREADS
#ifdef USEPOSIXTHREADS
#include <pthread.h>

/* POSIX build: thread, mutex and condition variable map straight onto
 * their pthread counterparts. */
typedef pthread_t       wkf_thread_t;
typedef pthread_mutex_t wkf_mutex_t;
typedef pthread_cond_t  wkf_cond_t;

/*
 * Reader/writer lock assembled from one mutex and two condition variables.
 * NOTE(review): the field roles below are inferred from their names; the
 * locking protocol lives in the implementation file -- confirm there.
 */
typedef struct rwlock_struct {
  pthread_mutex_t lock;          /* guards the fields of this structure */
  int rwlock;                    /* lock state word */
  pthread_cond_t rdrs_ok;        /* presumably signalled to admit readers */
  unsigned int waiting_writers;  /* presumably the count of queued writers */
  pthread_cond_t wrtr_ok;        /* presumably signalled to admit a writer */
} wkf_rwlock_t;

#endif /* USEPOSIXTHREADS */
00153
#ifdef USEUITHREADS
#include <thread.h>

/* UI threads build: every primitive, including the reader/writer lock,
 * is an alias of the type supplied by <thread.h>. */
typedef thread_t wkf_thread_t;
typedef mutex_t  wkf_mutex_t;
typedef cond_t   wkf_cond_t;
typedef rwlock_t wkf_rwlock_t;
#endif /* USEUITHREADS */
00162
00163
#ifdef _MSC_VER
#include <windows.h>

/* Windows build: threads are HANDLEs, mutexes are critical sections. */
typedef HANDLE           wkf_thread_t;
typedef CRITICAL_SECTION wkf_mutex_t;

/*
 * Condition variable selection.  The leading "0 &&" forces the native
 * CONDITION_VARIABLE branch off, so the emulated condition variable in the
 * #else branch is what is actually compiled.
 */
#if 0 && (NTDDI_VERSION >= NTDDI_WS08 || _WIN32_WINNT > 0x0600)

#define WKFUSEWIN2008CONDVARS 1
typedef CONDITION_VARIABLE wkf_cond_t;

#else

/* Always enabled for the emulated condition variable */
#define WKFUSEINTERLOCKEDATOMICOPS 1

/* NOTE(review): presumably indices into wkf_cond_t.events[] -- confirm */
#define WKF_COND_SIGNAL    0
#define WKF_COND_BROADCAST 1

/* Emulated condition variable built from Win32 events */
typedef struct {
  LONG waiters;                  /* waiter count (per the field name) */
  CRITICAL_SECTION waiters_lock; /* presumably serializes access to waiters */
  HANDLE events[2];              /* two event handles */
} wkf_cond_t;

#endif

/*
 * Reader/writer lock assembled from the wkf mutex and condition variable.
 * NOTE(review): field roles are inferred from their names -- confirm
 * against the implementation.
 */
typedef struct rwlock_struct {
  wkf_mutex_t lock;              /* guards the fields of this structure */
  int rwlock;                    /* lock state word */
  wkf_cond_t rdrs_ok;            /* presumably signalled to admit readers */
  unsigned int waiting_writers;  /* presumably the count of queued writers */
  wkf_cond_t wrtr_ok;            /* presumably signalled to admit a writer */
} wkf_rwlock_t;

#endif /* _MSC_VER */
00202 #endif
00203
00204
/*
 * Non-threaded build: every synchronization type collapses to a plain int
 * so the structures and prototypes below still compile.
 *
 * Threaded build: the types must already have been provided by one of the
 * USEPOSIXTHREADS / USEUITHREADS / _MSC_VER sections.  If none of them was
 * selected, stop here with a clear message instead of letting the first
 * use of wkf_mutex_t fail with an unrelated "unknown type" diagnostic.
 */
#ifndef WKFTHREADS
typedef int wkf_thread_t;
typedef int wkf_mutex_t;
typedef int wkf_cond_t;
typedef int wkf_rwlock_t;
#elif !defined(USEPOSIXTHREADS) && !defined(USEUITHREADS) && !defined(_MSC_VER)
#error "WKFTHREADS is defined but no thread API is selected: define USEPOSIXTHREADS or USEUITHREADS"
#endif
00211
00212 #if defined(USENETBSDATOMICS)
00213 #include <sys/atomic.h>
00214 #elif defined(USESOLARISATOMICS)
00215 #include <atomic.h>
00216 #endif
00217
00218 typedef struct atomic_int_struct {
00219 int padding1[8];
00220 wkf_mutex_t lock;
00221 #if defined(USENETBSDATOMICS)
00222 unsigned int val;
00223 #elif defined(USESOLARISATOMICS)
00224 unsigned int val;
00225 #elif defined(USEWIN32ATOMICS)
00226 LONG val;
00227 #else
00228 int val;
00229 #endif
00230 int padding2[8];
00231 } wkf_atomic_int_t;
00232
00233
00234 typedef struct barrier_struct {
00235 int padding1[8];
00236 wkf_mutex_t lock;
00237 int n_clients;
00238 int n_waiting;
00239 int phase;
00240 int sum;
00241 int result;
00242 wkf_cond_t wait_cv;
00243 int padding2[8];
00244 } wkf_barrier_t;
00245
00246 typedef struct wkf_run_barrier_struct {
00247 int padding1[8];
00248 wkf_mutex_t lock;
00249 int n_clients;
00250 int n_waiting;
00251 int phase;
00252 void * (*fctn)(void *);
00253 void * parms;
00254 void * (*rslt)(void *);
00255 void * rsltparms;
00256 wkf_cond_t wait_cv;
00257 int padding2[8];
00258 } wkf_run_barrier_t;
00259
00260
00261
00262
00263
/*
 * Processor count queries.  Both take no arguments and return an int.
 * NOTE(review): judging by the names, the first reports physical
 * processors and the second the processors available for use -- the exact
 * semantics are defined by the implementation, not by this header.
 */
int wkf_thread_numphysprocessors(void);

int wkf_thread_numprocessors(void);
00269
00271 int wkf_cpu_capability_flags(wkf_cpu_caps_t *cpucaps);
00272
/*
 * CPU affinity and concurrency controls.
 * NOTE(review): semantics below are inferred from names and signatures;
 * ownership of the returned list and the meaning of the int return codes
 * are defined by the implementation -- confirm there.
 */

/* Returns an int array; presumably its length is stored through
 * cpuaffinitycount. */
int *wkf_cpu_affinitylist(int *cpuaffinitycount);

/* Takes a CPU index for the calling thread. */
int wkf_thread_set_self_cpuaffinity(int cpu);

/* Takes a single int argument (unnamed in the original declaration). */
int wkf_thread_setconcurrency(int level);
00281
00282
00283
00284
00285
00287 int wkf_thread_create(wkf_thread_t *, void * fctn(void *), void *);
00288
00290 int wkf_thread_join(wkf_thread_t, void **);
00291
00292
00293
00294
00295
00297 int wkf_mutex_init(wkf_mutex_t *);
00298
00300 int wkf_mutex_lock(wkf_mutex_t *);
00301
00303 int wkf_mutex_trylock(wkf_mutex_t *);
00304
00306 int wkf_mutex_spin_lock(wkf_mutex_t *);
00307
00309 int wkf_mutex_unlock(wkf_mutex_t *);
00310
00312 int wkf_mutex_destroy(wkf_mutex_t *);
00313
00314
00315
00316
00317
00319 int wkf_cond_init(wkf_cond_t *);
00320
00322 int wkf_cond_destroy(wkf_cond_t *);
00323
00325 int wkf_cond_wait(wkf_cond_t *, wkf_mutex_t *);
00326
00328 int wkf_cond_signal(wkf_cond_t *);
00329
00331 int wkf_cond_broadcast(wkf_cond_t *);
00332
00333
00334
00335
00336
00338 int wkf_atomic_int_init(wkf_atomic_int_t * atomp, int val);
00339
00341 int wkf_atomic_int_destroy(wkf_atomic_int_t * atomp);
00342
00344 int wkf_atomic_int_set(wkf_atomic_int_t * atomp, int val);
00345
00347 int wkf_atomic_int_get(wkf_atomic_int_t * atomp);
00348
00350 int wkf_atomic_int_fetch_and_add(wkf_atomic_int_t * atomp, int inc);
00351
00353 int wkf_atomic_int_add_and_fetch(wkf_atomic_int_t * atomp, int inc);
00354
00355
00356
00357
00358
00360 int wkf_rwlock_init(wkf_rwlock_t *);
00361
00363 int wkf_rwlock_readlock(wkf_rwlock_t *);
00364
00366 int wkf_rwlock_writelock(wkf_rwlock_t *);
00367
00369 int wkf_rwlock_unlock(wkf_rwlock_t *);
00370
00371
00372
00373
00374
00376 wkf_barrier_t * wkf_thread_barrier_init(int n_clients);
00377
00386 int wkf_thread_barrier_init_proc_shared(wkf_barrier_t *, int n_clients);
00387
00389 void wkf_thread_barrier_destroy(wkf_barrier_t *barrier);
00390
00392 int wkf_thread_barrier(wkf_barrier_t *barrier, int increment);
00393
00394
00395
00396
00397
00398
00400 int wkf_thread_run_barrier_init(wkf_run_barrier_t *barrier, int n_clients);
00401
00403 void wkf_thread_run_barrier_destroy(wkf_run_barrier_t *barrier);
00404
00406 void * (*wkf_thread_run_barrier(wkf_run_barrier_t *barrier,
00407 void * fctn(void*),
00408 void * parms,
00409 void **rsltparms))(void *);
00410
00412 int wkf_thread_run_barrier_poll(wkf_run_barrier_t *barrier);
00413
00414
/*
 * A tile of work expressed as a pair of integer indices.
 * NOTE(review): whether "end" is inclusive or exclusive is not visible in
 * this header -- confirm against the implementation.
 */
typedef struct wkf_tasktile_struct {
  int start;  /* first index of the tile */
  int end;    /* end index of the tile */
} wkf_tasktile_t;
00424
00425
00426
00427
00428
/* Sentinel value for the tile stack; parenthesized so the negative
 * literal cannot combine with neighbouring tokens at the expansion site. */
#define WKF_TILESTACK_EMPTY (-1)
00430
00434 typedef struct {
00435 wkf_mutex_t mtx;
00436 int growthrate;
00437 int size;
00438 int top;
00439 wkf_tasktile_t *s;
00440 } wkf_tilestack_t;
00441
00443 int wkf_tilestack_init(wkf_tilestack_t *s, int size);
00444
00446 void wkf_tilestack_destroy(wkf_tilestack_t *);
00447
00449 int wkf_tilestack_compact(wkf_tilestack_t *);
00450
00452 int wkf_tilestack_push(wkf_tilestack_t *, const wkf_tasktile_t *);
00453
00455 int wkf_tilestack_pop(wkf_tilestack_t *, wkf_tasktile_t *);
00456
00458 int wkf_tilestack_popall(wkf_tilestack_t *);
00459
00461 int wkf_tilestack_empty(wkf_tilestack_t *);
00462
00463
/* Scheduler status codes.  The negative value is parenthesized so it
 * cannot combine with neighbouring tokens at the expansion site.
 * NOTE(review): presumably returned by the *_next_tile() calls -- confirm. */
#define WKF_SCHED_DONE     (-1)
#define WKF_SCHED_CONTINUE 0
00473 typedef struct wkf_shared_iterator_struct {
00474 wkf_mutex_t mtx;
00475 int start;
00476 int end;
00477 int current;
00478 int fatalerror;
00479 } wkf_shared_iterator_t;
00480
00482 int wkf_shared_iterator_init(wkf_shared_iterator_t *it);
00483
00485 int wkf_shared_iterator_destroy(wkf_shared_iterator_t *it);
00486
00488 int wkf_shared_iterator_set(wkf_shared_iterator_t *it, wkf_tasktile_t *tile);
00489
00496 int wkf_shared_iterator_next_tile(wkf_shared_iterator_t *it, int reqsize,
00497 wkf_tasktile_t *tile);
00498
00500 int wkf_shared_iterator_setfatalerror(wkf_shared_iterator_t *it);
00501
00503 int wkf_shared_iterator_getfatalerror(wkf_shared_iterator_t *it);
00504
00505
00506
00507
00508
/* Device list value for wkf_threadpool_create(); expands to NULL, so the
 * including file must have NULL in scope (e.g. via <stddef.h>). */
#define WKF_THREADPOOL_DEVLIST_CPUSONLY NULL

/* Device ID constant; parenthesized so the negative literal cannot combine
 * with neighbouring tokens at the expansion site.
 * NOTE(review): presumably the devid reported for CPU workers -- confirm. */
#define WKF_THREADPOOL_DEVID_CPU (-1)
00514
00516 typedef struct wkf_threadpool_workerdata_struct {
00517 int padding1[8];
00518 wkf_shared_iterator_t *iter;
00519 wkf_tilestack_t *errorstack;
00520 int threadid;
00521 int threadcount;
00522 int devid;
00523 float devspeed;
00524 void *parms;
00525 void *thrpool;
00526 int padding2[8];
00527 } wkf_threadpool_workerdata_t;
00528
00529 typedef struct wkf_threadpool_struct {
00530 int workercount;
00531 int *devlist;
00532 wkf_shared_iterator_t iter;
00533 wkf_tilestack_t errorstack;
00534 wkf_thread_t *threads;
00535 wkf_threadpool_workerdata_t *workerdata;
00536 wkf_run_barrier_t runbar;
00537 } wkf_threadpool_t;
00538
00540 wkf_threadpool_t * wkf_threadpool_create(int workercount, int *devlist);
00541
00543 int wkf_threadpool_launch(wkf_threadpool_t *thrpool,
00544 void *fctn(void *), void *parms, int blocking);
00545
00547 int wkf_threadpool_wait(wkf_threadpool_t *thrpool);
00548
00550 int wkf_threadpool_destroy(wkf_threadpool_t *thrpool);
00551
00553 int wkf_threadpool_get_workercount(wkf_threadpool_t *thrpool);
00554
/*
 * Worker-side accessors.  Each takes the worker's opaque pointer
 * ("voiddata") and reads or writes one value through the remaining
 * arguments.
 * NOTE(review): voiddata presumably points at a
 * wkf_threadpool_workerdata_t -- confirm against the implementation.
 */
int wkf_threadpool_worker_getid(void *voiddata,
                                int *threadid,
                                int *threadcount);

int wkf_threadpool_worker_getdevid(void *voiddata, int *devid);

int wkf_threadpool_worker_setdevspeed(void *voiddata, float speed);

int wkf_threadpool_worker_getdevspeed(void *voiddata, float *speed);

/* tilesize is passed by pointer, so it can be modified in place. */
int wkf_threadpool_worker_devscaletile(void *voiddata, int *tilesize);

int wkf_threadpool_worker_getdata(void *voiddata, void **clientdata);
00583
00585 int wkf_threadpool_sched_dynamic(wkf_threadpool_t *thrpool, wkf_tasktile_t *tile);
00586
00591 int wkf_threadpool_next_tile(void *thrpool, int reqsize, wkf_tasktile_t *tile);
00592
00597 int wkf_threadpool_tile_failed(void *thrpool, wkf_tasktile_t *tile);
00598
/*
 * Fatal error flag accessors for thread pool workers.  Both take the
 * worker's opaque parameter pointer and return an int.
 */
int wkf_threadpool_setfatalerror(void *thrparms);

int wkf_threadpool_getfatalerror(void *thrparms);
00604
00605
00613 typedef struct wkf_threadlaunch_struct {
00614 int padding1[8];
00615 wkf_shared_iterator_t *iter;
00616 int threadid;
00617 int threadcount;
00618 void * clientdata;
00619 int padding2[8];
00620 } wkf_threadlaunch_t;
00621
00623 int wkf_threadlaunch(int numprocs, void *clientdata, void * fctn(void *),
00624 wkf_tasktile_t *tile);
00625
00627 int wkf_threadlaunch_getid(void *thrparms, int *threadid, int *threadcount);
00628
00630 int wkf_threadlaunch_getdata(void *thrparms, void **clientdata);
00631
00636 int wkf_threadlaunch_next_tile(void *voidparms, int reqsize,
00637 wkf_tasktile_t *tile);
00638
00640 int wkf_threadlaunch_setfatalerror(void *thrparms);
00641
00642
00643 #ifdef __cplusplus
00644 }
00645 #endif
00646
00647 #endif