00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00034 
00035 
00036 
00037 
00038 
00039 
00040 
00041 
00042 
00043 
00044 
00045 
00046 
00047 
00048 
00049 
00050 
00051 
00052 
00053 
00054 
00055 
00056 
00057 
00058 
00059 
00060 
00061 
00062 
00063 #include <stdio.h>
00064 #include <stdlib.h>
00065 #include <string.h>
00066 
00071 #if defined(__linux)
00072 #define _GNU_SOURCE 1
00073 #include <sched.h>
00074 #endif
00075 
00076 #include "WKFThreads.h"
00077 
00078 
00079 #if (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300)) || (defined(_MSC_VER) && (_MSC_VER >= 1916)) 
00080 #include <emmintrin.h>
00081 #include <immintrin.h>
00082 #endif
00083 
00084 #ifdef _MSC_VER
00085 #if 0
00086 #define WKFUSENEWWIN32APIS 1
00087 #define _WIN32_WINNT 0x0400 
00088 #define  WINVER      0x0400 
00089 #endif
00090 #include <windows.h>        
00091 #include <winbase.h>        
00092 #endif
00093 
00094 #if defined(_AIX) || defined(_CRAY) || defined(__irix) || defined(__linux) || defined(__osf__) || defined(__sun)
00095 #include <unistd.h>         
00096 #endif
00097 
00098 #if defined(__APPLE__) && defined(WKFTHREADS)
00099 #if 1
00100 #include <sys/types.h>
00101 #include <sys/sysctl.h>     
00102 #else
00103 #include <Carbon/Carbon.h>  
00104 #endif
00105 #endif
00106 
00107 #if defined(__linux) && (defined(ARCH_LINUXARM64) || defined(__ARM_ARCH_ISA_A64) || defined(__ARM_NEON))
00108 #include <sys/auxv.h>       
00109 #endif
00110 
00111 #if defined(__hpux)
00112 #include <sys/mpctl.h>      
00113 #endif
00114 
00115 
00116 #ifdef __cplusplus
00117 extern "C" {
00118 #endif
00119 
/*
 * wkf_thread_numphysprocessors()
 *   Query the operating system for the number of processors.
 *
 *   Returns 1 when the library is built without WKFTHREADS, when no
 *   platform-specific query below applies, or when the query fails.
 *   The result is never less than 1.
 */
int wkf_thread_numphysprocessors(void) {
  int a=1;

#ifdef WKFTHREADS
#if defined(__APPLE__)
#if 1
  /* MacOS X: ask sysctl() for the hw.availcpu value */
  int rc;
  int mib[2];
  u_int miblen;
  size_t alen = sizeof(a);
  mib[0] = CTL_HW;
  mib[1] = HW_AVAILCPU;
  miblen = 2;
  rc = sysctl(mib, miblen, &a, &alen, NULL, 0);
  if (rc < 0) {
    perror("Error during sysctl() query for CPU count");
    a = 1;
  }
#else
  /* disabled alternative based on the Carbon multiprocessing API */
  a = MPProcessorsScheduled();
#endif
#endif

#ifdef _MSC_VER
  /* Windows: processor count reported by GetSystemInfo() */
  struct _SYSTEM_INFO sysinfo;
  GetSystemInfo(&sysinfo);
  a = sysinfo.dwNumberOfProcessors;
#endif

#if defined(__PARAGON__)
  a=2; /* hard-coded processor count for this platform */
#endif

#if defined(_CRAY)
  a = sysconf(_SC_CRAY_NCPU);
#endif

#if defined(ANDROID) || defined(USEPHYSCPUCOUNT)
  /*
   * Use the count of *configured* processors rather than the count of
   * processors currently online, and cross-check it against the kernel's
   * "present" CPU range.  The larger of the two values wins.
   */
  a = sysconf(_SC_NPROCESSORS_CONF);

  {
    int rc=0, b=1, i=-1, j=-1;
    FILE *ifp;

    ifp = fopen("/sys/devices/system/cpu/present", "r");
    if (ifp != NULL) {
      /* the file is parsed as a "first-last" range, e.g. "0-7" */
      rc = fscanf(ifp, "%d-%d", &i, &j);
      fclose(ifp);

      /* only a range that starts at CPU 0 is converted into a count */
      if (rc == 2 && i == 0) {
        b = j+1;
      }
    }

    /* take the larger of the sysconf() and sysfs results */
    a = (a > b) ? a : b;
  }
#else
#if defined(__sun) || defined(__linux) || defined(__osf__) || defined(_AIX)
  a = sysconf(_SC_NPROCESSORS_ONLN); /* processors currently online */
#endif
#endif

#if defined(__irix)
  a = sysconf(_SC_NPROC_ONLN);
#endif

#if defined(__hpux)
  a = mpctl(MPC_GETNUMSPUS, 0, 0);
#endif
#endif /* WKFTHREADS */

  /*
   * sysconf() and mpctl() report failure as -1, and an unsupported query
   * could yield 0.  Callers use this value as a thread/CPU count, so
   * never hand back anything smaller than a single processor.
   */
  if (a < 1)
    a = 1;

  return a;
}
00206 
00207 
/*
 * wkf_thread_numprocessors()
 *   Number of processors the caller should use.  When built with
 *   WKFTHREADS, an integer found in the WKFFORCECPUCOUNT environment
 *   variable (or, failing that, VMDFORCECPUCOUNT) overrides the count
 *   reported by wkf_thread_numphysprocessors().  Without WKFTHREADS the
 *   answer is always 1.
 */
int wkf_thread_numprocessors(void) {
  int ncpus = 1;

#if defined(WKFTHREADS)
  /* look for a user-supplied override, preferring the WKF variable */
  const char *override = getenv("WKFFORCECPUCOUNT");
  if (override == NULL) {
    override = getenv("VMDFORCECPUCOUNT");
  }

  /* a parsable integer is returned verbatim, with no further checks */
  if (override != NULL && sscanf(override, "%d", &ncpus) == 1) {
    return ncpus;
  }

  /* no usable override: fall back to the hardware query */
  ncpus = wkf_thread_numphysprocessors();
#endif

  return ncpus;
}
00238 
00239 
00240 
00241 
00242 
00243 
00244 #define WKF_USEINTCPUID 1
00245 #if defined(WKF_USEINTCPUID) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || (defined(_MSC_VER) && (_MSC_VER >= 1916))) && (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_AMD64))
#if 1

/*
 * wkf_cpuid()
 *   Execute the x86 CPUID instruction.
 *     eax  - value loaded into EAX before the instruction (the leaf)
 *     ecx  - value loaded into ECX before the instruction (the sub-leaf)
 *     abcd - receives the resulting EAX, EBX, ECX, EDX in elements 0..3
 */
static void wkf_cpuid(unsigned int eax, unsigned int ecx, unsigned int* abcd) {
#if defined(_MSC_VER)
  /* MSVC: use the compiler intrinsic, which fills all four elements */
  __cpuidex((int*)abcd, eax, ecx);
#else

  unsigned int ebx=0, edx=0;
#if defined(__i386__) && defined (__PIC__)
  /*
   * 32-bit PIC build: EBX must come out of the asm unchanged (presumably
   * because it is reserved as the PIC register).  Stash EBX in EDI, run
   * CPUID, then swap so EBX is restored and EDI carries CPUID's EBX.
   */
  __asm__("movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" : "=D" (ebx),
#else
  /* otherwise EBX can be named directly as an in/out operand */
  __asm__("cpuid" : "+b" (ebx),
#endif
          /* operands shared by both asm variants above */
          "+a" (eax), "+c" (ecx), "=d" (edx));
          abcd[0] = eax; abcd[1] = ebx; abcd[2] = ecx; abcd[3] = edx;
#endif
  }
#else
/*
 * Alternative implementation, compiled out by the "#if 1" above.
 * NOTE(review): only eax is listed as an asm input here; the ecx
 * parameter is never loaded into the register before CPUID executes.
 */
static void wkf_cpuid(unsigned int eax, unsigned int ecx, unsigned int *info) {
  __asm__ __volatile__(
    "xchg %%ebx, %%edi;"
    "cpuid;"
    "xchg %%ebx, %%edi;"
    :"=a" (info[0]), "=D" (info[1]), "=c" (info[2]), "=d" (info[3])
    :"0" (eax)
  );
}
#endif
00275 
/*
 * wkf_xgetbv()
 *   Execute the x86 XGETBV instruction with ECX set to "index" and
 *   return the 64-bit result (EDX supplies the high 32 bits, EAX the
 *   low 32 bits).  On MSVC the _xgetbv() intrinsic is used instead of
 *   inline assembly.
 */
static unsigned long long wkf_xgetbv(unsigned int index) {
#if defined(_MSC_VER)
  return _xgetbv(index);
#else
  unsigned int eax=0, edx=0;
  __asm__ __volatile__(
    "xgetbv;"
    : "=a" (eax), "=d"(edx)
    : "c" (index)
  );
  /* combine the two 32-bit halves into one 64-bit value */
  return ((unsigned long long) edx << 32) | eax;
#endif
}
00289 #endif
00290 
00291 
00292 int wkf_cpu_capability_flags(wkf_cpu_caps_t *cpucaps) {
00293   int flags=CPU_UNKNOWN;
00294   int smtdepth = CPU_SMTDEPTH_UNKNOWN;
00295 
00296 #if defined(WKF_USEINTCPUID) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || (defined(_MSC_VER) && (_MSC_VER >= 1916))) && (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_AMD64))
00297 #define WKF_INTERNAL_ENABLE_CPUCAP_BAILOUT 1
00298   
00299   
00300   
00301   
00302   
00303   
00304   
00305   
00306   
00307   
00308   
00309   
00310   
00311   
00312   
00313   
00314   unsigned int vendcpuinfo[4] = { 0 };
00315   unsigned int cpuinfo[4] = { 0 };
00316   unsigned long long xcrFeatureMask = 0;
00317   int havexmmymm = 0;
00318   int havezmmmask = 0;
00319   int haveosxsave = 0;
00320 
00321   wkf_cpuid(0, 0, vendcpuinfo); 
00322   if (vendcpuinfo[0] == 0)
00323     goto nocpuinfo; 
00324 
00325   wkf_cpuid(1, 0, cpuinfo);     
00326   haveosxsave = (cpuinfo[2] & (1 << 27)) != 0; 
00327 
00328   flags = 0;
00329   flags |= ((cpuinfo[2] & (1 << 19)) != 0)           * CPU_SSE4_1;
00330   flags |= ((cpuinfo[2] & (1 << 29)) != 0)           * CPU_F16C;
00331   flags |= ((cpuinfo[2] & (1 << 31)) != 0)           * CPU_HYPERVISOR;
00332   flags |= ((cpuinfo[3] & (1 << 26)) != 0)           * CPU_SSE2;
00333   flags |= ((cpuinfo[3] & (1 << 28)) != 0)           * CPU_HT;
00334 
00335   
00336   if ((cpuinfo[2] & (1 << 28)) != 0) {
00337     xcrFeatureMask = wkf_xgetbv(0);
00338     havexmmymm  = (xcrFeatureMask & 0x06) == 0x06;
00339     havezmmmask = (xcrFeatureMask & 0xE6) == 0xE6;
00340   }
00341 
00342   flags |= (((cpuinfo[2] & (1 << 12)) != 0) &&
00343             havexmmymm && haveosxsave)               * CPU_FMA;
00344 
00345   flags |= (((cpuinfo[2] & (1 << 28)) != 0) &&
00346             havexmmymm && haveosxsave)               * CPU_AVX;
00347 
00348   
00349   if (cpuinfo[0] >= 0x7) {
00350     unsigned int extcpuinfo[4] = { 0 };
00351     wkf_cpuid(7, 0, extcpuinfo);
00352 
00353     flags |= (((extcpuinfo[1] & (1 << 5)) != 0) &&
00354               havexmmymm && haveosxsave)               * CPU_AVX2;
00355 
00356     flags |= (((extcpuinfo[1] & (1 << 16)) != 0) &&
00357               havezmmmask && haveosxsave)              * CPU_AVX512F;
00358     flags |= (((extcpuinfo[1] & (1 << 26)) != 0) &&
00359               havezmmmask && haveosxsave)              * CPU_AVX512PF;
00360     flags |= (((extcpuinfo[1] & (1 << 27)) != 0) &&
00361               havezmmmask && haveosxsave)              * CPU_AVX512ER;
00362     flags |= (((extcpuinfo[1] & (1 << 28)) != 0) &&
00363               havezmmmask && haveosxsave)              * CPU_AVX512CD;
00364   }
00365 
00366   smtdepth = 1;
00367   if (flags & CPU_HT) {
00368 #if 1
00369     
00370     smtdepth = 2;
00371 
00372     
00373             
00374     if ((flags & CPU_AVX512ER) && (flags & CPU_AVX512PF)) {
00375       smtdepth = 4;
00376     }
00377 #else
00378     int logicalcores = (cpuinfo[1] >> 16) && 0xFF;
00379     int physicalcores = logicalcores;
00380     char vendor[16] = { 0 };
00381     ((unsigned *)vendor)[0] = vendcpuinfo[1]; 
00382     ((unsigned *)vendor)[1] = vendcpuinfo[3]; 
00383     ((unsigned *)vendor)[2] = vendcpuinfo[2]; 
00384 
00385     
00386     if (!strcmp(vendor, "GenuineIntel")) {
00387       unsigned int corecpuinfo[4] = { 0 };
00388       wkf_cpuid(4, 0, corecpuinfo);
00389       physicalcores = ((corecpuinfo[0] >> 26) & 0x3f) + 1;
00390     } else if (!strcmp(vendor, "AuthenticAMD")) {
00391       unsigned int corecpuinfo[4] = { 0 };
00392       wkf_cpuid(0x80000008, 0, corecpuinfo);
00393       physicalcores = (corecpuinfo[2] & 0xFF) + 1;        
00394     }
00395 
00396 printf("cpuinfo: %d / %d  vend: %s\n", logicalcores, physicalcores, vendor);
00397 
00398     smtdepth = logicalcores / physicalcores; 
00399 #endif
00400   }  
00401 
00402 #elif defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300)
00403 
00404   
00405   flags = 0;
00406   flags |= _may_i_use_cpu_feature(_FEATURE_SSE2)     * CPU_SSE2;
00407   flags |= _may_i_use_cpu_feature(_FEATURE_SSE4_1)   * CPU_SSE4_1;
00408   flags |= _may_i_use_cpu_feature(_FEATURE_AVX)      * CPU_AVX;
00409   flags |= _may_i_use_cpu_feature(_FEATURE_AVX2)     * CPU_AVX2;
00410   flags |= _may_i_use_cpu_feature(_FEATURE_FMA)      * CPU_FMA;
00411   flags |= _may_i_use_cpu_feature(_FEATURE_AVX512F)  * CPU_AVX512F;
00412   flags |= _may_i_use_cpu_feature(_FEATURE_AVX512CD) * CPU_AVX512CD;
00413   flags |= _may_i_use_cpu_feature(_FEATURE_AVX512ER) * CPU_AVX512ER;
00414   flags |= _may_i_use_cpu_feature(_FEATURE_AVX512PF) * CPU_AVX512PF;
00415 
00416 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
00417 
00418   
00419   flags = 0;
00420   __builtin_cpu_init();
00421   flags |= (__builtin_cpu_supports("sse2")!=0)       * CPU_SSE2;
00422   flags |= (__builtin_cpu_supports("sse4.1")!=0)     * CPU_SSE4_1;
00423   flags |= (__builtin_cpu_supports("avx")!=0)        * CPU_AVX;
00424   flags |= (__builtin_cpu_supports("avx2")!=0)       * CPU_AVX2;
00425   flags |= (__builtin_cpu_supports("fma")!=0)        * CPU_FMA;
00426   flags |= (__builtin_cpu_supports("avx512f")!=0)    * CPU_AVX512F;
00427   flags |= (__builtin_cpu_supports("avx512cd")!=0)   * CPU_AVX512CD;
00428   flags |= (__builtin_cpu_supports("avx512er")!=0)   * CPU_AVX512ER;
00429   flags |= (__builtin_cpu_supports("avx512pf")!=0)   * CPU_AVX512PF;
00430 
00431 #elif defined(__linux) && (defined(ARCH_LINUXARM64) || defined(__ARM_ARCH_ISA_A64) || defined(__ARM_NEON))
00432 
00433   
00434   
00435   
00436   
00437   
00438   unsigned long auxval1=0;
00439 
00440   auxval1 = getauxval(AT_HWCAP); 
00441 
00442 
00443 
00444   flags = 0;
00445   flags |= ((auxval1 & HWCAP_FP) != 0)               * CPU_ARM64_FP;
00446 
00447   flags |= ((auxval1 & HWCAP_ASIMD) != 0)            * CPU_ARM64_ASIMD;
00448   flags |= ((auxval1 & HWCAP_ASIMDHP) != 0)          * CPU_ARM64_ASIMDHP;
00449   flags |= ((auxval1 & HWCAP_ASIMDRDM) != 0)         * CPU_ARM64_ASIMDRDM;
00450   flags |= ((auxval1 & HWCAP_ASIMDDP) != 0)          * CPU_ARM64_ASIMDDP;
00451   flags |= ((auxval1 & HWCAP_ASIMDFHM) != 0)         * CPU_ARM64_ASIMDFHM;
00452 
00453   flags |= ((auxval1 & HWCAP_SVE) != 0)              * CPU_ARM64_SVE;
00454 
00455   flags |= ((auxval1 & HWCAP_AES) != 0)              * CPU_ARM64_AES;
00456   flags |= ((auxval1 & HWCAP_CRC32) != 0)            * CPU_ARM64_CRC32;
00457   flags |= ((auxval1 & HWCAP_SHA1) != 0)             * CPU_ARM64_SHA1;
00458   flags |= ((auxval1 & HWCAP_SHA2) != 0)             * CPU_ARM64_SHA2;
00459   flags |= ((auxval1 & HWCAP_SHA3) != 0)             * CPU_ARM64_SHA3;
00460   flags |= ((auxval1 & HWCAP_SHA512) != 0)           * CPU_ARM64_SHA512;
00461 #endif
00462 
00463 #if defined (WKF_INTERNAL_ENABLE_CPUCAP_BAILOUT)
00464 nocpuinfo:
00465 #endif
00466   cpucaps->flags = flags;
00467   cpucaps->smtdepth = smtdepth;
00468 
00469   if (flags == CPU_UNKNOWN)
00470     return 1;
00471 
00472   return 0;
00473 }
00474 
00475 
00476 int wkf_cpu_smt_depth(void) {
00477   int smtdepth = CPU_SMTDEPTH_UNKNOWN;
00478 
00479 #if defined(WKF_USEINTCPUID) && (defined(__GNUC__) || defined(__INTEL_COMPILER)) && (defined(__i386__) || defined(__x86_64__))
00480   
00481   
00482   
00483   wkf_cpu_caps_t cpucaps;
00484   if (!wkf_cpu_capability_flags(&cpucaps)) {
00485     smtdepth = cpucaps.smtdepth;
00486   }
00487 #endif
00488 
00489   return smtdepth;
00490 }
00491 
00492 
/*
 * wkf_cpu_affinitylist()
 *   Build a list of the CPU indices the calling process may run on.
 *
 *   cpuaffinitycount - receives the number of entries in the returned
 *                      list, or -1 when the affinity could not be
 *                      queried (unsupported platform or an error).
 *
 *   Returns a malloc()ed array that the caller must free(), or NULL when
 *   no list was produced.
 */
int * wkf_cpu_affinitylist(int *cpuaffinitycount) {
  int *affinitylist = NULL;
  *cpuaffinitycount = -1; /* assume failure until a query succeeds */

  /* Win32 variant: compiled out by the "0 &&" in the guard below */
#if 0 && defined(_MSC_VER)
  HANDLE myproc = GetCurrentProcess();
  DWORD affinitymask, sysaffinitymask;

  if (!GetProcessAffinityMask(myproc, &affinitymask, &sysaffinitymask)) {
    int affinitycount=0;
    int i;
    for (i=0; i<31; i++) {
      affinitycount += (affinitymask >> i) & 0x1;
    }

    if (affinitycount > 0) {
      affinitylist = (int *) malloc(affinitycount * sizeof(int));
      if (affinitylist == NULL)
        return NULL;

      int curcount = 0;
      for (i=0; i<CPU_SETSIZE; i++) {
        if (CPU_ISSET(i, &affinitymask)) {
          affinitylist[curcount] = i;
          curcount++;
        }
      }
    }

    *cpuaffinitycount = affinitycount;
  }
#endif

#if defined(__linux)
  /* the affinity macros are only available when <sched.h> exposes them */
#if defined(CPU_SETSIZE)
  int cpu;
  int ncpus = 0;
  int slot = 0;
  cpu_set_t mask;

  /* pid 0 selects the calling process */
  if (sched_getaffinity(0, sizeof(mask), &mask) < 0) {
    perror("wkf_cpu_affinitylist: sched_getaffinity");
    return NULL;
  }

  /* first pass: count the CPUs present in the mask */
  for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
    ncpus += CPU_ISSET(cpu, &mask);
  }

  /* second pass: record the index of every CPU present in the mask */
  if (ncpus > 0) {
    affinitylist = (int *) malloc(ncpus * sizeof(int));
    if (affinitylist == NULL)
      return NULL;

    for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
      if (CPU_ISSET(cpu, &mask)) {
        affinitylist[slot++] = cpu;
      }
    }
  }

  *cpuaffinitycount = ncpus;
#endif
#endif

  return affinitylist;
}
00580 
00581 
/*
 * wkf_thread_set_self_cpuaffinity()
 *   Restrict the caller to the single CPU with index "cpu".
 *
 *   Returns the status of the underlying affinity call, or -1 when the
 *   operation is not available in this build (no WKFTHREADS, not Linux,
 *   or the CPU_ZERO/CPU_SET macros are missing).
 */
int wkf_thread_set_self_cpuaffinity(int cpu) {
  int rc = -1; /* "unsupported" unless a platform path runs below */

#if defined(WKFTHREADS)
#if defined(__linux) && defined(CPU_ZERO) && defined(CPU_SET)
  /* build a mask that contains only the requested CPU */
  cpu_set_t mask;
  CPU_ZERO(&mask);
  CPU_SET(cpu, &mask);

#if defined(__MIC__)
  /* MIC builds go through the pthreads affinity call */
  rc = pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask);
#else
  /* pid 0 applies the mask to the caller */
  rc = sched_setaffinity(0, sizeof(mask), &mask);
  if (rc < 0) {
    perror("wkf_thread_set_self_cpuaffinitylist: sched_setaffinity");
    return rc;
  }
#endif

  /* give up the processor once the new mask has been installed */
  sched_yield();
#endif
#endif

  return rc;
}
00626 
00627 
/*
 * wkf_thread_setconcurrency()
 *   Pass the requested concurrency level "nthr" to the platform thread
 *   library on the systems listed below.  On every other platform, and
 *   in builds without WKFTHREADS, this does nothing and returns 0.
 */
int wkf_thread_setconcurrency(int nthr) {
  int rc = 0;

#if defined(WKFTHREADS)
#if defined(__sun)
  /* Solaris: POSIX or UI threads flavor, depending on the build */
#if defined(USEPOSIXTHREADS)
  rc = pthread_setconcurrency(nthr);
#else
  rc = thr_setconcurrency(nthr);
#endif
#endif

#if defined(__irix) || defined(_AIX)
  rc = pthread_setconcurrency(nthr);
#endif
#endif

  return rc;
}
00647 
00648 
00649 
00650 
00651 
00653 typedef void * (*WKFTHREAD_START_ROUTINE)(void *);
00654 
00655 int wkf_thread_create(wkf_thread_t * thr, void * fctn(void *), void * arg) {
00656   int status=0;
00657 
00658 #ifdef WKFTHREADS
00659 #ifdef _MSC_VER
00660   DWORD tid; 
00661   *thr = CreateThread(NULL, 8192, (LPTHREAD_START_ROUTINE) fctn, arg, 0, &tid);
00662   if (*thr == NULL) {
00663     status = -1;
00664   }
00665   
00666   
00667   
00668   
00669   
00670 #endif 
00671 
00672 #ifdef USEPOSIXTHREADS 
00673 #if defined(_AIX)
00674   
00675   {
00676     pthread_attr_t attr;
00677     pthread_attr_init(&attr);
00678     pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
00679     status = pthread_create(thr, &attr, (WKFTHREAD_START_ROUTINE)fctn, arg);
00680     pthread_attr_destroy(&attr);
00681   }
00682 #elif defined(__PARAGON__)
00683   status = pthread_create(thr, pthread_attr_default, fctn, arg);
00684 #else   
00685   status = pthread_create(thr, NULL, (WKFTHREAD_START_ROUTINE)fctn, arg);
00686 #endif 
00687 #endif 
00688 
00689 #ifdef USEUITHREADS 
00690   status = thr_create(NULL, 0, (WKFTHREAD_START_ROUTINE)fctn, arg, 0, thr); 
00691 #endif 
00692 #endif 
00693  
00694   return status;
00695 }
00696 
00697 
00698 int wkf_thread_join(wkf_thread_t thr, void ** stat) {
00699   int status=0;  
00700 
00701 #ifdef WKFTHREADS
00702 #ifdef _MSC_VER
00703   DWORD wstatus = 0;
00704  
00705   wstatus = WAIT_TIMEOUT;
00706  
00707   while (wstatus != WAIT_OBJECT_0) {
00708     wstatus = WaitForSingleObject(thr, INFINITE);
00709   }
00710   
00711   
00712   CloseHandle(thr);
00713 #endif 
00714 
00715 #ifdef USEPOSIXTHREADS
00716   status = pthread_join(thr, stat);
00717 #endif 
00718 
00719 #ifdef USEUITHREADS
00720   status = thr_join(thr, NULL, stat);
00721 #endif 
00722 #endif 
00723 
00724   return status;
00725 }  
00726 
00727 
00728 
00729 
00730 
00731 int wkf_mutex_init(wkf_mutex_t * mp) {
00732   int status=0;
00733 
00734 #ifdef WKFTHREADS
00735 #ifdef _MSC_VER
00736   InitializeCriticalSection(mp);
00737 #endif 
00738 
00739 #ifdef USEPOSIXTHREADS
00740   status = pthread_mutex_init(mp, 0);
00741 #endif 
00742 
00743 #ifdef USEUITHREADS 
00744   status = mutex_init(mp, USYNC_THREAD, NULL);
00745 #endif 
00746 #endif 
00747 
00748   return status;
00749 }
00750 
00751 
00752 int wkf_mutex_lock(wkf_mutex_t * mp) {
00753   int status=0;
00754 
00755 #ifdef WKFTHREADS
00756 #ifdef _MSC_VER
00757   EnterCriticalSection(mp);
00758 #endif 
00759 
00760 #ifdef USEPOSIXTHREADS
00761   status = pthread_mutex_lock(mp);
00762 #endif 
00763 
00764 #ifdef USEUITHREADS
00765   status = mutex_lock(mp);
00766 #endif 
00767 #endif 
00768 
00769   return status;
00770 }
00771 
00772 
00773 int wkf_mutex_trylock(wkf_mutex_t * mp) {
00774   int status=0;
00775 
00776 #ifdef WKFTHREADS
00777 #ifdef _MSC_VER
00778 #if defined(WKFUSENEWWIN32APIS)
00779   
00780   
00781   status = (!(TryEnterCriticalSection(mp)));
00782 #endif
00783 #endif 
00784 
00785 #ifdef USEPOSIXTHREADS
00786   status = (pthread_mutex_lock(mp) != 0);
00787 #endif 
00788 #endif 
00789 
00790   return status;
00791 }
00792 
00793 
00794 int wkf_mutex_spin_lock(wkf_mutex_t * mp) {
00795   int status=0;
00796 
00797 #ifdef WKFTHREADS
00798 #ifdef _MSC_VER
00799 #if defined(WKFUSENEWWIN32APIS)
00800   
00801   
00802   while (!TryEnterCriticalSection(mp));
00803 #else
00804   EnterCriticalSection(mp);
00805 #endif
00806 #endif 
00807 
00808 #ifdef USEPOSIXTHREADS
00809   while ((status = pthread_mutex_trylock(mp)) != 0);
00810 #endif 
00811 #endif 
00812 
00813   return status;
00814 }
00815 
00816 
00817 int wkf_mutex_unlock(wkf_mutex_t * mp) {
00818   int status=0;
00819 
00820 #ifdef WKFTHREADS  
00821 #ifdef _MSC_VER
00822   LeaveCriticalSection(mp);
00823 #endif 
00824 
00825 #ifdef USEPOSIXTHREADS
00826   status = pthread_mutex_unlock(mp);
00827 #endif 
00828 
00829 #ifdef USEUITHREADS
00830   status = mutex_unlock(mp);
00831 #endif 
00832 #endif 
00833 
00834   return status;
00835 }
00836 
00837 
00838 int wkf_mutex_destroy(wkf_mutex_t * mp) {
00839   int status=0;
00840 
00841 #ifdef WKFTHREADS
00842 #ifdef _MSC_VER
00843   DeleteCriticalSection(mp);
00844 #endif 
00845 
00846 #ifdef USEPOSIXTHREADS
00847   status = pthread_mutex_destroy(mp);
00848 #endif 
00849 
00850 #ifdef USEUITHREADS
00851   status = mutex_destroy(mp);
00852 #endif 
00853 #endif 
00854 
00855   return status;
00856 }
00857 
00858 
00859 
00860 
00861 
00862 int wkf_cond_init(wkf_cond_t * cvp) {
00863   int status=0;
00864 
00865 #ifdef WKFTHREADS
00866 #ifdef _MSC_VER
00867 #if defined(WKFUSEWIN2008CONDVARS)
00868   InitializeConditionVariable(cvp);
00869 #else
00870   
00871   cvp->waiters = 0;
00872 
00873   
00874   cvp->events[WKF_COND_SIGNAL] = CreateEvent(NULL,  
00875                                             FALSE, 
00876                                             FALSE, 
00877                                             NULL); 
00878 
00879   
00880   cvp->events[WKF_COND_BROADCAST] = CreateEvent(NULL,  
00881                                                TRUE,  
00882                                                FALSE, 
00883                                                NULL); 
00884 #endif
00885 #endif 
00886 
00887 #ifdef USEPOSIXTHREADS
00888   status = pthread_cond_init(cvp, NULL);
00889 #endif 
00890 #ifdef USEUITHREADS
00891   status = cond_init(cvp, USYNC_THREAD, NULL);
00892 #endif
00893 #endif 
00894 
00895   return status;
00896 }
00897 
00898 int wkf_cond_destroy(wkf_cond_t * cvp) {
00899   int status=0;
00900 
00901 #ifdef WKFTHREADS
00902 #ifdef _MSC_VER
00903 #if defined(WKFUSEWIN2008CONDVARS)
00904   
00905 #else
00906   CloseHandle(cvp->events[WKF_COND_SIGNAL]);
00907   CloseHandle(cvp->events[WKF_COND_BROADCAST]);
00908 #endif
00909 #endif 
00910 
00911 #ifdef USEPOSIXTHREADS
00912   status = pthread_cond_destroy(cvp);
00913 #endif 
00914 #ifdef USEUITHREADS
00915   status = cond_destroy(cvp);
00916 #endif
00917 #endif 
00918 
00919   return status;
00920 }
00921 
00922 int wkf_cond_wait(wkf_cond_t * cvp, wkf_mutex_t * mp) {
00923   int status=0;
00924 #if defined(WKFTHREADS) && defined(_MSC_VER)
00925   int result=0;
00926   LONG last_waiter;
00927   LONG my_waiter;
00928 #endif
00929 
00930 #ifdef WKFTHREADS
00931 #ifdef _MSC_VER
00932 #if defined(WKFUSEWIN2008CONDVARS)
00933   SleepConditionVariableCS(cvp, mp, INFINITE)
00934 #else
00935 #if !defined(WKFUSEINTERLOCKEDATOMICOPS)
00936   EnterCriticalSection(&cvp->waiters_lock);
00937   cvp->waiters++;
00938   LeaveCriticalSection(&cvp->waiters_lock);
00939 #else
00940   InterlockedIncrement(&cvp->waiters);
00941 #endif
00942 
00943   LeaveCriticalSection(mp); 
00944 
00945   
00946   result = WaitForMultipleObjects(2, cvp->events, FALSE, INFINITE);
00947 
00948 #if !defined(WKFUSEINTERLOCKEDATOMICOPS)
00949   EnterCriticalSection (&cvp->waiters_lock);
00950   cvp->waiters--;
00951   last_waiter =
00952     ((result == (WAIT_OBJECT_0 + WKF_COND_BROADCAST)) && cvp->waiters == 0);
00953   LeaveCriticalSection (&cvp->waiters_lock);
00954 #else
00955   my_waiter = InterlockedDecrement(&cvp->waiters);
00956   last_waiter =
00957     ((result == (WAIT_OBJECT_0 + WKF_COND_BROADCAST)) && my_waiter == 0);
00958 #endif
00959 
00960   
00961   if (last_waiter)
00962     
00963     
00964     ResetEvent(cvp->events[WKF_COND_BROADCAST]);
00965 
00966   EnterCriticalSection(mp);
00967 #endif
00968 #endif 
00969 
00970 #ifdef USEPOSIXTHREADS
00971   status = pthread_cond_wait(cvp, mp);
00972 #endif 
00973 #ifdef USEUITHREADS
00974   status = cond_wait(cvp, mp);
00975 #endif
00976 #endif 
00977 
00978   return status;
00979 }
00980 
00981 int wkf_cond_signal(wkf_cond_t * cvp) {
00982   int status=0;
00983 
00984 #ifdef WKFTHREADS
00985 #ifdef _MSC_VER
00986 #if defined(WKFUSEWIN2008CONDVARS)
00987   WakeConditionVariable(cvp);
00988 #else
00989 #if !defined(WKFUSEINTERLOCKEDATOMICOPS)
00990   EnterCriticalSection(&cvp->waiters_lock);
00991   int have_waiters = (cvp->waiters > 0);
00992   LeaveCriticalSection(&cvp->waiters_lock);
00993   if (have_waiters)
00994     SetEvent (cvp->events[WKF_COND_SIGNAL]);
00995 #else
00996   if (InterlockedExchangeAdd(&cvp->waiters, 0) > 0)
00997     SetEvent(cvp->events[WKF_COND_SIGNAL]);
00998 #endif
00999 #endif
01000 #endif 
01001 
01002 #ifdef USEPOSIXTHREADS
01003   status = pthread_cond_signal(cvp);
01004 #endif 
01005 #ifdef USEUITHREADS
01006   status = cond_signal(cvp);
01007 #endif
01008 #endif 
01009 
01010   return status;
01011 }
01012 
01013 int wkf_cond_broadcast(wkf_cond_t * cvp) {
01014   int status=0;
01015 
01016 #ifdef WKFTHREADS
01017 #ifdef _MSC_VER
01018 #if defined(WKFUSEWIN2008CONDVARS)
01019   WakeAllConditionVariable(cvp);
01020 #else
01021 #if !defined(WKFUSEINTERLOCKEDATOMICOPS)
01022   EnterCriticalSection(&cvp->waiters_lock);
01023   int have_waiters = (cvp->waiters > 0);
01024   LeaveCriticalSection(&cvp->waiters_lock);
01025   if (have_waiters)
01026     SetEvent(cvp->events[WKF_COND_BROADCAST]);
01027 #else
01028   if (InterlockedExchangeAdd(&cvp->waiters, 0) > 0)
01029     SetEvent(cvp->events[WKF_COND_BROADCAST]);
01030 #endif
01031 
01032 #endif
01033 #endif 
01034 
01035 #ifdef USEPOSIXTHREADS
01036   status = pthread_cond_broadcast(cvp);
01037 #endif 
01038 #ifdef USEUITHREADS
01039   status = cond_broadcast(cvp);
01040 #endif
01041 #endif 
01042 
01043   return status;
01044 }
01045 
01046 
01047 
01048 
01049 
01050 
01051 
01052 
01053 int wkf_atomic_int_init(wkf_atomic_int_t * atomp, int val) {
01054   memset(atomp, 0, sizeof(wkf_atomic_int_t));
01055 #ifdef WKFTHREADS
01056 #if defined(USEGCCATOMICS)
01057   
01058 #elif defined(USENETBSDATOMICS) 
01059   
01060 #elif defined(USESOLARISATOMICS) 
01061   
01062 #elif defined(USEWIN32ATOMICS) 
01063   
01064 #else  
01065   wkf_mutex_init(&atomp->lock);
01066 #endif
01067 #else
01068   
01069 #endif
01070   atomp->val = val;
01071 
01072   return 0;
01073 }
01074 
01075 
01076 int wkf_atomic_int_destroy(wkf_atomic_int_t * atomp) {
01077 #ifdef WKFTHREADS
01078 #if defined(USEGCCATOMICS)
01079   
01080 #elif defined(USENETBSDATOMICS) 
01081   
01082 #elif defined(USESOLARISATOMICS) 
01083   
01084 #elif defined(USEWIN32ATOMICS) 
01085   
01086 #else  
01087   wkf_mutex_destroy(&atomp->lock);
01088 #endif
01089 #else
01090   
01091 #endif
01092 
01093   return 0;
01094 }
01095 
01096 
01097 int wkf_atomic_int_set(wkf_atomic_int_t * atomp, int val) {
01098   int retval;
01099 
01100 #ifdef WKFTHREADS
01101 #if defined(USEGCCATOMICS)
01102   
01103   atomp->val = val;  
01104   retval = val;
01105 #elif defined(USENETBSDATOMICS) 
01106   
01107   atomp->val = val;  
01108   retval = val;
01109 #elif defined(USESOLARISATOMICS) 
01110   
01111   atomp->val = val;  
01112   retval = val;
01113 #elif defined(USEWIN32ATOMICS) 
01114   
01115   atomp->val = val;  
01116   retval = val;
01117 #else  
01118   wkf_mutex_lock(&atomp->lock);
01119   atomp->val = val;  
01120   retval = atomp->val;
01121   wkf_mutex_unlock(&atomp->lock);
01122 #endif
01123 #else
01124   
01125   atomp->val = val;  
01126   retval = atomp->val;
01127 #endif
01128   
01129   return retval;
01130 }
01131 
01132 
01133 int wkf_atomic_int_get(wkf_atomic_int_t * atomp) {
01134   int retval;
01135 
01136 #ifdef WKFTHREADS
01137 #if defined(USEGCCATOMICS)
01138   
01139   retval = atomp->val;
01140 #elif defined(USENETBSDATOMICS) 
01141   
01142   retval = atomp->val;
01143 #elif defined(USESOLARISATOMICS) 
01144   
01145   retval = atomp->val;
01146 #elif defined(USEWIN32ATOMICS) 
01147   
01148   retval = atomp->val;
01149 #else  
01150   wkf_mutex_lock(&atomp->lock);
01151   retval = atomp->val;
01152   wkf_mutex_unlock(&atomp->lock);
01153 #endif
01154 #else
01155   
01156   retval = atomp->val;
01157 #endif
01158   
01159   return retval;
01160 }
01161 
01162 int wkf_atomic_int_fetch_and_add(wkf_atomic_int_t * atomp, int inc) {
01163 #ifdef WKFTHREADS
01164 #if defined(USEGCCATOMICS)
01165   return __sync_fetch_and_add(&atomp->val, inc);
01166 #elif defined(USENETBSDATOMICS) 
01167   
01168   return atomic_add_int_nv(&atomp->val, inc) - inc;
01169 #elif defined(USESOLARISATOMICS) 
01170   
01171   return atomic_add_int_nv(&atomp->val, inc) - inc;
01172 #elif defined(USEWIN32ATOMICS) 
01173   return InterlockedExchangeAdd(&atomp->val, inc);
01174 #else  
01175   int retval;
01176   wkf_mutex_lock(&atomp->lock);
01177   retval = atomp->val;
01178   atomp->val+=inc;
01179   wkf_mutex_unlock(&atomp->lock);
01180   return retval;
01181 #endif
01182 #else
01183   int retval = atomp->val;
01184   atomp->val+=inc;
01185   return retval;
01186 #endif
01187 }
01188 
01189 
01190 int wkf_atomic_int_add_and_fetch(wkf_atomic_int_t * atomp, int inc) {
01191 #ifdef WKFTHREADS
01192 #if defined(USEGCCATOMICS)
01193   return __sync_add_and_fetch(&atomp->val, inc);
01194 #elif defined(USENETBSDATOMICS) 
01195   return atomic_add_int_nv(&atomp->val, inc);
01196 #elif defined(USESOLARISATOMICS) 
01197   return atomic_add_int_nv(&atomp->val, inc);
01198 #elif defined(USEWIN32ATOMICS) 
01199   
01200   return InterlockedExchangeAdd(&atomp->val, inc) + inc;
01201 #else  
01202   int retval; 
01203   wkf_mutex_lock(&atomp->lock);
01204   atomp->val+=inc;
01205   retval = atomp->val;
01206   wkf_mutex_unlock(&atomp->lock);
01207   return retval;
01208 #endif
01209 #else
01210   int retval;
01211   atomp->val+=inc;
01212   retval = atomp->val;
01213   return retval;
01214 #endif
01215 }
01216 
01217 
01218 
01219 
01220 
01221 
01222 int wkf_rwlock_init(wkf_rwlock_t * rwp) {
01223   int status=0;
01224 
01225 #ifdef WKFTHREADS  
01226 #ifdef _MSC_VER
01227   wkf_mutex_init(&rwp->lock);
01228   wkf_cond_init(&rwp->rdrs_ok);
01229   wkf_cond_init(&rwp->wrtr_ok);
01230   rwp->rwlock = 0;
01231   rwp->waiting_writers = 0;
01232 #endif
01233 
01234 #ifdef USEPOSIXTHREADS
01235   pthread_mutex_init(&rwp->lock, NULL);
01236   pthread_cond_init(&rwp->rdrs_ok, NULL);
01237   pthread_cond_init(&rwp->wrtr_ok, NULL);
01238   rwp->rwlock = 0;
01239   rwp->waiting_writers = 0;
01240 #endif 
01241 
01242 #ifdef USEUITHREADS
01243   status = rwlock_init(rwp, USYNC_THREAD, NULL);
01244 #endif 
01245 #endif 
01246 
01247   return status;
01248 }
01249 
01250 
01251 int wkf_rwlock_readlock(wkf_rwlock_t * rwp) {
01252   int status=0;
01253 
01254 #ifdef WKFTHREADS  
01255 #ifdef _MSC_VER
01256   wkf_mutex_lock(&rwp->lock);
01257   while (rwp->rwlock < 0 || rwp->waiting_writers) 
01258     wkf_cond_wait(&rwp->rdrs_ok, &rwp->lock);   
01259   rwp->rwlock++;   
01260   wkf_mutex_unlock(&rwp->lock);
01261 #endif
01262 
01263 #ifdef USEPOSIXTHREADS
01264   pthread_mutex_lock(&rwp->lock);
01265   while (rwp->rwlock < 0 || rwp->waiting_writers) 
01266     pthread_cond_wait(&rwp->rdrs_ok, &rwp->lock);   
01267   rwp->rwlock++;   
01268   pthread_mutex_unlock(&rwp->lock);
01269 #endif 
01270 
01271 #ifdef USEUITHREADS
01272   status = rw_rdlock(rwp);
01273 #endif 
01274 #endif 
01275 
01276   return status;
01277 }
01278 
01279 
01280 int wkf_rwlock_writelock(wkf_rwlock_t * rwp) {
01281   int status=0;
01282 
01283 #ifdef WKFTHREADS  
01284 #ifdef _MSC_VER
01285   wkf_mutex_lock(&rwp->lock);
01286   while (rwp->rwlock != 0) {
01287     rwp->waiting_writers++;
01288     wkf_cond_wait(&rwp->wrtr_ok, &rwp->lock);
01289     rwp->waiting_writers--;
01290   }
01291   rwp->rwlock=-1;
01292   wkf_mutex_unlock(&rwp->lock);
01293 #endif
01294 
01295 #ifdef USEPOSIXTHREADS
01296   pthread_mutex_lock(&rwp->lock);
01297   while (rwp->rwlock != 0) {
01298     rwp->waiting_writers++;
01299     pthread_cond_wait(&rwp->wrtr_ok, &rwp->lock);
01300     rwp->waiting_writers--;
01301   }
01302   rwp->rwlock=-1;
01303   pthread_mutex_unlock(&rwp->lock);
01304 #endif 
01305 
01306 #ifdef USEUITHREADS
01307   status = rw_wrlock(rwp);
01308 #endif 
01309 #endif 
01310 
01311   return status;
01312 }
01313 
01314 
01315 int wkf_rwlock_unlock(wkf_rwlock_t * rwp) {
01316   int status=0;
01317 
01318 #ifdef WKFTHREADS  
01319 #ifdef _MSC_VER
01320   int ww, wr;
01321   wkf_mutex_lock(&rwp->lock);
01322   if (rwp->rwlock > 0) {
01323     rwp->rwlock--;
01324   } else {
01325     rwp->rwlock = 0;
01326   } 
01327   ww = (rwp->waiting_writers && rwp->rwlock == 0);
01328   wr = (rwp->waiting_writers == 0);
01329   wkf_mutex_unlock(&rwp->lock);
01330   if (ww) 
01331     wkf_cond_signal(&rwp->wrtr_ok);
01332   else if (wr)
01333     wkf_cond_signal(&rwp->rdrs_ok);
01334 #endif
01335 
01336 #ifdef USEPOSIXTHREADS
01337   int ww, wr;
01338   pthread_mutex_lock(&rwp->lock);
01339   if (rwp->rwlock > 0) {
01340     rwp->rwlock--;
01341   } else {
01342     rwp->rwlock = 0;
01343   } 
01344   ww = (rwp->waiting_writers && rwp->rwlock == 0);
01345   wr = (rwp->waiting_writers == 0);
01346   pthread_mutex_unlock(&rwp->lock);
01347   if (ww) 
01348     pthread_cond_signal(&rwp->wrtr_ok);
01349   else if (wr)
01350     pthread_cond_signal(&rwp->rdrs_ok);
01351 #endif 
01352 
01353 #ifdef USEUITHREADS
01354   status = rw_unlock(rwp);
01355 #endif 
01356 #endif 
01357 
01358   return status;
01359 }
01360 
01361 
01362 
01363 
01364 
01365 wkf_barrier_t * wkf_thread_barrier_init(int n_clients) {
01366   wkf_barrier_t *barrier = (wkf_barrier_t *) malloc(sizeof(wkf_barrier_t));
01367 
01368 #ifdef WKFTHREADS
01369   if (barrier != NULL) {
01370     barrier->n_clients = n_clients;
01371     barrier->n_waiting = 0;
01372     barrier->phase = 0;
01373     barrier->sum = 0;
01374     wkf_mutex_init(&barrier->lock);
01375     wkf_cond_init(&barrier->wait_cv);
01376   }
01377 #endif
01378 
01379   return barrier;
01380 }
01381 
01382 
01383 
01384 
01385 
01386 
01387 
01388 
01389 int wkf_thread_barrier_init_proc_shared(wkf_barrier_t *barrier, int n_clients) {
01390 #ifdef WKFTHREADS
01391 #ifdef USEPOSIXTHREADS
01392   if (barrier != NULL) {
01393     barrier->n_clients = n_clients;
01394     barrier->n_waiting = 0;
01395     barrier->phase = 0;
01396     barrier->sum = 0;
01397 
01398     pthread_mutexattr_t mattr;
01399     pthread_condattr_t  cattr;
01400 
01401     printf("Setting barriers to have system scope...\n");
01402 
01403     pthread_mutexattr_init(&mattr);
01404     if (pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED) != 0) {
01405       printf("WARNING: could not set mutex to process shared scope\n");
01406     }
01407 
01408     pthread_condattr_init(&cattr);
01409     if (pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED) != 0) {
01410       printf("WARNING: could not set mutex to process shared scope\n");
01411     }
01412 
01413     pthread_mutex_init(&barrier->lock, &mattr);
01414     pthread_cond_init(&barrier->wait_cv, &cattr);
01415 
01416     pthread_condattr_destroy(&cattr);
01417     pthread_mutexattr_destroy(&mattr);
01418   }
01419 #endif
01420 #endif
01421 
01422   return 0;
01423 }
01424 
01425 
01426 void wkf_thread_barrier_destroy(wkf_barrier_t *barrier) {
01427 #ifdef WKFTHREADS
01428   wkf_mutex_destroy(&barrier->lock);
01429   wkf_cond_destroy(&barrier->wait_cv);
01430 #endif
01431   free(barrier);
01432 }
01433 
01434 
01435 int wkf_thread_barrier(wkf_barrier_t *barrier, int increment) {
01436 #ifdef WKFTHREADS
01437   int my_phase;
01438   int my_result;
01439 
01440   wkf_mutex_lock(&barrier->lock);
01441   my_phase = barrier->phase;
01442   barrier->sum += increment;
01443   barrier->n_waiting++;
01444 
01445   if (barrier->n_waiting == barrier->n_clients) {
01446     barrier->result = barrier->sum;
01447     barrier->sum = 0;
01448     barrier->n_waiting = 0;
01449     barrier->phase = 1 - my_phase;
01450     wkf_cond_broadcast(&barrier->wait_cv);
01451   }
01452 
01453   while (barrier->phase == my_phase) {
01454     wkf_cond_wait(&barrier->wait_cv, &barrier->lock);
01455   }
01456 
01457   my_result = barrier->result;
01458 
01459   wkf_mutex_unlock(&barrier->lock);
01460 
01461   return my_result; 
01462 #else 
01463   return 0;
01464 #endif
01465 }
01466 
01467 
01468 
01469 
01470 
01471 
01472 int wkf_thread_run_barrier_init(wkf_run_barrier_t *barrier, int n_clients) {
01473 #ifdef WKFTHREADS
01474   if (barrier != NULL) {
01475     barrier->n_clients = n_clients;
01476     barrier->n_waiting = 0;
01477     barrier->phase = 0;
01478     barrier->fctn = NULL;
01479 
01480     wkf_mutex_init(&barrier->lock);
01481     wkf_cond_init(&barrier->wait_cv);
01482   }
01483 #endif
01484 
01485   return 0;
01486 }
01487 
01488 void wkf_thread_run_barrier_destroy(wkf_run_barrier_t *barrier) {
01489 #ifdef WKFTHREADS
01490   wkf_mutex_destroy(&barrier->lock);
01491   wkf_cond_destroy(&barrier->wait_cv);
01492 #endif
01493 }
01494 
01495 
01500 void * (*wkf_thread_run_barrier(wkf_run_barrier_t *barrier,
01501                                void * fctn(void*),
01502                                void * parms,
01503                                void **rsltparms))(void *) {
01504 #if defined(WKFTHREADS)
01505   int my_phase;
01506   void * (*my_result)(void*);
01507 
01508   wkf_mutex_lock(&barrier->lock);
01509   my_phase = barrier->phase;
01510   if (fctn != NULL)
01511     barrier->fctn = fctn;
01512   if (parms != NULL)
01513     barrier->parms = parms;
01514   barrier->n_waiting++;
01515 
01516   if (barrier->n_waiting == barrier->n_clients) {
01517     barrier->rslt = barrier->fctn;
01518     barrier->rsltparms = barrier->parms;
01519     barrier->fctn = NULL;
01520     barrier->parms = NULL;
01521     barrier->n_waiting = 0;
01522     barrier->phase = 1 - my_phase;
01523     wkf_cond_broadcast(&barrier->wait_cv);
01524   }
01525 
01526   while (barrier->phase == my_phase) {
01527     wkf_cond_wait(&barrier->wait_cv, &barrier->lock);
01528   }
01529 
01530   my_result = barrier->rslt;
01531   if (rsltparms != NULL)
01532     *rsltparms = barrier->rsltparms;
01533 
01534   wkf_mutex_unlock(&barrier->lock);
01535 #else
01536   void * (*my_result)(void*) = fctn;
01537   if (rsltparms != NULL)
01538     *rsltparms = parms;
01539 #endif
01540 
01541   return my_result;
01542 }
01543 
01544 
01546 int wkf_thread_run_barrier_poll(wkf_run_barrier_t *barrier) {
01547   int rc=0;
01548 #if defined(WKFTHREADS)
01549   wkf_mutex_lock(&barrier->lock);
01550   if (barrier->n_waiting == (barrier->n_clients-1)) {
01551     rc=1;
01552   }
01553   wkf_mutex_unlock(&barrier->lock);
01554 #endif
01555   return rc;
01556 }
01557 
01558 
01559 
01560 
01561 
01562 int wkf_tilestack_init(wkf_tilestack_t *s, int size) {
01563   if (s == NULL)
01564     return -1;
01565 
01566 #if defined(WKFTHREADS)
01567   wkf_mutex_init(&s->mtx);
01568 #endif
01569 
01570   s->growthrate = 512;
01571   s->top = -1;
01572 
01573   if (size > 0) {
01574     s->size = size;
01575     s->s = (wkf_tasktile_t *) malloc(s->size * sizeof(wkf_tasktile_t));
01576   } else {
01577     s->size = 0;
01578     s->s = NULL;
01579   }
01580 
01581   return 0;
01582 }
01583 
01584 
01585 void wkf_tilestack_destroy(wkf_tilestack_t *s) {
01586 #if defined(WKFTHREADS)
01587   wkf_mutex_destroy(&s->mtx);
01588 #endif
01589   free(s->s);
01590   s->s = NULL; 
01591 }
01592 
01593 
01594 int wkf_tilestack_compact(wkf_tilestack_t *s) {
01595 #if defined(WKFTHREADS)
01596   wkf_mutex_lock(&s->mtx);
01597 #endif
01598   if (s->size > (s->top + 1)) {
01599     int newsize = s->top + 1;
01600     wkf_tasktile_t *tmp = (wkf_tasktile_t *) realloc(s->s, newsize * sizeof(wkf_tasktile_t));
01601     if (tmp == NULL) {
01602 #if defined(WKFTHREADS)
01603       wkf_mutex_unlock(&s->mtx);
01604 #endif
01605       return -1; 
01606     }
01607     s->s = tmp;
01608     s->size = newsize;
01609   }
01610 #if defined(WKFTHREADS)
01611   wkf_mutex_unlock(&s->mtx);
01612 #endif
01613 
01614   return 0;
01615 }
01616 
01617 
01618 int wkf_tilestack_push(wkf_tilestack_t *s, const wkf_tasktile_t *t) {
01619 #if defined(WKFTHREADS)
01620   wkf_mutex_lock(&s->mtx);
01621 #endif
01622   s->top++;
01623   if (s->top >= s->size) {
01624     int newsize = s->size + s->growthrate;
01625     wkf_tasktile_t *tmp = (wkf_tasktile_t *) realloc(s->s, newsize * sizeof(wkf_tasktile_t));
01626     if (tmp == NULL) {
01627       s->top--;
01628 #if defined(WKFTHREADS)
01629       wkf_mutex_unlock(&s->mtx);
01630 #endif
01631       return -1; 
01632     }
01633     s->s = tmp;
01634     s->size = newsize;
01635   }
01636 
01637   s->s[s->top] = *t; 
01638 
01639 #if defined(WKFTHREADS)
01640   wkf_mutex_unlock(&s->mtx);
01641 #endif
01642 
01643   return 0;
01644 }
01645 
01646 
01647 int wkf_tilestack_pop(wkf_tilestack_t *s, wkf_tasktile_t *t) {
01648 #if defined(WKFTHREADS)
01649   wkf_mutex_lock(&s->mtx);
01650 #endif
01651 
01652   if (s->top < 0) {
01653 #if defined(WKFTHREADS)
01654     wkf_mutex_unlock(&s->mtx);
01655 #endif
01656     return WKF_TILESTACK_EMPTY; 
01657   }
01658 
01659   *t = s->s[s->top];
01660   s->top--;
01661 
01662 #if defined(WKFTHREADS)
01663   wkf_mutex_unlock(&s->mtx);
01664 #endif
01665 
01666   return 0;
01667 }
01668 
01669 
01670 int wkf_tilestack_popall(wkf_tilestack_t *s) {
01671 #if defined(WKFTHREADS)
01672   wkf_mutex_lock(&s->mtx);
01673 #endif
01674 
01675   s->top = -1;
01676 
01677 #if defined(WKFTHREADS)
01678   wkf_mutex_unlock(&s->mtx);
01679 #endif
01680 
01681   return 0;
01682 }
01683 
01684 
01685 int wkf_tilestack_empty(wkf_tilestack_t *s) {
01686 #if defined(WKFTHREADS)
01687   wkf_mutex_lock(&s->mtx);
01688 #endif
01689 
01690   if (s->top < 0) {
01691 #if defined(WKFTHREADS)
01692     wkf_mutex_unlock(&s->mtx);
01693 #endif
01694     return 1;
01695   }
01696 
01697 #if defined(WKFTHREADS)
01698   wkf_mutex_unlock(&s->mtx);
01699 #endif
01700 
01701   return 0;
01702 }
01703 
01704 
01705 
01706 
01707 
01708 
01710 int wkf_shared_iterator_init(wkf_shared_iterator_t *it) {
01711   memset(it, 0, sizeof(wkf_shared_iterator_t));
01712 #if defined(WKFTHREADS)
01713   wkf_mutex_init(&it->mtx);
01714 #endif
01715   return 0;
01716 }
01717 
01718 
01720 int wkf_shared_iterator_destroy(wkf_shared_iterator_t *it) {
01721 #if defined(WKFTHREADS)
01722   wkf_mutex_destroy(&it->mtx);
01723 #endif
01724   return 0;
01725 }
01726 
01727 
01729 int wkf_shared_iterator_set(wkf_shared_iterator_t *it,
01730                            wkf_tasktile_t *tile) {
01731 #if defined(WKFTHREADS)
01732   wkf_mutex_lock(&it->mtx);
01733 #endif
01734   it->start = tile->start;
01735   it->current = tile->start;
01736   it->end = tile->end;
01737   it->fatalerror = 0;
01738 #if defined(WKFTHREADS)
01739   wkf_mutex_unlock(&it->mtx);
01740 #endif
01741   return 0;
01742 }
01743 
01744 
01746 int wkf_shared_iterator_next_tile(wkf_shared_iterator_t *it, int reqsize,
01747                                  wkf_tasktile_t *tile) {
01748   int rc=WKF_SCHED_CONTINUE;
01749 
01750 #if defined(WKFTHREADS)
01751   wkf_mutex_spin_lock(&it->mtx);
01752 #endif
01753   if (!it->fatalerror) {
01754     tile->start=it->current; 
01755     it->current+=reqsize;    
01756     tile->end=it->current;   
01757 
01758     
01759     if (tile->start >= it->end) {
01760       tile->start=0;
01761       tile->end=0;
01762       rc = WKF_SCHED_DONE;
01763     }
01764 
01765     
01766     
01767     if (tile->end > it->end) {
01768       tile->end = it->end;
01769     }
01770   } else {
01771     rc = WKF_SCHED_DONE;
01772   }
01773 #if defined(WKFTHREADS)
01774   wkf_mutex_unlock(&it->mtx);
01775 #endif
01776 
01777   return rc;
01778 }
01779 
01780 
01782 int wkf_shared_iterator_setfatalerror(wkf_shared_iterator_t *it) {
01783 #if defined(WKFTHREADS)
01784   wkf_mutex_spin_lock(&it->mtx);
01785 #endif
01786   it->fatalerror=1;
01787 #if defined(WKFTHREADS)
01788   wkf_mutex_unlock(&it->mtx);
01789 #endif
01790   return 0;
01791 }
01792 
01793 
01795 int wkf_shared_iterator_getfatalerror(wkf_shared_iterator_t *it) {
01796   int rc=0;
01797 #if defined(WKFTHREADS)
01798   wkf_mutex_lock(&it->mtx);
01799 #endif
01800   if (it->fatalerror)
01801     rc = -1;
01802 #if defined(WKFTHREADS)
01803   wkf_mutex_unlock(&it->mtx);
01804 #endif
01805   return rc;
01806 }
01807 
01808 
01809 #if defined(WKFTHREADS)
01810 
01811 
01812 
01813 static void * wkf_threadpool_workerproc(void *voidparms) {
01814   void *(*fctn)(void*);
01815   wkf_threadpool_workerdata_t *workerdata = (wkf_threadpool_workerdata_t *) voidparms;
01816   wkf_threadpool_t *thrpool = (wkf_threadpool_t *) workerdata->thrpool;
01817 
01818   while ((fctn = wkf_thread_run_barrier(&thrpool->runbar, NULL, NULL, &workerdata->parms)) != NULL) {
01819     (*fctn)(workerdata);
01820   }
01821 
01822   return NULL;
01823 }
01824 
01825 
/*
 * No-op task: does nothing and returns NULL.  It is handed to the run
 * barrier when the caller only wants to synchronize with the workers.
 */
static void * wkf_threadpool_workersync(void *voidparms) {
  (void) voidparms;  /* intentionally unused */
  return NULL;
}
01829 #endif
01830 
01831 
01832 wkf_threadpool_t * wkf_threadpool_create(int workercount, int *devlist) {
01833   int i;
01834   wkf_threadpool_t *thrpool = NULL;
01835   thrpool = (wkf_threadpool_t *) malloc(sizeof(wkf_threadpool_t));
01836   if (thrpool == NULL)
01837     return NULL;
01838 
01839   memset(thrpool, 0, sizeof(wkf_threadpool_t));
01840 
01841 #if !defined(WKFTHREADS)
01842   workercount=1;
01843 #endif
01844 
01845   
01846   
01847   thrpool->devlist = (int *) malloc(sizeof(int) * workercount);
01848   if (devlist == NULL) {
01849     for (i=0; i<workercount; i++)
01850       thrpool->devlist[i] = -1; 
01851   } else {
01852     memcpy(thrpool->devlist, devlist, sizeof(int) * workercount);
01853   }
01854 
01855   
01856   wkf_shared_iterator_init(&thrpool->iter);
01857 
01858   
01859   wkf_tilestack_init(&thrpool->errorstack, 64);
01860 
01861   
01862   thrpool->workercount = workercount;
01863   wkf_thread_run_barrier_init(&thrpool->runbar, workercount+1);
01864 
01865   
01866   thrpool->threads = (wkf_thread_t *) malloc(sizeof(wkf_thread_t) * workercount);
01867   thrpool->workerdata = (wkf_threadpool_workerdata_t *) malloc(sizeof(wkf_threadpool_workerdata_t) * workercount);
01868   memset(thrpool->workerdata, 0, sizeof(wkf_threadpool_workerdata_t) * workercount);
01869 
01870   
01871   for (i=0; i<workercount; i++) {
01872     thrpool->workerdata[i].iter=&thrpool->iter;
01873     thrpool->workerdata[i].errorstack=&thrpool->errorstack;
01874     thrpool->workerdata[i].threadid=i;
01875     thrpool->workerdata[i].threadcount=workercount;
01876     thrpool->workerdata[i].devid=thrpool->devlist[i];
01877     thrpool->workerdata[i].devspeed=1.0f; 
01878     thrpool->workerdata[i].thrpool=thrpool;
01879   }
01880 
01881 #if defined(WKFTHREADS)
01882   
01883   for (i=0; i<workercount; i++) {
01884     wkf_thread_create(&thrpool->threads[i], wkf_threadpool_workerproc, &thrpool->workerdata[i]);
01885   }
01886 #endif
01887 
01888   return thrpool;
01889 }
01890 
01891 
01892 int wkf_threadpool_launch(wkf_threadpool_t *thrpool,
01893                           void *fctn(void *), void *parms, int blocking) {
01894   if (thrpool == NULL)
01895     return -1;
01896 
01897 #if defined(WKFTHREADS)
01898   
01899   wkf_thread_run_barrier(&thrpool->runbar, fctn, parms, NULL);
01900   if (blocking)
01901     wkf_thread_run_barrier(&thrpool->runbar, wkf_threadpool_workersync, NULL, NULL);
01902 #else
01903   thrpool->workerdata[0].parms = parms;
01904   (*fctn)(&thrpool->workerdata[0]);
01905 #endif
01906   return 0;
01907 }
01908 
01909 
01910 int wkf_threadpool_wait(wkf_threadpool_t *thrpool) {
01911 #if defined(WKFTHREADS)
01912   wkf_thread_run_barrier(&thrpool->runbar, wkf_threadpool_workersync, NULL, NULL);
01913 #endif
01914   return 0;
01915 }
01916 
01917 
01918 int wkf_threadpool_poll(wkf_threadpool_t *thrpool) {
01919 #if defined(WKFTHREADS)
01920   return wkf_thread_run_barrier_poll(&thrpool->runbar);
01921 #else
01922   return 1;
01923 #endif
01924 }
01925 
01926 
01927 int wkf_threadpool_destroy(wkf_threadpool_t *thrpool) {
01928 #if defined(WKFTHREADS)
01929   int i;
01930 #endif
01931 
01932   
01933   wkf_thread_run_barrier(&thrpool->runbar, NULL, NULL, NULL);
01934 
01935 #if defined(WKFTHREADS)
01936   
01937   for (i=0; i<thrpool->workercount; i++) {
01938     wkf_thread_join(thrpool->threads[i], NULL);
01939   }
01940 #endif
01941 
01942   
01943   wkf_thread_run_barrier_destroy(&thrpool->runbar);
01944 
01945   
01946   wkf_shared_iterator_destroy(&thrpool->iter);
01947 
01948   
01949   wkf_tilestack_destroy(&thrpool->errorstack);
01950 
01951   free(thrpool->devlist);
01952   free(thrpool->threads);
01953   free(thrpool->workerdata);
01954   free(thrpool);
01955 
01956   return 0;
01957 }
01958 
01959 
01961 int wkf_threadpool_get_workercount(wkf_threadpool_t *thrpool) {
01962   return thrpool->workercount;
01963 }
01964 
01965 
01967 int wkf_threadpool_worker_getid(void *voiddata, int *threadid, int *threadcount) {
01968   wkf_threadpool_workerdata_t *worker = (wkf_threadpool_workerdata_t *) voiddata;
01969   if (threadid != NULL)
01970     *threadid = worker->threadid;
01971 
01972   if (threadcount != NULL)
01973     *threadcount = worker->threadcount;
01974 
01975   return 0;
01976 }
01977 
01978 
01980 int wkf_threadpool_worker_getdevid(void *voiddata, int *devid) {
01981   wkf_threadpool_workerdata_t *worker = (wkf_threadpool_workerdata_t *) voiddata;
01982   if (devid != NULL)
01983     *devid = worker->devid;
01984 
01985   return 0;
01986 }
01987 
01988 
01995 int wkf_threadpool_worker_setdevspeed(void *voiddata, float speed) {
01996   wkf_threadpool_workerdata_t *worker = (wkf_threadpool_workerdata_t *) voiddata;
01997   worker->devspeed = speed;
01998   return 0;
01999 }
02000 
02001 
02006 int wkf_threadpool_worker_getdevspeed(void *voiddata, float *speed) {
02007   wkf_threadpool_workerdata_t *worker = (wkf_threadpool_workerdata_t *) voiddata;
02008   if (speed != NULL)
02009     *speed = worker->devspeed;
02010   return 0;
02011 }
02012 
02013 
02018 int wkf_threadpool_worker_devscaletile(void *voiddata, int *tilesize) {
02019   wkf_threadpool_workerdata_t *worker = (wkf_threadpool_workerdata_t *) voiddata;
02020   if (tilesize != NULL) {
02021     int scaledtilesize;
02022     scaledtilesize = (int) (worker->devspeed * ((float) (*tilesize)));
02023     if (scaledtilesize < 1)
02024       scaledtilesize = 1;
02025 
02026     *tilesize = scaledtilesize;
02027   }
02028 
02029   return 0;
02030 }
02031 
02032 
02034 int wkf_threadpool_worker_getdata(void *voiddata, void **clientdata) {
02035   wkf_threadpool_workerdata_t *worker = (wkf_threadpool_workerdata_t *) voiddata;
02036   if (clientdata != NULL)
02037     *clientdata = worker->parms;
02038 
02039   return 0;
02040 }
02041 
02042 
02044 int wkf_threadpool_sched_dynamic(wkf_threadpool_t *thrpool, wkf_tasktile_t *tile) {
02045   if (thrpool == NULL)
02046     return -1;
02047   return wkf_shared_iterator_set(&thrpool->iter, tile);
02048 }
02049 
02050 
02052 int wkf_threadpool_next_tile(void *voidparms, int reqsize,
02053                             wkf_tasktile_t *tile) {
02054   int rc;
02055   wkf_threadpool_workerdata_t *worker = (wkf_threadpool_workerdata_t *) voidparms;
02056   rc = wkf_shared_iterator_next_tile(worker->iter, reqsize, tile);
02057   if (rc == WKF_SCHED_DONE) {
02058     
02059     
02060     if (wkf_tilestack_pop(worker->errorstack, tile) != WKF_TILESTACK_EMPTY)
02061       return WKF_SCHED_CONTINUE;
02062   }
02063 
02064   return rc;
02065 }
02066 
02067 
02072 int wkf_threadpool_tile_failed(void *voidparms, wkf_tasktile_t *tile) {
02073   wkf_threadpool_workerdata_t *worker = (wkf_threadpool_workerdata_t *) voidparms;
02074   return wkf_tilestack_push(worker->errorstack, tile);
02075 }
02076 
02077 
02078 
02079 int wkf_threadpool_setfatalerror(void *voidparms) {
02080   wkf_threadpool_workerdata_t *worker = (wkf_threadpool_workerdata_t *) voidparms;
02081   wkf_shared_iterator_setfatalerror(worker->iter);
02082   return 0;
02083 }
02084 
02085 
02086 
02087 int wkf_threadpool_getfatalerror(void *voidparms) {
02088   wkf_threadpool_workerdata_t *worker = (wkf_threadpool_workerdata_t *) voidparms;
02089   
02090   return wkf_shared_iterator_getfatalerror(worker->iter);
02091 }
02092 
02093 
02094 
02095 int wkf_threadlaunch(int numprocs, void *clientdata, void * fctn(void *),
02096                     wkf_tasktile_t *tile) {
02097   wkf_shared_iterator_t iter;
02098   wkf_threadlaunch_t *parms=NULL;
02099   wkf_thread_t * threads=NULL;
02100   int i, rc;
02101 
02102   
02103 #if !defined(WKFTHREADS)
02104   numprocs=1;
02105 #endif
02106 
02107   
02108   wkf_shared_iterator_init(&iter);
02109   if (wkf_shared_iterator_set(&iter, tile))
02110     return -1;
02111 
02112   
02113   threads = (wkf_thread_t *) calloc(numprocs * sizeof(wkf_thread_t), 1);
02114   if (threads == NULL)
02115     return -1;
02116 
02117   
02118   parms = (wkf_threadlaunch_t *) malloc(numprocs * sizeof(wkf_threadlaunch_t));
02119   if (parms == NULL) {
02120     free(threads);
02121     return -1;
02122   }
02123   for (i=0; i<numprocs; i++) {
02124     parms[i].iter = &iter;
02125     parms[i].threadid = i;
02126     parms[i].threadcount = numprocs;
02127     parms[i].clientdata = clientdata;
02128   }
02129 
02130 #if defined(WKFTHREADS)
02131   if (numprocs == 1) {
02132     
02133     
02134     
02135     
02136     
02137     
02138     
02139     
02140     fctn((void *) &parms[0]);
02141   } else {
02142     
02143     for (i=0; i<numprocs; i++) {
02144       wkf_thread_create(&threads[i], fctn, &parms[i]);
02145     }
02146 
02147     
02148     for (i=0; i<numprocs; i++) {
02149       wkf_thread_join(threads[i], NULL);
02150     }
02151   }
02152 #else
02153   
02154   fctn((void *) &parms[0]);
02155 #endif
02156 
02157   
02158   free(parms);
02159   free(threads);
02160 
02161   
02162   rc=wkf_shared_iterator_getfatalerror(&iter);
02163 
02164   
02165   wkf_shared_iterator_destroy(&iter);
02166 
02167   return rc;
02168 }
02169 
02170 
02172 int wkf_threadlaunch_getid(void *voidparms, int *threadid, int *threadcount) {
02173   wkf_threadlaunch_t *worker = (wkf_threadlaunch_t *) voidparms;
02174   if (threadid != NULL)
02175     *threadid = worker->threadid;
02176 
02177   if (threadcount != NULL)
02178     *threadcount = worker->threadcount;
02179 
02180   return 0;
02181 }
02182 
02183 
02185 int wkf_threadlaunch_getdata(void *voidparms, void **clientdata) {
02186   wkf_threadlaunch_t *worker = (wkf_threadlaunch_t *) voidparms;
02187   if (clientdata != NULL)
02188     *clientdata = worker->clientdata;
02189 
02190   return 0;
02191 }
02192 
02193 
02195 int wkf_threadlaunch_next_tile(void *voidparms, int reqsize,
02196                               wkf_tasktile_t *tile) {
02197   wkf_threadlaunch_t *worker = (wkf_threadlaunch_t *) voidparms;
02198   return wkf_shared_iterator_next_tile(worker->iter, reqsize, tile);
02199 }
02200 
02201 
02203 int wkf_threadlaunch_setfatalerror(void *voidparms) {
02204   wkf_threadlaunch_t *worker = (wkf_threadlaunch_t *) voidparms;
02205   return wkf_shared_iterator_setfatalerror(worker->iter);
02206 }
02207 
02208 
02209 #ifdef __cplusplus
02210 }
02211 #endif
02212 
02213