00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00021
00022 #if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_AMD64))
00023 #if !defined(__SSE2__) && defined(_WIN64)
00024 #define __SSE2__ 1
00025 #endif
00026 #if defined(__SSE2__)
00027 #include <emmintrin.h>
00028 #define VMDORBUSESSE 1
00029 #endif
00030
00031 #if defined(VMDCPUDISPATCH)
00032 #define VECPADSZ 16
00033 #else
00034 #define VECPADSZ 4
00035 #endif
00036
00037
00038
00039 #elif defined(__VSX__)
00040 #if defined(__GNUC__) && defined(__VEC__)
00041 #include <altivec.h>
00042 #endif
00043
00044
00045
00046
00047
00048 #define VECPADSZ 4
00049 #define VMDORBUSEVSX 1
00050
00051
00052 #elif defined(__ARM_ARCH_ISA_A64) && !defined(ARCH_MACOSXARM64)
00053 #define VMDUSESVE 1
00054
00055 #if defined(VMDCPUDISPATCH)
00056 #define VECPADSZ 16
00057 #else
00058 #define VECPADSZ 4
00059 #endif
00060
00061
00062 #else
00063 #define VECPADSZ 1
00064 #endif
00065
00066
00067 #define VECPADMASK (VECPADSZ - 1)
00068
00069
00070
00071 #include <math.h>
00072 #include <stdio.h>
00073 #include "VMDApp.h"
00074 #include "Orbital.h"
00075 #include "DrawMolecule.h"
00076 #include "utilities.h"
00077 #include "Inform.h"
00078 #include "WKFThreads.h"
00079 #include "WKFUtils.h"
00080 #if defined(VMDCUDA)
00081 #include "CUDAOrbital.h"
00082 #endif
00083 #if defined(VMDOPENCL)
00084 #include "OpenCLUtils.h"
00085 #include "OpenCLKernels.h"
00086 #endif
00087 #include "ProfileHooks.h"
00088
00089
00090
00091
00092
00093
00094 extern int evaluate_grid_avx512er(int numatoms,
00095 const float *wave_f, const float *basis_array,
00096 const float *atompos,
00097 const int *atom_basis,
00098 const int *num_shells_per_atom,
00099 const int *num_prim_per_shell,
00100 const int *shell_types,
00101 const int *numvoxels,
00102 float voxelsize,
00103 const float *origin,
00104 int density,
00105 float * orbitalgrid);
00106
00107
00108 extern int evaluate_grid_avx512f(int numatoms,
00109 const float *wave_f, const float *basis_array,
00110 const float *atompos,
00111 const int *atom_basis,
00112 const int *num_shells_per_atom,
00113 const int *num_prim_per_shell,
00114 const int *shell_types,
00115 const int *numvoxels,
00116 float voxelsize,
00117 const float *origin,
00118 int density,
00119 float * orbitalgrid);
00120
00121
00122 extern int evaluate_grid_avx2(int numatoms,
00123 const float *wave_f, const float *basis_array,
00124 const float *atompos,
00125 const int *atom_basis,
00126 const int *num_shells_per_atom,
00127 const int *num_prim_per_shell,
00128 const int *shell_types,
00129 const int *numvoxels,
00130 float voxelsize,
00131 const float *origin,
00132 int density,
00133 float * orbitalgrid);
00134
00135
00136 extern int evaluate_grid_neon(int numatoms,
00137 const float *wave_f, const float *basis_array,
00138 const float *atompos,
00139 const int *atom_basis,
00140 const int *num_shells_per_atom,
00141 const int *num_prim_per_shell,
00142 const int *shell_types,
00143 const int *numvoxels,
00144 float voxelsize,
00145 const float *origin,
00146 int density,
00147 float * orbitalgrid);
00148
00149
00150 extern int evaluate_grid_sve(int numatoms,
00151 const float *wave_f, const float *basis_array,
00152 const float *atompos,
00153 const int *atom_basis,
00154 const int *num_shells_per_atom,
00155 const int *num_prim_per_shell,
00156 const int *shell_types,
00157 const int *numvoxels,
00158 float voxelsize,
00159 const float *origin,
00160 int density,
00161 float * orbitalgrid);
00162
00163
00164 #define ANGS_TO_BOHR 1.88972612478289694072f
00165
00167 Orbital::Orbital(const float *pos,
00168 const float *wfn,
00169 const float *barray,
00170 const basis_atom_t *bset,
00171 const int *types,
00172 const int *asort,
00173 const int *abasis,
00174 const float **norm,
00175 const int *nshells,
00176 const int *nprimshell,
00177 const int *shelltypes,
00178 int natoms, int ntypes, int numwave, int numbasis,
00179 int orbid) :
00180 numatoms(natoms), atompos(pos),
00181 num_wave_f(numwave),
00182 wave_f(NULL),
00183 num_basis_funcs(numbasis),
00184 basis_array(barray),
00185 numtypes(ntypes),
00186 basis_set(bset),
00187 atom_types(types),
00188 atom_sort(asort),
00189 atom_basis(abasis),
00190 norm_factors(norm),
00191 num_shells_per_atom(nshells),
00192 num_prim_per_shell(nprimshell),
00193 shell_types(shelltypes),
00194 grid_data(NULL)
00195 {
00196 origin[0] = origin[1] = origin[2] = 0.0;
00197
00198
00199
00200
00201 normalize_wavefunction(wfn + num_wave_f*orbid);
00202
00203
00204 }
00205
00207 Orbital::~Orbital() {
00208 if (wave_f) delete [] wave_f;
00209 }
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220 void Orbital::normalize_wavefunction(const float *wfn) {
00221 #ifdef DEBUGORBS
00222 char shellname[8] = {'S', 'P', 'D', 'F', 'G', 'H', 'I', 'K'};
00223 #endif
00224 int i, j, k;
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236 wave_f = new float[num_wave_f];
00237 int ifunc = 0;
00238 for (i=0; i<numatoms; i++) {
00239 const basis_atom_t *basis_atom = &basis_set[atom_types[i]];
00240 for (j=0; j<basis_atom->numshells; j++) {
00241 int stype = basis_atom->shell[j].type;
00242 #ifdef DEBUGORBS
00243 printf("atom %i/%i, %i/%i %c-shell\n", i+1, numatoms, j+1, basis_atom->numshells, shellname[stype]);
00244 #endif
00245 for (k=0; k<basis_atom->shell[j].num_cart_func; k++) {
00246 wave_f[ifunc] = wfn[ifunc] * norm_factors[stype][k];
00247
00248 #ifdef DEBUGORBS
00249 printf("%3i %c %2i wave_f[%3i]=% f norm=%.3f normwave=% f\n",
00250 i, shellname[stype], k, ifunc, wfn[ifunc],
00251 norm_factors[stype][k], wave_f[ifunc]);
00252 #endif
00253 ifunc++;
00254 }
00255 }
00256 }
00257 }
00258
00259
00260
00261
00262
00263
00264 int Orbital::set_grid_to_bbox(const float *pos, float padding,
00265 float resolution) {
00266 int i = 0;
00267 float xyzdim[3];
00268
00269
00270
00271 origin[0] = xyzdim[0] = pos[0];
00272 origin[1] = xyzdim[1] = pos[1];
00273 origin[2] = xyzdim[2] = pos[2];
00274
00275
00276
00277
00278 for(i=1; i<numatoms; i++) {
00279 if (pos[3*i ] < origin[0]) origin[0] = pos[3*i];
00280 if (pos[3*i+1] < origin[1]) origin[1] = pos[3*i+1];
00281 if (pos[3*i+2] < origin[2]) origin[2] = pos[3*i+2];
00282 if (pos[3*i ] > xyzdim[0]) xyzdim[0] = pos[3*i];
00283 if (pos[3*i+1] > xyzdim[1]) xyzdim[1] = pos[3*i+1];
00284 if (pos[3*i+2] > xyzdim[2]) xyzdim[2] = pos[3*i+2];
00285 }
00286
00287
00288 origin[0] -= padding;
00289 origin[1] -= padding;
00290 origin[2] -= padding;
00291 gridsize[0] = xyzdim[0] + padding - origin[0];
00292 gridsize[1] = xyzdim[1] + padding - origin[1];
00293 gridsize[2] = xyzdim[2] + padding - origin[2];
00294
00295 set_resolution(resolution);
00296
00297 return TRUE;
00298 }
00299
00300
00301
00302
00303
00304
00305 void Orbital::set_grid(float newori[3], float newdim[3], float newvoxelsize) {
00306 origin[0] = newori[0];
00307 origin[1] = newori[1];
00308 origin[2] = newori[2];
00309 gridsize[0] = newdim[0];
00310 gridsize[1] = newdim[1];
00311 gridsize[2] = newdim[2];
00312 set_resolution(newvoxelsize);
00313 }
00314
00315
00316 void Orbital::set_resolution(float resolution) {
00317 voxelsize = resolution;
00318 int i;
00319 for (i=0; i<3; i++) {
00320 numvoxels[i] = (int)(gridsize[i]/voxelsize) + 1;
00321 gridsize[i] = voxelsize*(numvoxels[i]-1);
00322 }
00323 }
00324
00325 #define XNEG 0
00326 #define YNEG 1
00327 #define ZNEG 2
00328 #define XPOS 3
00329 #define YPOS 4
00330 #define ZPOS 5
00331
00332
00333
00334
00335 int Orbital::check_plane(int dir, float threshold, int minstepsize,
00336 int &stepsize) {
00337 bool repeat=0;
00338 int u, v, w, nu, nv;
00339
00340
00341
00342 u = (dir+1)%3;
00343 v = (dir+2)%3;
00344 w = dir%3;
00345
00346
00347
00348
00349
00350
00351 do {
00352 int success = 0;
00353 int gridstep = stepsize;
00354
00355 if (repeat) {
00356
00357
00358
00359 gridstep = 2*stepsize;
00360 }
00361
00362
00363 float grid[3];
00364 grid[w] = origin[w] + (dir/3)*(numvoxels[w]-1) * voxelsize;
00365
00366
00367 for (nu=0; nu<numvoxels[u]; nu+=gridstep) {
00368 grid[u] = origin[u] + nu * voxelsize;
00369
00370 for (nv=0; nv<numvoxels[v]; nv+=gridstep) {
00371 grid[v] = origin[v] + nv * voxelsize;
00372
00373 if (fabs(evaluate_grid_point(grid[0], grid[1], grid[2])) > threshold) {
00374 success = 1;
00375 break;
00376 }
00377 }
00378 if (success) break;
00379 }
00380
00381 if (success) {
00382
00383
00384
00385 if (!(dir/3)) origin[w] -= stepsize*voxelsize;
00386 numvoxels[w] += stepsize;
00387 if (stepsize<=minstepsize) {
00388
00389 return 1;
00390 }
00391 stepsize /=2;
00392 repeat = 1;
00393
00394
00395 } else {
00396
00397 if (!(dir/3)) origin[w] += stepsize*voxelsize;
00398 numvoxels[w] -= stepsize;
00399
00400 repeat = 0;
00401 if (numvoxels[w] <= 1) {
00402
00403
00404
00405 numvoxels[w] = stepsize;
00406 if (!(dir/3)) origin[w] -= stepsize*voxelsize;
00407 stepsize /=2;
00408 repeat = 1;
00409
00410 }
00411 }
00412
00413 } while (repeat);
00414
00415 return 0;
00416 }
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439 void Orbital::find_optimal_grid(float threshold,
00440 int minstepsize, int maxstepsize) {
00441 int optimal[6] = {0, 0, 0, 0, 0, 0};
00442 int stepsize[6];
00443 int i;
00444 for (i=0; i<6; i++) stepsize[i] = maxstepsize;
00445
00446 #ifdef DEBUGORBS
00447 printf("origin = {%f %f %f}\n", origin[0], origin[1], origin[2]);
00448 printf("gridsize = {%f %f %f}\n", gridsize[0], gridsize[1], gridsize[2]);
00449 #endif
00450
00451
00452
00453
00454 int iter = 0;
00455 while ( !optimal[0] || !optimal[1] || !optimal[2] ||
00456 !optimal[3] || !optimal[4] || !optimal[5] )
00457 {
00458 if (iter>100) {
00459 msgInfo << "WARNING: Could not optimize orbital grid boundaries in"
00460 << iter << "steps!" << sendmsg;
00461 break;
00462 }
00463 iter++;
00464
00465
00466
00467 if (!optimal[XNEG])
00468 optimal[XNEG] = check_plane(XNEG, threshold, minstepsize, stepsize[XNEG]);
00469
00470 if (!optimal[XPOS])
00471 optimal[XPOS] = check_plane(XPOS, threshold, minstepsize, stepsize[XPOS]);
00472
00473 if (!optimal[YNEG])
00474 optimal[YNEG] = check_plane(YNEG, threshold, minstepsize, stepsize[YNEG]);
00475
00476 if (!optimal[YPOS])
00477 optimal[YPOS] = check_plane(YPOS, threshold, minstepsize, stepsize[YPOS]);
00478
00479 if (!optimal[ZNEG])
00480 optimal[ZNEG] = check_plane(ZNEG, threshold, minstepsize, stepsize[ZNEG]);
00481
00482 if (!optimal[ZPOS])
00483 optimal[ZPOS] = check_plane(ZPOS, threshold, minstepsize, stepsize[ZPOS]);
00484
00485 #if defined(DEBUGORBS)
00486 printf("origin {%f %f %f}\n", origin[0], origin[1], origin[2]);
00487 printf("ngrid {%i %i %i}\n", numvoxels[0], numvoxels[1], numvoxels[2]);
00488 printf("stepsize {%i %i %i %i %i %i}\n", stepsize[0], stepsize[1], stepsize[2],
00489 stepsize[3], stepsize[4], stepsize[5]);
00490 #endif
00491 }
00492
00493
00494 gridsize[0] = numvoxels[0]*voxelsize;
00495 gridsize[1] = numvoxels[1]*voxelsize;
00496 gridsize[2] = numvoxels[2]*voxelsize;
00497 }
00498
00499
00500
00501 int Orbital::calculate_mo(DrawMolecule *mol, int density) {
00502 PROFILE_PUSH_RANGE("Orbital", 4);
00503
00504 wkf_timerhandle timer=wkf_timer_create();
00505 wkf_timer_start(timer);
00506
00507
00508
00509
00510
00511 int vecpadmask = VECPADMASK;
00512 if ((mol->app->cpucaps != NULL) && (mol->app->cpucaps->flags & CPU_AVX2)) {
00513 vecpadmask = 7;
00514 }
00515 if ((mol->app->cpucaps != NULL) && (mol->app->cpucaps->flags & (CPU_AVX512F | CPU_AVX512ER))) {
00516 vecpadmask = 15;
00517 }
00518
00519
00520 numvoxels[0] = (numvoxels[0] + vecpadmask) & ~(vecpadmask);
00521 gridsize[0] = numvoxels[0]*voxelsize;
00522
00523
00524 int numgridpoints = numvoxels[0] * numvoxels[1] * numvoxels[2];
00525 grid_data = new float[numgridpoints];
00526
00527 #if defined(DEBUGORBS)
00528 printf("num_wave_f=%i\n", num_wave_f);
00529
00530 int i=0;
00531 for (i=0; i<num_wave_f; i++) {
00532 printf("wave_f[%i] = %f\n", i, wave_f[i]);
00533 }
00534
00535
00536
00537 printf("Calculating %ix%ix%i orbital grid.\n",
00538 numvoxels[0], numvoxels[1], numvoxels[2]);
00539 #endif
00540
00541
00542 int rc=-1;
00543
00544
00545 #if defined(VMDCUDA)
00546
00547
00548 if ((max_shell_type() <= G_SHELL) &&
00549 (max_primitives() <= 32) &&
00550 (!getenv("VMDNOCUDA"))) {
00551 rc = vmd_cuda_evaluate_orbital_grid(mol->cuda_devpool(),
00552 numatoms, wave_f, num_wave_f,
00553 basis_array, num_basis_funcs,
00554 atompos, atom_basis,
00555 num_shells_per_atom,
00556 num_prim_per_shell,
00557 shell_types, total_shells(),
00558 numvoxels, voxelsize,
00559 origin, density, grid_data);
00560 }
00561 #endif
00562 #if defined(VMDOPENCL)
00563
00564
00565 if (rc!=0 &&
00566 (max_shell_type() <= G_SHELL) &&
00567 (max_primitives() <= 32) &&
00568 (!getenv("VMDNOOPENCL"))) {
00569
00570 #if 1
00571
00572 static vmd_opencl_orbital_handle *orbh = NULL;
00573 static cl_context clctx = NULL;
00574 static cl_command_queue clcmdq = NULL;
00575 static cl_device_id *cldevs = NULL;
00576 if (orbh == NULL) {
00577 printf("Attaching OpenCL device:\n");
00578 wkf_timer_start(timer);
00579 cl_int clerr = CL_SUCCESS;
00580
00581 cl_platform_id clplatid = vmd_cl_get_platform_index(0);
00582 cl_context_properties clctxprops[] = {(cl_context_properties) CL_CONTEXT_PLATFORM, (cl_context_properties) clplatid, (cl_context_properties) 0};
00583 clctx = clCreateContextFromType(clctxprops, CL_DEVICE_TYPE_GPU, NULL, NULL, &clerr);
00584
00585 size_t parmsz;
00586 clerr |= clGetContextInfo(clctx, CL_CONTEXT_DEVICES, 0, NULL, &parmsz);
00587 if (clerr != CL_SUCCESS) return -1;
00588 cldevs = (cl_device_id *) malloc(parmsz);
00589 if (clerr != CL_SUCCESS) return -1;
00590 clerr |= clGetContextInfo(clctx, CL_CONTEXT_DEVICES, parmsz, cldevs, NULL);
00591 if (clerr != CL_SUCCESS) return -1;
00592 clcmdq = clCreateCommandQueue(clctx, cldevs[0], 0, &clerr);
00593 if (clerr != CL_SUCCESS) return -1;
00594 wkf_timer_stop(timer);
00595 printf(" OpenCL context creation time: %.3f sec\n", wkf_timer_time(timer));
00596
00597 wkf_timer_start(timer);
00598 orbh = vmd_opencl_create_orbital_handle(clctx, clcmdq, cldevs);
00599 wkf_timer_stop(timer);
00600 printf(" OpenCL kernel compilation time: %.3f sec\n", wkf_timer_time(timer));
00601
00602 wkf_timer_start(timer);
00603 }
00604 #endif
00605
00606 rc = vmd_opencl_evaluate_orbital_grid(mol->cuda_devpool(), orbh,
00607 numatoms, wave_f, num_wave_f,
00608 basis_array, num_basis_funcs,
00609 atompos, atom_basis,
00610 num_shells_per_atom,
00611 num_prim_per_shell,
00612 shell_types, total_shells(),
00613 numvoxels, voxelsize,
00614 origin, density, grid_data);
00615
00616 #if 0
00617
00618 vmd_opencl_destroy_orbital_handle(parms.orbh);
00619 clReleaseCommandQueue(clcmdq);
00620 clReleaseContext(clctx);
00621 free(cldevs);
00622 #endif
00623 }
00624 #endif
00625 #if 0
00626 int numprocs = 1;
00627 if (getenv("VMDDUMPORBITALS")) {
00628 write_orbital_data(getenv("VMDDUMPORBITALS"), numatoms,
00629 wave_f, num_wave_f, basis_array, num_basis,
00630 atompos, atom_basis, num_shells_per_atom,
00631 num_prim_per_shell, shell_types,
00632 num_shells, numvoxels, voxelsize, origin);
00633
00634 read_calc_orbitals(devpool, getenv("VMDDUMPORBITALS"));
00635 }
00636 #endif
00637
00638
00639 #if !defined(VMDORBUSETHRPOOL)
00640 #if defined(VMDTHREADS)
00641 int numcputhreads = wkf_thread_numprocessors();
00642 #else
00643 int numcputhreads = 1;
00644 #endif
00645 #endif
00646 if (rc!=0) rc = evaluate_grid_fast(mol->app->cpucaps,
00647 #if defined(VMDORBUSETHRPOOL)
00648 mol->cpu_threadpool(),
00649 #else
00650 numcputhreads,
00651 #endif
00652 numatoms, wave_f, basis_array,
00653 atompos, atom_basis,
00654 num_shells_per_atom, num_prim_per_shell,
00655 shell_types, numvoxels, voxelsize,
00656 origin, density, grid_data);
00657
00658 if (rc!=0) {
00659 msgErr << "Error computing orbital grid" << sendmsg;
00660 delete [] grid_data;
00661 grid_data=NULL;
00662
00663 PROFILE_POP_RANGE();
00664
00665 return FALSE;
00666 }
00667
00668 wkf_timer_stop(timer);
00669
00670 #if 1
00671 if (getenv("VMDORBTIMING") != NULL) {
00672 double gflops = (numgridpoints * flops_per_gridpoint()) / (wkf_timer_time(timer) * 1000000000.0);
00673
00674 char strbuf[1024];
00675 sprintf(strbuf, "Orbital calc. time %.3f secs, %.2f gridpoints/sec, %.2f GFLOPS",
00676 wkf_timer_time(timer),
00677 (((double) numgridpoints) / wkf_timer_time(timer)),
00678 gflops);
00679 msgInfo << strbuf << sendmsg;
00680 }
00681 #endif
00682
00683 wkf_timer_destroy(timer);
00684
00685 PROFILE_POP_RANGE();
00686
00687 return TRUE;
00688 }
00689
00690
00691
00692
00693
00694
00695
00696
00697
00698
00699
00700
00701
00702
00703
00704
00705
00706
00707
00708
00709
00710
00711
00712
00713
00714
00715
00716
00717
00718 float Orbital::evaluate_grid_point(float grid_x, float grid_y, float grid_z) {
00719 int at;
00720 int prim, shell;
00721
00722
00723 float value = 0.0;
00724
00725
00726 int ifunc = 0;
00727 int shell_counter = 0;
00728
00729
00730 for (at=0; at<numatoms; at++) {
00731 int maxshell = num_shells_per_atom[at];
00732 int prim_counter = atom_basis[at];
00733
00734
00735 float xdist = (grid_x - atompos[3*at ])*ANGS_TO_BOHR;
00736 float ydist = (grid_y - atompos[3*at+1])*ANGS_TO_BOHR;
00737 float zdist = (grid_z - atompos[3*at+2])*ANGS_TO_BOHR;
00738 float dist2 = xdist*xdist + ydist*ydist + zdist*zdist;
00739
00740
00741
00742
00743
00744
00745 for (shell=0; shell < maxshell; shell++) {
00746 float contracted_gto = 0.0f;
00747
00748
00749
00750 int maxprim = num_prim_per_shell[shell_counter];
00751 int shelltype = shell_types[shell_counter];
00752 for (prim=0; prim < maxprim; prim++) {
00753 float exponent = basis_array[prim_counter ];
00754 float contract_coeff = basis_array[prim_counter + 1];
00755 contracted_gto += contract_coeff * expf(-exponent*dist2);
00756 prim_counter += 2;
00757 }
00758
00759
00760 float tmpshell=0;
00761
00762
00763 int i, j;
00764 float xdp, ydp, zdp;
00765 float xdiv = 1.0f / xdist;
00766 for (j=0, zdp=1.0f; j<=shelltype; j++, zdp*=zdist) {
00767 int imax = shelltype - j;
00768 for (i=0, ydp=1.0f, xdp=powf(xdist, float(imax)); i<=imax; i++, ydp*=ydist, xdp*=xdiv) {
00769 tmpshell += wave_f[ifunc++] * xdp * ydp * zdp;
00770 }
00771 }
00772 value += tmpshell * contracted_gto;
00773
00774 shell_counter++;
00775 }
00776 }
00777
00778
00779 return value;
00780 }
00781
00782
00783
00784
00785
00786 int Orbital::max_primitives(void) {
00787 int maxprim=-1;
00788
00789 int shell_counter = 0;
00790 for (int at=0; at<numatoms; at++) {
00791 for (int shell=0; shell < num_shells_per_atom[at]; shell++) {
00792 int numprim = num_prim_per_shell[shell_counter];
00793 if (numprim > maxprim)
00794 maxprim = numprim;
00795 }
00796 }
00797
00798 return maxprim;
00799 }
00800
00801
00802
00803
00804
00805 int Orbital::max_shell_type(void) {
00806 int maxshell=-1;
00807
00808 int shell_counter = 0;
00809 for (int at=0; at<numatoms; at++) {
00810 for (int shell=0; shell < num_shells_per_atom[at]; shell++) {
00811 int shelltype = shell_types[shell_counter];
00812 shell_counter++;
00813 if (shelltype > maxshell)
00814 maxshell=shelltype;
00815 }
00816 }
00817
00818 return maxshell;
00819 }
00820
00821
00822
00823
00824
00825
00826 int Orbital::max_wave_f_count(void) {
00827 int maxcount=0;
00828
00829 int shell_counter = 0;
00830 for (int at=0; at<numatoms; at++) {
00831 for (int shell=0; shell < num_shells_per_atom[at]; shell++) {
00832 int shelltype = shell_types[shell_counter];
00833 int i, j;
00834 int count=0;
00835 for (i=0; i<=shelltype; i++) {
00836 int jmax = shelltype - i;
00837 for (j=0; j<=jmax; j++) {
00838 count++;
00839 }
00840 }
00841 shell_counter++;
00842 if (count > maxcount)
00843 maxcount=count;
00844 }
00845 }
00846
00847 return maxcount;
00848 }
00849
00850
00851
00852
00853
00854 double Orbital::flops_per_gridpoint() {
00855 double flops=0.0;
00856
00857 int shell_counter = 0;
00858 for (int at=0; at<numatoms; at++) {
00859 flops += 7;
00860
00861 for (int shell=0; shell < num_shells_per_atom[at]; shell++) {
00862 for (int prim=0; prim < num_prim_per_shell[shell_counter]; prim++)
00863 flops += 4;
00864
00865 int shelltype = shell_types[shell_counter];
00866
00867 switch (shelltype) {
00868
00869 case S_SHELL: flops += 2; break;
00870 case P_SHELL: flops += 8; break;
00871 case D_SHELL: flops += 17; break;
00872 case F_SHELL: flops += 30; break;
00873 case G_SHELL: flops += 50; break;
00874
00875
00876 default:
00877 int i, j;
00878 for (i=0; i<=shelltype; i++) {
00879 int jmax = shelltype - i;
00880 flops += 1;
00881 for (j=0; j<=jmax; j++) {
00882 flops += 6;
00883 }
00884 }
00885 break;
00886 }
00887
00888 shell_counter++;
00889 }
00890 }
00891
00892 return flops;
00893 }
00894
00895
00896
00897
00898
00899
00900
00901
00902
00903
00904
00905 static const float MAXNUMF = 3.4028234663852885981170418348451692544e38f;
00906 static const float MAXLOGF = 88.72283905206835f;
00907 static const float MINLOGF = -103.278929903431851103f;
00908 static const float LOG2EF = 1.44269504088896341f;
00909 static const float C1 = 0.693359375f;
00910 static const float C2 = -2.12194440e-4f;
00911
00912 static inline float cephesfastexpf(float x) {
00913 float z;
00914 int n;
00915
00916 if(x > MAXLOGF)
00917 return MAXNUMF;
00918
00919 if(x < MINLOGF)
00920 return 0.0;
00921
00922
00923 z = floorf( LOG2EF * x + 0.5f );
00924 x -= z * C1;
00925 x -= z * C2;
00926 n = (int) z;
00927
00928 z = x * x;
00929
00930 z = ((((( 1.9875691500E-4f * x + 1.3981999507E-3f) * x
00931 + 8.3334519073E-3f) * x + 4.1665795894E-2f) * x
00932 + 1.6666665459E-1f) * x + 5.0000001201E-1f) * z + x + 1.0f;
00933
00934 x = ldexpf(z, n);
00935 return x;
00936 }
00937
00938
00939
00940
00941
00942
00943
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953
00954
00955
00956
00957
00958
00959
00960
00961
00962
00963
00964
00965
00966
00967
00968
00969
00970
00971
00972
00973
00974
00975
00976
00977
00978
00979
00980 #define MLOG2EF -1.44269504088896f
00981
00982
00983
00984
00985
00986 #define SCEXP0 1.0000000000000000f
00987 #define SCEXP1 0.6987082824680118f
00988 #define SCEXP2 0.2633174272827404f
00989 #define SCEXP3 0.0923611991471395f
00990 #define SCEXP4 0.0277520543324108f
00991
00992
00993 #define EXPOBIAS 127
00994 #define EXPOSHIFT 23
00995
00996
00997 #define ACUTOFF -10
00998
00999 typedef union flint_t {
01000 float f;
01001 int n;
01002 } flint;
01003
01004 float aexpfnx(float x) {
01005
01006 float mb;
01007 int mbflr;
01008 float d;
01009 float sy;
01010 flint scalfac;
01011
01012 if (x < ACUTOFF) return 0.f;
01013
01014 mb = x * MLOG2EF;
01015 mbflr = (int) mb;
01016 d = mbflr - mb;
01017 sy = SCEXP0 + d*(SCEXP1 + d*(SCEXP2 + d*(SCEXP3 + d*SCEXP4)));
01018
01019 scalfac.n = (EXPOBIAS - mbflr) << EXPOSHIFT;
01020 return (sy * scalfac.f);
01021 }
01022
01023
01024
01025
01026
01027
01028 #define S_SHELL 0
01029 #define P_SHELL 1
01030 #define D_SHELL 2
01031 #define F_SHELL 3
01032 #define G_SHELL 4
01033 #define H_SHELL 5
01034
01035 int evaluate_grid(int numatoms,
01036 const float *wave_f, const float *basis_array,
01037 const float *atompos,
01038 const int *atom_basis,
01039 const int *num_shells_per_atom,
01040 const int *num_prim_per_shell,
01041 const int *shell_types,
01042 const int *numvoxels,
01043 float voxelsize,
01044 const float *origin,
01045 int density,
01046 float * orbitalgrid) {
01047 if (!orbitalgrid)
01048 return -1;
01049
01050 int nx, ny, nz;
01051
01052
01053 int numgridxy = numvoxels[0]*numvoxels[1];
01054 for (nz=0; nz<numvoxels[2]; nz++) {
01055 float grid_x, grid_y, grid_z;
01056 grid_z = origin[2] + nz * voxelsize;
01057 for (ny=0; ny<numvoxels[1]; ny++) {
01058 grid_y = origin[1] + ny * voxelsize;
01059 int gaddrzy = ny*numvoxels[0] + nz*numgridxy;
01060 for (nx=0; nx<numvoxels[0]; nx++) {
01061 grid_x = origin[0] + nx * voxelsize;
01062
01063
01064
01065 int at;
01066 int prim, shell;
01067
01068
01069 float value = 0.0;
01070
01071
01072 int ifunc = 0;
01073 int shell_counter = 0;
01074
01075
01076 for (at=0; at<numatoms; at++) {
01077 int maxshell = num_shells_per_atom[at];
01078 int prim_counter = atom_basis[at];
01079
01080
01081 float xdist = (grid_x - atompos[3*at ])*ANGS_TO_BOHR;
01082 float ydist = (grid_y - atompos[3*at+1])*ANGS_TO_BOHR;
01083 float zdist = (grid_z - atompos[3*at+2])*ANGS_TO_BOHR;
01084
01085 float xdist2 = xdist*xdist;
01086 float ydist2 = ydist*ydist;
01087 float zdist2 = zdist*zdist;
01088 float xdist3 = xdist2*xdist;
01089 float ydist3 = ydist2*ydist;
01090 float zdist3 = zdist2*zdist;
01091
01092 float dist2 = xdist2 + ydist2 + zdist2;
01093
01094
01095
01096
01097
01098
01099 for (shell=0; shell < maxshell; shell++) {
01100 float contracted_gto = 0.0f;
01101
01102
01103
01104
01105
01106
01107
01108
01109
01110 int maxprim = num_prim_per_shell[shell_counter];
01111 int shelltype = shell_types[shell_counter];
01112 for (prim=0; prim<maxprim; prim++) {
01113 float exponent = basis_array[prim_counter ];
01114 float contract_coeff = basis_array[prim_counter + 1];
01115
01116
01117
01118
01119 #if defined(__GNUC__) && !defined(__ICC)
01120
01121
01122 contracted_gto += contract_coeff * aexpfnx(-exponent*dist2);
01123 #elif defined(__ICC)
01124
01125
01126
01127
01128 contracted_gto += contract_coeff * cephesfastexpf(-exponent*dist2);
01129 #else
01130
01131
01132 contracted_gto += contract_coeff * expf(-exponent*dist2);
01133 #endif
01134 prim_counter += 2;
01135 }
01136
01137
01138 float tmpshell=0;
01139 switch (shelltype) {
01140 case S_SHELL:
01141 value += wave_f[ifunc++] * contracted_gto;
01142 break;
01143
01144 case P_SHELL:
01145 tmpshell += wave_f[ifunc++] * xdist;
01146 tmpshell += wave_f[ifunc++] * ydist;
01147 tmpshell += wave_f[ifunc++] * zdist;
01148 value += tmpshell * contracted_gto;
01149 break;
01150
01151 case D_SHELL:
01152 tmpshell += wave_f[ifunc++] * xdist2;
01153 tmpshell += wave_f[ifunc++] * xdist * ydist;
01154 tmpshell += wave_f[ifunc++] * ydist2;
01155 tmpshell += wave_f[ifunc++] * xdist * zdist;
01156 tmpshell += wave_f[ifunc++] * ydist * zdist;
01157 tmpshell += wave_f[ifunc++] * zdist2;
01158 value += tmpshell * contracted_gto;
01159 break;
01160
01161 case F_SHELL:
01162 tmpshell += wave_f[ifunc++] * xdist3;
01163 tmpshell += wave_f[ifunc++] * xdist2 * ydist;
01164 tmpshell += wave_f[ifunc++] * ydist2 * xdist;
01165 tmpshell += wave_f[ifunc++] * ydist3;
01166 tmpshell += wave_f[ifunc++] * xdist2 * zdist;
01167 tmpshell += wave_f[ifunc++] * xdist * ydist * zdist;
01168 tmpshell += wave_f[ifunc++] * ydist2 * zdist;
01169 tmpshell += wave_f[ifunc++] * zdist2 * xdist;
01170 tmpshell += wave_f[ifunc++] * zdist2 * ydist;
01171 tmpshell += wave_f[ifunc++] * zdist3;
01172 value += tmpshell * contracted_gto;
01173 break;
01174
01175 case G_SHELL:
01176 tmpshell += wave_f[ifunc++] * xdist2 * xdist2;
01177 tmpshell += wave_f[ifunc++] * xdist3 * ydist;
01178 tmpshell += wave_f[ifunc++] * xdist2 * ydist2;
01179 tmpshell += wave_f[ifunc++] * ydist3 * xdist;
01180 tmpshell += wave_f[ifunc++] * ydist2 * ydist2;
01181 tmpshell += wave_f[ifunc++] * xdist3 * zdist;
01182 tmpshell += wave_f[ifunc++] * xdist2 * ydist * zdist;
01183 tmpshell += wave_f[ifunc++] * ydist2 * xdist * zdist;
01184 tmpshell += wave_f[ifunc++] * ydist3 * zdist;
01185 tmpshell += wave_f[ifunc++] * xdist2 * zdist2;
01186 tmpshell += wave_f[ifunc++] * zdist2 * xdist * ydist;
01187 tmpshell += wave_f[ifunc++] * ydist2 * zdist2;
01188 tmpshell += wave_f[ifunc++] * zdist3 * xdist;
01189 tmpshell += wave_f[ifunc++] * zdist3 * ydist;
01190 tmpshell += wave_f[ifunc++] * zdist2 * zdist2;
01191 value += tmpshell * contracted_gto;
01192 break;
01193
01194 default:
01195 #if 1
01196
01197 int i, j;
01198 float xdp, ydp, zdp;
01199 float xdiv = 1.0f / xdist;
01200 for (j=0, zdp=1.0f; j<=shelltype; j++, zdp*=zdist) {
01201 int imax = shelltype - j;
01202 for (i=0, ydp=1.0f, xdp=powf(xdist, float(imax)); i<=imax; i++, ydp*=ydist, xdp*=xdiv) {
01203 tmpshell += wave_f[ifunc++] * xdp * ydp * zdp;
01204 }
01205 }
01206 value += tmpshell * contracted_gto;
01207 #else
01208 int i, j, k;
01209 for (k=0; k<=shelltype; k++) {
01210 for (j=0; j<=shelltype; j++) {
01211 for (i=0; i<=shelltype; i++) {
01212 if (i+j+k==shelltype) {
01213 value += wave_f[ifunc++] * contracted_gto
01214 * pow(xdist,i) * pow(ydist,j) * pow(zdist,k);
01215 }
01216 }
01217 }
01218 }
01219 #endif
01220 }
01221
01222 shell_counter++;
01223 }
01224 }
01225
01226
01227 if (density) {
01228 float orbdensity = value * value;
01229 if (value < 0.0)
01230 orbdensity = -orbdensity;
01231 orbitalgrid[gaddrzy + nx] = orbdensity;
01232 } else {
01233 orbitalgrid[gaddrzy + nx] = value;
01234 }
01235 }
01236 }
01237 }
01238
01239 return 0;
01240 }
01241
01242
01243
01244 #if defined(VMDORBUSESSE) && defined(__SSE2__)
01245
01246 #if 0 && !defined(_WIN64) // MSVC doesn't support old MMX intrinsics
01247
01248
01249
01250
01251
01252
01253 #ifdef _MSC_VER
01254 # define ALIGN16_BEG __declspec(align(16))
01255 # define ALIGN16_END
01256 #else
01257 # define ALIGN16_BEG
01258 # define ALIGN16_END __attribute__((aligned(16)))
01259 #endif
01260
01261 #define _PS_CONST(Name, Val) \
01262 static const ALIGN16_BEG float _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
01263 #define _PI32_CONST(Name, Val) \
01264 static const ALIGN16_BEG int _pi32_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
01265
01266 _PS_CONST(exp_hi, 88.3762626647949f);
01267 _PS_CONST(exp_lo, -88.3762626647949f);
01268
01269 _PS_CONST(cephes_LOG2EF, 1.44269504088896341);
01270 _PS_CONST(cephes_exp_C1, 0.693359375);
01271 _PS_CONST(cephes_exp_C2, -2.12194440e-4);
01272
01273 _PS_CONST(cephes_exp_p0, 1.9875691500E-4);
01274 _PS_CONST(cephes_exp_p1, 1.3981999507E-3);
01275 _PS_CONST(cephes_exp_p2, 8.3334519073E-3);
01276 _PS_CONST(cephes_exp_p3, 4.1665795894E-2);
01277 _PS_CONST(cephes_exp_p4, 1.6666665459E-1);
01278 _PS_CONST(cephes_exp_p5, 5.0000001201E-1);
01279 _PS_CONST(one, 1.0);
01280 _PS_CONST(half, 0.5);
01281
01282 _PI32_CONST(0x7f, 0x7f);
01283
01284 typedef union xmm_mm_union {
01285 __m128 xmm;
01286 __m64 mm[2];
01287 } xmm_mm_union;
01288
01289 #define COPY_XMM_TO_MM(xmm_, mm0_, mm1_) { \
01290 xmm_mm_union u; u.xmm = xmm_; \
01291 mm0_ = u.mm[0]; \
01292 mm1_ = u.mm[1]; \
01293 }
01294
01295 #define COPY_MM_TO_XMM(mm0_, mm1_, xmm_) { \
01296 xmm_mm_union u; u.mm[0]=mm0_; u.mm[1]=mm1_; xmm_ = u.xmm; \
01297 }
01298
01299 __m128 exp_ps(__m128 x) {
01300 __m128 tmp = _mm_setzero_ps(), fx;
01301 __m64 mm0, mm1;
01302
01303 x = _mm_min_ps(x, *(__m128*)_ps_exp_hi);
01304 x = _mm_max_ps(x, *(__m128*)_ps_exp_lo);
01305
01306
01307 fx = _mm_mul_ps(x, *(__m128*)_ps_cephes_LOG2EF);
01308 fx = _mm_add_ps(fx,*(__m128*)_ps_half);
01309
01310
01311
01312 tmp = _mm_movehl_ps(tmp, fx);
01313 mm0 = _mm_cvttps_pi32(fx);
01314 mm1 = _mm_cvttps_pi32(tmp);
01315
01316 tmp = _mm_cvtpi32x2_ps(mm0, mm1);
01317
01318 __m128 mask = _mm_cmpgt_ps(tmp, fx);
01319 mask = _mm_and_ps(mask, *(__m128*)_ps_one);
01320 fx = _mm_sub_ps(tmp, mask);
01321
01322 tmp = _mm_mul_ps(fx, *(__m128*)_ps_cephes_exp_C1);
01323 __m128 z = _mm_mul_ps(fx, *(__m128*)_ps_cephes_exp_C2);
01324 x = _mm_sub_ps(x, tmp);
01325 x = _mm_sub_ps(x, z);
01326
01327 z = _mm_mul_ps(x,x);
01328
01329 __m128 y = *(__m128*)_ps_cephes_exp_p0;
01330 y = _mm_mul_ps(y, x);
01331 y = _mm_add_ps(y, *(__m128*)_ps_cephes_exp_p1);
01332 y = _mm_mul_ps(y, x);
01333 y = _mm_add_ps(y, *(__m128*)_ps_cephes_exp_p2);
01334 y = _mm_mul_ps(y, x);
01335 y = _mm_add_ps(y, *(__m128*)_ps_cephes_exp_p3);
01336 y = _mm_mul_ps(y, x);
01337 y = _mm_add_ps(y, *(__m128*)_ps_cephes_exp_p4);
01338 y = _mm_mul_ps(y, x);
01339 y = _mm_add_ps(y, *(__m128*)_ps_cephes_exp_p5);
01340 y = _mm_mul_ps(y, z);
01341 y = _mm_add_ps(y, x);
01342 y = _mm_add_ps(y, *(__m128*)_ps_one);
01343
01344
01345 z = _mm_movehl_ps(z, fx);
01346 mm0 = _mm_cvttps_pi32(fx);
01347 mm1 = _mm_cvttps_pi32(z);
01348 mm0 = _mm_add_pi32(mm0, *(__m64*)_pi32_0x7f);
01349 mm1 = _mm_add_pi32(mm1, *(__m64*)_pi32_0x7f);
01350 mm0 = _mm_slli_pi32(mm0, 23);
01351 mm1 = _mm_slli_pi32(mm1, 23);
01352
01353 __m128 pow2n;
01354 COPY_MM_TO_XMM(mm0, mm1, pow2n);
01355
01356 y = _mm_mul_ps(y, pow2n);
01357 _mm_empty();
01358 return y;
01359 }
01360 #endif // MSVC doesn't support old MMX intrinsics
01361
01362
01363
01364
01365
01366
01367
01368
01369
01370 #if defined(__GNUC__) && ! defined(__INTEL_COMPILER)
01371 #define __align(X) __attribute__((aligned(X) ))
01372 #if (__GNUC__ < 4)
01373 #define MISSING_mm_cvtsd_f64
01374 #endif
01375 #else
01376 #define __align(X) __declspec(align(X) )
01377 #endif
01378
01379 #define MLOG2EF -1.44269504088896f
01380
01381
01382
01383
01384
01385 #define SCEXP0 1.0000000000000000f
01386 #define SCEXP1 0.6987082824680118f
01387 #define SCEXP2 0.2633174272827404f
01388 #define SCEXP3 0.0923611991471395f
01389 #define SCEXP4 0.0277520543324108f
01390
01391
01392 #define EXPOBIAS 127
01393 #define EXPOSHIFT 23
01394
01395
01396 #define ACUTOFF -10
01397
01398 typedef union SSEreg_t {
01399 __m128 f;
01400 __m128i i;
01401 } SSEreg;
01402
01403 __m128 aexpfnxsse(__m128 x) {
01404 __align(16) SSEreg scal;
01405 __align(16) SSEreg n;
01406 __align(16) SSEreg y;
01407
01408 scal.f = _mm_cmpge_ps(x, _mm_set_ps1(ACUTOFF));
01409
01410
01411 if (_mm_movemask_ps(scal.f) == 0) {
01412 return _mm_setzero_ps();
01413 }
01414
01415
01416
01417
01418
01419
01420
01421
01422 y.f = _mm_mul_ps(x, _mm_set_ps1(MLOG2EF));
01423 n.i = _mm_cvttps_epi32(y.f);
01424 x = _mm_cvtepi32_ps(n.i);
01425 x = _mm_sub_ps(x, y.f);
01426
01427
01428
01429
01430
01431 y.f = _mm_mul_ps(x, _mm_set_ps1(SCEXP4));
01432 y.f = _mm_add_ps(y.f, _mm_set_ps1(SCEXP3));
01433 y.f = _mm_mul_ps(y.f, x);
01434 y.f = _mm_add_ps(y.f, _mm_set_ps1(SCEXP2));
01435 y.f = _mm_mul_ps(y.f, x);
01436 y.f = _mm_add_ps(y.f, _mm_set_ps1(SCEXP1));
01437 y.f = _mm_mul_ps(y.f, x);
01438 y.f = _mm_add_ps(y.f, _mm_set_ps1(SCEXP0));
01439
01440
01441
01442
01443
01444
01445 n.i = _mm_sub_epi32(_mm_set1_epi32(EXPOBIAS), n.i);
01446 n.i = _mm_slli_epi32(n.i, EXPOSHIFT);
01447 scal.f = _mm_and_ps(scal.f, n.f);
01448 y.f = _mm_mul_ps(y.f, scal.f);
01449
01450 return y.f;
01451 }
01452
01453
01454 int evaluate_grid_sse(int numatoms,
01455 const float *wave_f, const float *basis_array,
01456 const float *atompos,
01457 const int *atom_basis,
01458 const int *num_shells_per_atom,
01459 const int *num_prim_per_shell,
01460 const int *shell_types,
01461 const int *numvoxels,
01462 float voxelsize,
01463 const float *origin,
01464 int density,
01465 float * orbitalgrid) {
01466 if (!orbitalgrid)
01467 return -1;
01468
01469 int nx, ny, nz;
01470 __align(16) float sxdelta[4];
01471 for (nx=0; nx<4; nx++)
01472 sxdelta[nx] = ((float) nx) * voxelsize * ANGS_TO_BOHR;
01473
01474
01475
01476 int numgridxy = numvoxels[0]*numvoxels[1];
01477 for (nz=0; nz<numvoxels[2]; nz++) {
01478 float grid_x, grid_y, grid_z;
01479 grid_z = origin[2] + nz * voxelsize;
01480 for (ny=0; ny<numvoxels[1]; ny++) {
01481 grid_y = origin[1] + ny * voxelsize;
01482 int gaddrzy = ny*numvoxels[0] + nz*numgridxy;
01483 for (nx=0; nx<numvoxels[0]; nx+=4) {
01484 grid_x = origin[0] + nx * voxelsize;
01485
01486
01487
01488 int at;
01489 int prim, shell;
01490
01491
01492 __m128 value = _mm_setzero_ps();
01493
01494
01495 int ifunc = 0;
01496 int shell_counter = 0;
01497
01498
01499 for (at=0; at<numatoms; at++) {
01500 int maxshell = num_shells_per_atom[at];
01501 int prim_counter = atom_basis[at];
01502
01503
01504 float sxdist = (grid_x - atompos[3*at ])*ANGS_TO_BOHR;
01505 float sydist = (grid_y - atompos[3*at+1])*ANGS_TO_BOHR;
01506 float szdist = (grid_z - atompos[3*at+2])*ANGS_TO_BOHR;
01507
01508 float sydist2 = sydist*sydist;
01509 float szdist2 = szdist*szdist;
01510 float yzdist2 = sydist2 + szdist2;
01511
01512 __m128 xdelta = _mm_load_ps(&sxdelta[0]);
01513 __m128 xdist = _mm_load_ps1(&sxdist);
01514 xdist = _mm_add_ps(xdist, xdelta);
01515 __m128 ydist = _mm_load_ps1(&sydist);
01516 __m128 zdist = _mm_load_ps1(&szdist);
01517 __m128 xdist2 = _mm_mul_ps(xdist, xdist);
01518 __m128 ydist2 = _mm_mul_ps(ydist, ydist);
01519 __m128 zdist2 = _mm_mul_ps(zdist, zdist);
01520 __m128 dist2 = _mm_load_ps1(&yzdist2);
01521 dist2 = _mm_add_ps(dist2, xdist2);
01522
01523
01524
01525
01526
01527
01528 for (shell=0; shell < maxshell; shell++) {
01529 __m128 contracted_gto = _mm_setzero_ps();
01530
01531
01532
01533
01534
01535
01536
01537
01538
01539 int maxprim = num_prim_per_shell[shell_counter];
01540 int shelltype = shell_types[shell_counter];
01541 for (prim=0; prim<maxprim; prim++) {
01542
01543 float exponent = -basis_array[prim_counter ];
01544 float contract_coeff = basis_array[prim_counter + 1];
01545
01546
01547 __m128 expval = _mm_mul_ps(_mm_load_ps1(&exponent), dist2);
01548
01549 #if 1
01550 __m128 retval = aexpfnxsse(expval);
01551 #else
01552 __m128 retval = exp_ps(expval);
01553 #endif
01554 __m128 ctmp = _mm_mul_ps(_mm_load_ps1(&contract_coeff), retval);
01555 contracted_gto = _mm_add_ps(contracted_gto, ctmp);
01556 prim_counter += 2;
01557 }
01558
01559
01560 __m128 tmpshell = _mm_setzero_ps();
01561 switch (shelltype) {
01562 case S_SHELL:
01563 value = _mm_add_ps(value, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), contracted_gto));
01564 break;
01565
01566 case P_SHELL:
01567 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), xdist));
01568 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), ydist));
01569 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), zdist));
01570 value = _mm_add_ps(value, _mm_mul_ps(tmpshell, contracted_gto));
01571 break;
01572
01573 case D_SHELL:
01574 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), xdist2));
01575 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(xdist, ydist)));
01576 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), ydist2));
01577 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(xdist, zdist)));
01578 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(ydist, zdist)));
01579 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), zdist2));
01580 value = _mm_add_ps(value, _mm_mul_ps(tmpshell, contracted_gto));
01581 break;
01582
01583 case F_SHELL:
01584 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(xdist2, xdist)));
01585 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(xdist2, ydist)));
01586 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(ydist2, xdist)));
01587 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(ydist2, ydist)));
01588 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(xdist2, zdist)));
01589 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(_mm_mul_ps(xdist, ydist), zdist)));
01590 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(ydist2, zdist)));
01591 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(zdist2, xdist)));
01592 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(zdist2, ydist)));
01593 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(zdist2, zdist)));
01594 value = _mm_add_ps(value, _mm_mul_ps(tmpshell, contracted_gto));
01595 break;
01596
01597 #if 0
01598 default:
01599
01600 int i, j;
01601 float xdp, ydp, zdp;
01602 float xdiv = 1.0f / xdist;
01603 for (j=0, zdp=1.0f; j<=shelltype; j++, zdp*=zdist) {
01604 int imax = shelltype - j;
01605 for (i=0, ydp=1.0f, xdp=pow(xdist, imax); i<=imax; i++, ydp*=ydist, xdp*=xdiv) {
01606 tmpshell += wave_f[ifunc++] * xdp * ydp * zdp;
01607 }
01608 }
01609 value += tmpshell * contracted_gto;
01610 #endif
01611 }
01612
01613 shell_counter++;
01614 }
01615 }
01616
01617
01618 if (density) {
01619 __m128 mask = _mm_cmplt_ps(value, _mm_setzero_ps());
01620 __m128 sqdensity = _mm_mul_ps(value, value);
01621 __m128 orbdensity = sqdensity;
01622 __m128 nsqdensity = _mm_and_ps(sqdensity, mask);
01623 orbdensity = _mm_sub_ps(orbdensity, nsqdensity);
01624 orbdensity = _mm_sub_ps(orbdensity, nsqdensity);
01625 _mm_storeu_ps(&orbitalgrid[gaddrzy + nx], orbdensity);
01626 } else {
01627 _mm_storeu_ps(&orbitalgrid[gaddrzy + nx], value);
01628 }
01629 }
01630 }
01631 }
01632
01633 return 0;
01634 }
01635
01636 #endif
01637
01638
01639
01640 #if defined(VMDORBUSEVSX) && defined(__VSX__)
01641
01642
01643
01644
01645
01646 #if defined(__GNUC__) && ! defined(__INTEL_COMPILER)
01647 #define __align(X) __attribute__((aligned(X) ))
01648 #else
01649 #define __align(X) __declspec(align(X) )
01650 #endif
01651
01652 #define MLOG2EF -1.44269504088896f
01653
01654
01655
01656
01657
01658 #define SCEXP0 1.0000000000000000f
01659 #define SCEXP1 0.6987082824680118f
01660 #define SCEXP2 0.2633174272827404f
01661 #define SCEXP3 0.0923611991471395f
01662 #define SCEXP4 0.0277520543324108f
01663
01664
01665 #define EXPOBIAS 127
01666 #define EXPOSHIFT 23
01667
01668
01669 #define ACUTOFF -10
01670
01671 #if 0
01672 vector float ref_expf(vector float x) {
01673 vector float result;
01674
01675 int i;
01676 for (i=0; i<4; i++) {
01677 result[i] = expf(x[i]);
01678 }
01679
01680 return result;
01681 }
01682 #endif
01683
01684 vector float aexpfnxvsx(vector float x) {
01685
01686
01687
01688
01689
01690
01691
01692
01693
01694
01695
01696
01697
01698
01699 vector float mb = vec_mul(x, vec_splats(MLOG2EF));
01700 vector float mbflr = vec_floor(mb);
01701 vector float d = vec_sub(mbflr, mb);
01702 vector float y;
01703
01704
01705
01706 y = vec_madd(d, vec_splats(SCEXP4), vec_splats(SCEXP3));
01707 y = vec_madd(y, d, vec_splats(SCEXP2));
01708 y = vec_madd(y, d, vec_splats(SCEXP1));
01709 y = vec_madd(y, d, vec_splats(SCEXP0));
01710
01711 return vec_mul(y, vec_expte(-mbflr));
01712 }
01713
01714
01715 int evaluate_grid_vsx(int numatoms,
01716 const float *wave_f, const float *basis_array,
01717 const float *atompos,
01718 const int *atom_basis,
01719 const int *num_shells_per_atom,
01720 const int *num_prim_per_shell,
01721 const int *shell_types,
01722 const int *numvoxels,
01723 float voxelsize,
01724 const float *origin,
01725 int density,
01726 float * orbitalgrid) {
01727 if (!orbitalgrid)
01728 return -1;
01729
01730 int nx, ny, nz;
01731 __attribute__((aligned(16))) float sxdelta[4];
01732 for (nx=0; nx<4; nx++)
01733 sxdelta[nx] = ((float) nx) * voxelsize * ANGS_TO_BOHR;
01734
01735
01736
01737 int numgridxy = numvoxels[0]*numvoxels[1];
01738 for (nz=0; nz<numvoxels[2]; nz++) {
01739 float grid_x, grid_y, grid_z;
01740 grid_z = origin[2] + nz * voxelsize;
01741 for (ny=0; ny<numvoxels[1]; ny++) {
01742 grid_y = origin[1] + ny * voxelsize;
01743 int gaddrzy = ny*numvoxels[0] + nz*numgridxy;
01744 for (nx=0; nx<numvoxels[0]; nx+=4) {
01745 grid_x = origin[0] + nx * voxelsize;
01746
01747
01748
01749 int at;
01750 int prim, shell;
01751
01752
01753 vector float value = vec_splats(0.0f);
01754
01755
01756 int ifunc = 0;
01757 int shell_counter = 0;
01758
01759
01760 for (at=0; at<numatoms; at++) {
01761 int maxshell = num_shells_per_atom[at];
01762 int prim_counter = atom_basis[at];
01763
01764
01765 float sxdist = (grid_x - atompos[3*at ])*ANGS_TO_BOHR;
01766 float sydist = (grid_y - atompos[3*at+1])*ANGS_TO_BOHR;
01767 float szdist = (grid_z - atompos[3*at+2])*ANGS_TO_BOHR;
01768
01769 float sydist2 = sydist*sydist;
01770 float szdist2 = szdist*szdist;
01771 float yzdist2 = sydist2 + szdist2;
01772
01773 vector float xdelta = *((__vector float *) &sxdelta[0]);
01774 vector float xdist = vec_splats(sxdist);
01775 xdist = vec_add(xdist, xdelta);
01776 vector float ydist = vec_splats(sydist);
01777 vector float zdist = vec_splats(szdist);
01778 vector float xdist2 = vec_mul(xdist, xdist);
01779 vector float ydist2 = vec_mul(ydist, ydist);
01780 vector float zdist2 = vec_mul(zdist, zdist);
01781 vector float dist2 = vec_splats(yzdist2);
01782 dist2 = vec_add(dist2, xdist2);
01783
01784
01785
01786
01787
01788
01789 for (shell=0; shell < maxshell; shell++) {
01790 vector float contracted_gto = vec_splats(0.0f);
01791
01792
01793
01794
01795
01796
01797
01798
01799
01800 int maxprim = num_prim_per_shell[shell_counter];
01801 int shelltype = shell_types[shell_counter];
01802 for (prim=0; prim<maxprim; prim++) {
01803
01804 float exponent = -basis_array[prim_counter ];
01805 float contract_coeff = basis_array[prim_counter + 1];
01806
01807
01808 vector float expval = vec_mul(vec_splats(exponent), dist2);
01809
01810
01811 vector float retval = aexpfnxvsx(expval);
01812
01813 vector float ctmp = vec_mul(vec_splats(contract_coeff), retval);
01814 contracted_gto = vec_add(contracted_gto, ctmp);
01815 prim_counter += 2;
01816 }
01817
01818
01819 vector float tmpshell = vec_splats(0.0f);
01820 switch (shelltype) {
01821 case S_SHELL:
01822 value = vec_add(value, vec_mul(vec_splats(wave_f[ifunc++]), contracted_gto));
01823 break;
01824
01825 case P_SHELL:
01826 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), xdist));
01827 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), ydist));
01828 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), zdist));
01829 value = vec_add(value, vec_mul(tmpshell, contracted_gto));
01830 break;
01831
01832 case D_SHELL:
01833 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), xdist2));
01834 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), vec_mul(xdist, ydist)));
01835 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), ydist2));
01836 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), vec_mul(xdist, zdist)));
01837 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), vec_mul(ydist, zdist)));
01838 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), zdist2));
01839 value = vec_add(value, vec_mul(tmpshell, contracted_gto));
01840 break;
01841
01842 case F_SHELL:
01843 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), vec_mul(xdist2, xdist)));
01844 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), vec_mul(xdist2, ydist)));
01845 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), vec_mul(ydist2, xdist)));
01846 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), vec_mul(ydist2, ydist)));
01847 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), vec_mul(xdist2, zdist)));
01848 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), vec_mul(vec_mul(xdist, ydist), zdist)));
01849 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), vec_mul(ydist2, zdist)));
01850 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), vec_mul(zdist2, xdist)));
01851 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), vec_mul(zdist2, ydist)));
01852 tmpshell = vec_add(tmpshell, vec_mul(vec_splats(wave_f[ifunc++]), vec_mul(zdist2, zdist)));
01853 value = vec_add(value, vec_mul(tmpshell, contracted_gto));
01854 break;
01855
01856 #if 0
01857 default:
01858
01859 int i, j;
01860 float xdp, ydp, zdp;
01861 float xdiv = 1.0f / xdist;
01862 for (j=0, zdp=1.0f; j<=shelltype; j++, zdp*=zdist) {
01863 int imax = shelltype - j;
01864 for (i=0, ydp=1.0f, xdp=pow(xdist, imax); i<=imax; i++, ydp*=ydist, xdp*=xdiv) {
01865 tmpshell += wave_f[ifunc++] * xdp * ydp * zdp;
01866 }
01867 }
01868 value += tmpshell * contracted_gto;
01869 #endif
01870 }
01871
01872 shell_counter++;
01873 }
01874 }
01875
01876
01877 if (density) {
01878 value = vec_cpsgn(value, vec_mul(value, value));
01879
01880 float *ufptr = &orbitalgrid[gaddrzy + nx];
01881 ufptr[0] = value[0];
01882 ufptr[1] = value[1];
01883 ufptr[2] = value[2];
01884 ufptr[3] = value[3];
01885 } else {
01886 float *ufptr = &orbitalgrid[gaddrzy + nx];
01887 ufptr[0] = value[0];
01888 ufptr[1] = value[1];
01889 ufptr[2] = value[2];
01890 ufptr[3] = value[3];
01891 }
01892 }
01893 }
01894 }
01895
01896 return 0;
01897 }
01898
01899 #endif
01900
01901
01902
01903
01904
01905
01906
01907 typedef struct {
01908 wkf_cpu_caps_t *cpucaps;
01909 int numatoms;
01910 const float *wave_f;
01911 const float *basis_array;
01912 const float *atompos;
01913 const int *atom_basis;
01914 const int *num_shells_per_atom;
01915 const int *num_prim_per_shell;
01916 const int *shell_types;
01917 const int *numvoxels;
01918 float voxelsize;
01919 int density;
01920 const float *origin;
01921 float *orbitalgrid;
01922 } orbthrparms;
01923
01924
01925 extern "C" void * orbitalthread(void *voidparms) {
01926 int numvoxels[3];
01927 float origin[3];
01928 orbthrparms *parms = NULL;
01929 #if defined(VMDORBUSETHRPOOL)
01930 wkf_threadpool_worker_getdata(voidparms, (void **) &parms);
01931 #else
01932 wkf_threadlaunch_getdata(voidparms, (void **) &parms);
01933 #endif
01934
01935 #if defined(VMDCPUDISPATCH)
01936 wkf_cpu_caps_t *cpucaps = parms->cpucaps;
01937
01938 #if defined(VMDUSEAVX512)
01939 int dispatch_AVX512ER = 0;
01940 if ((cpucaps->flags & CPU_AVX512ER) && (getenv("VMDNOAVX512ER") == NULL)) {
01941
01942 dispatch_AVX512ER = 1;
01943 }
01944
01945 int dispatch_AVX512F = 0;
01946 if ((cpucaps->flags & CPU_AVX512F) && (getenv("VMDNOAVX512F") == NULL)) {
01947 dispatch_AVX512F = 1;
01948
01949 }
01950 #endif
01951
01952 #if defined(VMDUSEAVX2)
01953 int dispatch_AVX2 = 0;
01954 if ((cpucaps->flags & CPU_AVX2) && (getenv("VMDNOAVX2") == NULL)) {
01955 dispatch_AVX2 = 1;
01956
01957 }
01958 #endif
01959
01960 #if defined(VMDUSESVE)
01961 int dispatch_SVE = 0;
01962 if ((cpucaps->flags & CPU_ARM64_SVE) && (getenv("VMDNOSVE") == NULL)) {
01963 dispatch_SVE = 1;
01964
01965 }
01966 #endif
01967
01968 #if defined(VMDUSENEON)
01969 int dispatch_NEON = 0;
01970 if ((cpucaps->flags & CPU_ARM64_ASIMD) && (getenv("VMDNONEON") == NULL)) {
01971 dispatch_NEON = 1;
01972
01973 }
01974 #endif
01975
01976 #endif // VMDCPUDISPATCH
01977
01978
01979
01980
01981 #if defined(VMDORBUSESSE) && defined(__SSE2__)
01982 int dispatch_SSE2 = 0;
01983 if ((getenv("VMDNOSSE2") == NULL)) {
01984
01985 dispatch_SSE2 = 1;
01986 }
01987 #endif
01988
01989 #if defined(VMDORBUSEVSX) && defined(__VSX__)
01990 int dispatch_VSX = 0;
01991 if (getenv("VMDNOVSX") == NULL) {
01992
01993 dispatch_VSX = 1;
01994 }
01995 #endif
01996
01997 numvoxels[0] = parms->numvoxels[0];
01998 numvoxels[1] = parms->numvoxels[1];
01999 numvoxels[2] = 1;
02000
02001 origin[0] = parms->origin[0];
02002 origin[1] = parms->origin[1];
02003
02004
02005 int planesize = numvoxels[0] * numvoxels[1];
02006 wkf_tasktile_t tile;
02007 #if defined(VMDORBUSETHRPOOL)
02008 while (wkf_threadpool_next_tile(voidparms, 1, &tile) != WKF_SCHED_DONE) {
02009 #else
02010 while (wkf_threadlaunch_next_tile(voidparms, 1, &tile) != WKF_SCHED_DONE) {
02011 #endif
02012 int k;
02013 for (k=tile.start; k<tile.end; k++) {
02014 origin[2] = parms->origin[2] + parms->voxelsize * k;
02015
02016 #if defined(VMDCPUDISPATCH)
02017
02018
02019
02020
02021
02022 if (cpucaps != NULL) {
02023 #if defined(VMDUSEAVX512)
02024 if (dispatch_AVX512ER) {
02025 evaluate_grid_avx512er(parms->numatoms, parms->wave_f,
02026 parms->basis_array, parms->atompos, parms->atom_basis,
02027 parms->num_shells_per_atom, parms->num_prim_per_shell,
02028 parms->shell_types, numvoxels, parms->voxelsize,
02029 origin, parms->density, parms->orbitalgrid + planesize*k);
02030 continue;
02031 }
02032
02033 if (dispatch_AVX512F) {
02034 evaluate_grid_avx512f(parms->numatoms, parms->wave_f,
02035 parms->basis_array, parms->atompos, parms->atom_basis,
02036 parms->num_shells_per_atom, parms->num_prim_per_shell,
02037 parms->shell_types, numvoxels, parms->voxelsize,
02038 origin, parms->density, parms->orbitalgrid + planesize*k);
02039 continue;
02040 }
02041 #endif
02042
02043 #if defined(VMDUSEAVX2)
02044 if (dispatch_AVX2) {
02045 evaluate_grid_avx2(parms->numatoms, parms->wave_f,
02046 parms->basis_array, parms->atompos, parms->atom_basis,
02047 parms->num_shells_per_atom, parms->num_prim_per_shell,
02048 parms->shell_types, numvoxels, parms->voxelsize,
02049 origin, parms->density, parms->orbitalgrid + planesize*k);
02050 continue;
02051 }
02052 #endif
02053
02054 #if defined(VMDUSESVE)
02055 if (dispatch_SVE) {
02056 evaluate_grid_sve(parms->numatoms, parms->wave_f,
02057 parms->basis_array, parms->atompos, parms->atom_basis,
02058 parms->num_shells_per_atom, parms->num_prim_per_shell,
02059 parms->shell_types, numvoxels, parms->voxelsize,
02060 origin, parms->density, parms->orbitalgrid + planesize*k);
02061 continue;
02062 }
02063 #endif
02064
02065 #if defined(VMDUSENEON)
02066 if (dispatch_NEON) {
02067 evaluate_grid_neon(parms->numatoms, parms->wave_f,
02068 parms->basis_array, parms->atompos, parms->atom_basis,
02069 parms->num_shells_per_atom, parms->num_prim_per_shell,
02070 parms->shell_types, numvoxels, parms->voxelsize,
02071 origin, parms->density, parms->orbitalgrid + planesize*k);
02072 continue;
02073 }
02074 #endif
02075
02076 }
02077 #endif
02078
02079
02080
02081
02082
02083 #if defined(VMDORBUSESSE) && defined(__SSE2__)
02084 if (dispatch_SSE2) {
02085 evaluate_grid_sse(parms->numatoms, parms->wave_f,
02086 parms->basis_array, parms->atompos, parms->atom_basis,
02087 parms->num_shells_per_atom, parms->num_prim_per_shell,
02088 parms->shell_types, numvoxels, parms->voxelsize,
02089 origin, parms->density, parms->orbitalgrid + planesize*k);
02090 continue;
02091 }
02092 #endif
02093
02094 #if defined(VMDORBUSEVSX) && defined(__VSX__)
02095 if (dispatch_VSX) {
02096 evaluate_grid_vsx(parms->numatoms, parms->wave_f,
02097 parms->basis_array, parms->atompos, parms->atom_basis,
02098 parms->num_shells_per_atom, parms->num_prim_per_shell,
02099 parms->shell_types, numvoxels, parms->voxelsize,
02100 origin, parms->density, parms->orbitalgrid + planesize*k);
02101 continue;
02102 }
02103 #endif
02104
02105
02106
02107
02108 evaluate_grid(parms->numatoms, parms->wave_f,
02109 parms->basis_array, parms->atompos, parms->atom_basis,
02110 parms->num_shells_per_atom, parms->num_prim_per_shell,
02111 parms->shell_types, numvoxels, parms->voxelsize,
02112 origin, parms->density, parms->orbitalgrid + planesize*k);
02113 }
02114 }
02115
02116 return NULL;
02117 }
02118
02119
02120 int evaluate_grid_fast(wkf_cpu_caps_t *cpucaps,
02121 #if defined(VMDORBUSETHRPOOL)
02122 wkf_threadpool_t *thrpool,
02123 #else
02124 int numcputhreads,
02125 #endif
02126 int numatoms,
02127 const float *wave_f,
02128 const float *basis_array,
02129 const float *atompos,
02130 const int *atom_basis,
02131 const int *num_shells_per_atom,
02132 const int *num_prim_per_shell,
02133 const int *shell_types,
02134 const int *numvoxels,
02135 float voxelsize,
02136 const float *origin,
02137 int density,
02138 float * orbitalgrid) {
02139 int rc=0;
02140 orbthrparms parms;
02141
02142 parms.cpucaps = cpucaps;
02143 parms.numatoms = numatoms;
02144 parms.wave_f = wave_f;
02145 parms.basis_array = basis_array;
02146 parms.atompos = atompos;
02147 parms.atom_basis = atom_basis;
02148 parms.num_shells_per_atom = num_shells_per_atom;
02149 parms.num_prim_per_shell = num_prim_per_shell;
02150 parms.shell_types = shell_types;
02151 parms.numvoxels = numvoxels;
02152 parms.voxelsize = voxelsize;
02153 parms.origin = origin;
02154 parms.density = density;
02155 parms.orbitalgrid = orbitalgrid;
02156
02157
02158 wkf_tasktile_t tile;
02159 tile.start = 0;
02160 tile.end = numvoxels[2];
02161
02162 #if defined(VMDORBUSETHRPOOL)
02163 wkf_threadpool_sched_dynamic(thrpool, &tile);
02164 rc = wkf_threadpool_launch(thrpool, orbitalthread, &parms, 1);
02165 #else
02166 rc = wkf_threadlaunch(numcputhreads, &parms, orbitalthread, &tile);
02167 #endif
02168
02169 return rc;
02170 }
02171
02172
02173 void Orbital::print_wavefunction() {
02174
02175
02176 #if !(defined(_MSC_VER) || defined(ARCH_IRIX6) || defined(ARCH_IRIX6_64) || defined(ARCH_ANDROIDARMV7A))
02177 char shellname[6] = {'S', 'P', 'D', 'F', 'G', 'H'};
02178 int ifunc = 0;
02179 int at;
02180 int shell;
02181 for (at=0; at<numatoms; at++) {
02182 for (shell=0; shell < num_shells_per_atom[at]; shell++) {
02183 int shelltype = basis_set[at].shell[shell].type;
02184
02185
02186 int i, j, iang=0;
02187 float xdist=2.0;
02188 float ydist=2.0;
02189 float zdist=2.0;
02190 float xdp, ydp, zdp;
02191 float xdiv = 1.0f / xdist;
02192 for (j=0, zdp=1.0f; j<=shelltype; j++, zdp*=zdist) {
02193 int imax = shelltype - j;
02194 for (i=0, ydp=1.0f, xdp=pow(xdist, imax); i<=imax; i++, ydp*=ydist, xdp*=xdiv) {
02195 printf("%3i %c", at, shellname[shelltype]);
02196 int k, m=0;
02197 char buf[20]; buf[0] = '\0';
02198 for (k=0; k<(int)log2f(xdp); k++, m++) sprintf(buf+m, "x");
02199 for (k=0; k<(int)log2f(ydp); k++, m++) sprintf(buf+m, "y");
02200 for (k=0; k<(int)log2f(zdp); k++, m++) sprintf(buf+m, "z");
02201
02202 printf("%-5s (%1.0f%1.0f%1.0f) wave_f[%3i] = % 11.6f\n", buf,
02203 log2f(xdp), log2f(ydp), log2f(zdp), ifunc, wave_f[ifunc]);
02204
02205 iang++;
02206 ifunc++;
02207 }
02208 }
02209 }
02210 }
02211 #endif
02212
02213 }