# Invocation command line:
# /gpfs/alpine/stf006/world-shared/vgv/specHPG/v1.0.2/bin/harness/runhpc --config summit_nvhpc --define pmodel=acc --action run --nobuild --ranks=4200 -I --reportable medium
# output_root was not used for this run
############################################################################
######################################################################
# Example configuration file for the GNU Compilers
#
# Defines: "pmodel" => "mpi", "acc", "omp", "tgt", "tgtgpu" default "mpi"
#          "label" => ext base label, default "nv"
#
# MPI-only Command:
# runhpc -c Example_gnu --reportable -T base --define pmodel=mpi --ranks=40 small
#
# OpenACC Command:
# runhpc -c Example_gnu --reportable -T base --define pmodel=acc --ranks=4 small
#
# OpenMP Command:
# runhpc -c Example_gnu --reportable -T base --define pmodel=omp --ranks=1 --threads=40 small
#
# OpenMP Target Offload to Host Command:
# runhpc -c Example_gnu --reportable -T base --define pmodel=tgt --ranks=1 --threads=40 small
#
# OpenMP Target Offload to NVIDIA GPU Command:
# runhpc -c Example_gnu --reportable -T base --define pmodel=tgtnv --ranks=4 small
#
#######################################################################

%ifndef %{label}        # IF label is not set use gnu
%  define label gnu
%endif

%ifndef %{pmodel}       # IF pmodel is not set use mpi
%  define pmodel mpi
%endif

teeout = yes
makeflags = -j 40

# Tester Information
license_num  = 056A
test_sponsor = Oak Ridge National Laboratory
tester       = Oak Ridge National Laboratory
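# For this result only "pmodel" was overridden on the command line (see the
# invocation at the top of this file), so "label" keeps its default of "gnu".
# An equivalent invocation with both defines spelled out would look roughly
# like the following (sketch only, not the recorded command):
#
# runhpc --config summit_nvhpc --define label=gnu --define pmodel=acc --action run --nobuild --ranks=4200 -I --reportable medium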
######################################################
# SUT Section
######################################################
#include: Example_SUT.inc
#  ----- Begin inclusion of 'Example_SUT.inc'
############################################################################
######################################################
# Example configuration information for a
# system under test (SUT) Section
######################################################

# General SUT info
system_vendor = IBM
system_name   = Summit: IBM Power System AC922 (IBM Power9, Tesla V100-SXM2-16GB)

node_compute_sw_accel_driver = NVIDIA CUDA 450.80.02
node_compute_hw_adapter_ib_slot_type = None
node_compute_hw_adapter_ib_ports_used = 2
node_compute_hw_adapter_ib_model = Mellanox ConnectX-5
node_compute_hw_adapter_ib_interconnect = EDR InfiniBand
node_compute_hw_adapter_ib_firmware = 16.29.1016
node_compute_hw_adapter_ib_driver = 4.9-2.2.4.1
node_compute_hw_adapter_ib_data_rate = 100 Gb/s (4X EDR)
node_compute_hw_adapter_ib_count = 2
interconnect_ib_syslbl = Mellanox InfiniBand
interconnect_ib_purpose = MPI Traffic and GPFS access
interconnect_ib_order = 1
interconnect_ib_hw_vendor = Mellanox
interconnect_ib_hw_topo = Non-blocking Fat-tree
interconnect_ib_hw_switch_ib_ports = 36
interconnect_ib_hw_switch_ib_data_rate = 100 Gb/s
interconnect_ib_hw_switch_ib_count = 1
interconnect_ib_hw_model = Mellanox Switch IB-2
hw_avail = Nov-2018
sw_avail = Jul-2021
prepared_by = Veronica G. Melesse Vergara

# Computation node info
# [Node_Description: Hardware]
node_compute_syslbl = IBM Power System AC922
node_compute_order = 1
node_compute_count = 700
node_compute_purpose = compute
node_compute_hw_vendor = IBM
node_compute_hw_model = IBM Power System AC922
node_compute_hw_cpu_name = IBM POWER9 2.1 (pvr 004e 1201)
node_compute_hw_ncpuorder = 2 chips
node_compute_hw_nchips = 2
node_compute_hw_ncores = 44
node_compute_hw_ncoresperchip = 22
node_compute_hw_nthreadspercore = 4
node_compute_hw_cpu_char = Up to 3.8 GHz
node_compute_hw_cpu_mhz = 2300
node_compute_hw_pcache = 32 KB I + 32 KB D on chip per core
node_compute_hw_scache = 512 KB I+D on chip per core
node_compute_hw_tcache000 = 110 MB I+D on chip per chip
node_compute_hw_ocache = None
node_compute_hw_memory = 512 GB (16 x 32 GB RDIMM-DDR4-2666)
node_compute_hw_disk000 = 2 x 800 GB (Samsung Electronics Co Ltd NVMe SSD
node_compute_hw_disk001 = Controller 172Xa/172Xb)
node_compute_hw_other = None

#[Node_Description: Accelerator]
node_compute_hw_accel_model = Tesla V100-SXM2-16GB
node_compute_hw_accel_count = 6
node_compute_hw_accel_vendor = NVIDIA Corporation
node_compute_hw_accel_type = GPU
node_compute_hw_accel_connect = NVLink 2.0
node_compute_hw_accel_ecc = Yes
node_compute_hw_accel_desc = See Notes

#[Node_Description: Software]
node_compute_sw_os000 = Red Hat Enterprise Linux
node_compute_sw_os001 = 8.2
node_compute_sw_localfile = xfs
node_compute_sw_sharedfile000 = 250 PB IBM Spectrum Scale parallel filesystem
node_compute_sw_sharedfile001 = over 4X EDR InfiniBand
node_compute_sw_state = Multi-user, run level 3
node_compute_sw_other = None

#[Fileserver]

#[Interconnect]

#######################################################################
# End of SUT section
# If this config file were to be applied to several SUTs, edits would
# be needed only ABOVE this point.
######################################################################
# ---- End inclusion of '/gpfs/alpine/stf006/world-shared/vgv/specHPG/v1.0.2/config/Example_SUT.inc'
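# Run geometry (for reference): the notes below record 1 MPI rank per GPU,
# and the launch command in the header section uses 6 resource sets of 1 GPU
# each per host, so the 4200 ranks of this run correspond to the 700 compute
# nodes described above (700 nodes x 6 GPUs/node = 4200 ranks).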
#[Software]
sw_compiler000 = C/C++/Fortran: Version 21.7 of
sw_compiler001 = NVHPC Toolkit
sw_mpi_library = Spectrum MPI Version 10.4.0.3
sw_mpi_other = None
system_class = Homogenous Cluster
sw_other = None

#[General notes]
notes_000 = MPI startup command:
notes_005 = jsrun command was used to launch job using 1 GPU/rank.
notes_010 =Detailed information from nvaccelinfo
notes_015 =
notes_020 =CUDA Driver Version: 11000
notes_025 =NVRM version: NVIDIA UNIX ppc64le Kernel Module 450.80.02 Wed Sep 23 00:55:04 UTC 2020
notes_030 =
notes_035 =Device Number: 0
notes_040 =Device Name: Tesla V100-SXM2-16GB
notes_045 =Device Revision Number: 7.0
notes_050 =Global Memory Size: 16911433728
notes_055 =Number of Multiprocessors: 80
notes_060 =Concurrent Copy and Execution: Yes
notes_065 =Total Constant Memory: 65536
notes_070 =Total Shared Memory per Block: 49152
notes_075 =Registers per Block: 65536
notes_080 =Warp Size: 32
notes_085 =Maximum Threads per Block: 1024
notes_090 =Maximum Block Dimensions: 1024, 1024, 64
notes_095 =Maximum Grid Dimensions: 2147483647 x 65535 x 65535
notes_100 =Maximum Memory Pitch: 2147483647B
notes_105 =Texture Alignment: 512B
notes_110 =Clock Rate: 1530 MHz
notes_115 =Execution Timeout: No
notes_120 =Integrated Device: No
notes_125 =Can Map Host Memory: Yes
notes_130 =Compute Mode: exclusive-process
notes_135 =Concurrent Kernels: Yes
notes_140 =ECC Enabled: Yes
notes_145 =Memory Clock Rate: 877 MHz
notes_150 =Memory Bus Width: 4096 bits
notes_155 =L2 Cache Size: 6291456 bytes
notes_160 =Max Threads Per SMP: 2048
notes_165 =Async Engines: 4
notes_170 =Unified Addressing: Yes
notes_175 =Managed Memory: Yes
notes_180 =Concurrent Managed Memory: Yes
notes_185 =Preemption Supported: Yes
notes_190 =Cooperative Launch: Yes
notes_195 = Multi-Device: Yes
notes_200 =Default Target: cc70
notes_205 =
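# (The device details above were collected with the NVHPC "nvaccelinfo"
# utility; they can be reproduced on a compute node with, for example,
# "jsrun -n 1 nvaccelinfo". That command is an illustrative sketch; the
# notes record only that nvaccelinfo was the source.)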
#######################################################################
# End of SUT section
######################################################################

######################################################################
# The header section of the config file. Must appear
# before any instances of "section markers" (see below)
#
# ext = how the binaries you generated will be identified
# tune = specify "base" or "peak" or "all"
label         = %{label}_%{pmodel}
tune          = base
output_format = text
use_submit_for_speed = 1

# Compiler Settings
default:
CC  = mpicc
CXX = mpic++
FC  = mpif90

# Compiler Version Flags
CC_VERSION_OPTION  = --version
CXX_VERSION_OPTION = --version
FC_VERSION_OPTION  = --version

# MPI options and binding environment, dependent upon Model being run
# Adjust to match your system

# OpenMP (CPU) Settings
%if %{pmodel} eq 'omp'
preENV_OMP_PLACES=cores
#preENV_OMP_PROC_BIND=true
#preENV_OMP_PLACES=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
#preENV_OMP_PLACES=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24
%endif

#OpenMP Targeting Host Settings
%if %{pmodel} eq 'tgt'
#preENV_OMP_PROC_BIND=true
preENV_MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
preEnv_MPICH_GPU_SUPPORT_ENABLED=1
preEnv_MPICH_SMP_SINGLE_COPY_MODE=CMA
preEnv_MPICH_GPU_EAGER_DEVICE_MEM=0
#preENV_OMP_PLACES=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
#preENV_OMP_PLACES=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24
%endif

#MPIRUN_OPTS = --bind-to none -q
#submit = mpirun ${MPIRUN_OPTS} -np $ranks $command

# 1 GPU per RS, 7 cores per RS, 1 MPI task per RS, 6 RS per host
MPIRUN_OPTS = --gpu_per_rs 1 --cpu_per_rs 7 --tasks_per_rs 1 --rs_per_host 6
submit = jsrun ${MPIRUN_OPTS} -n $ranks $command

#######################################################################
# Optimization

# Note that SPEC baseline rules require that all uses of a given compiler
# use the same flags in the same order. See the SPEChpc Run Rules
# for more details
# http://www.spec.org/hpc2021/Docs/runrules.html
#
# OPTIMIZE    = flags applicable to all compilers
# FOPTIMIZE   = flags applicable to the Fortran compiler
# COPTIMIZE   = flags applicable to the C compiler
# CXXOPTIMIZE = flags applicable to the C++ compiler
#
# See your compiler manual for information on the flags available
# for your compiler

# Compiler flags applied to all models
default=base=default:
OPTIMIZE    = -O3
COPTIMIZE   = -lm  # use -mcpu=native for ARM
CXXOPTIMIZE = -std=c++11
#FOPTIMIZE  = -ffree-line-length-none -fno-stack-protector
FOPTIMIZE   =

#ARM
%if %{armOn} eq 'arm'
COPTIMIZE += -mcpu=native
#OPTIMIZE += -mcpu=a64fx
%endif

# SVE
%if %{sveOn} eq 'sve'
COPTIMIZE += -march=armv8-a+sve
%endif

%if %{pmodel} eq 'mpi'
pmodel=MPI
%endif

# OpenACC flags
%if %{pmodel} eq 'acc'
# Use with PGI compiler only
pmodel=ACC
OPTIMIZE += -acc=gpu
%endif

# OpenMP (CPU) flags
%if %{pmodel} eq 'omp'
pmodel=OMP
OPTIMIZE += -qsmp=omp
#FOPTIMIZE +=
%endif

# OpenMP Targeting host flags
%if %{pmodel} eq 'tgt'
pmodel=TGT
OPTIMIZE += -qsmp=omp -qoffload
#FOPTIMIZE += -homp
%endif

# No peak flags set, so make peak use the same flags as base
default=peak=default:
basepeak=1

#######################################################################
# Portability
#######################################################################

# The following section was added automatically, and contains settings that
# did not appear in the original configuration file, but were added to the
# raw file after the run.
default:
flagsurl000 = http://www.spec.org/hpc2021/flags/nv2021_flags.xml
interconnect_ib_hw_switch_ib_model000 = Mellanox IB EDR Switch IB-2
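# For reference, with pmodel=acc the settings in the header and optimization
# sections above reduce to roughly the following compile and launch lines
# (sketch only; the harness supplies the benchmark sources and substitutes
# $command at run time):
#
#   mpicc  -O3 -lm -acc=gpu ...
#   mpic++ -O3 -std=c++11 -acc=gpu ...
#   mpif90 -O3 -acc=gpu ...
#   jsrun --gpu_per_rs 1 --cpu_per_rs 7 --tasks_per_rs 1 --rs_per_host 6 -n 4200 $command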