# Invocation command line:
# /share/app/hpc2021-1.1.8/bin/harness/runhpc --reportable --define model=omp --define EXPID=submission/omp_8490H/node2/small.omp.rank_24.ppn_12.thread_20 --iterations=2 -c xfusion.omp.small.node2.cfg -T base,peak --input ref --define HOSTFILE=node2 --define RANKS=24 --define PPN=12 --define THREADS=20 --flagsurl config/flags/Intel_compiler_flags.2023-08-16.xml small
# output_root was not used for this run
############################################################################
#!/bin/bash
############################################################################
# SPEChpc 2021 configuration for a two-node xFusion FusionServer 2288H V7
# system. Macros consumed by this file (pass each with --define NAME=value):
#   model     parallel model, 'mpi' or 'omp' (falls back to mpi when unset)
#   label     prefix of the build label       (falls back to xfusion when unset)
#   EXPID     optional experiment id, copied into expid
#   HOSTFILE, RANKS, PPN, THREADS
#             launch geometry used by the submit command

# label controls srcalt: simd - for simd
allow_label_override=yes

# expid stays empty unless EXPID was defined on the command line.
expid=
%ifdef %{EXPID}
expid=%{EXPID}
%endif

# build in run dir
build_in_build_dir = 0
env_vars = 1

# If label is not defined on the command line, use xfusion.
%ifndef %{label}
%   define label xfusion
%endif

# If model is not defined on the command line, default it to mpi so that the
# %if tests below and "label = %{label}_%{model}" always see a value.
%ifndef %{model}
%   define model mpi
%endif

# MPI-only flags
%if %{model} eq 'mpi'
pmodel=MPI
%endif

# OpenMP (CPU) flags
%if %{model} eq 'omp'
pmodel=OMP
OPTIMIZE += -fiopenmp
%endif

teeout = yes
makeflags=-j

# System Description
system_class = Homogenous Cluster

# Tester description
license_num = 6488
showtimer = 0
test_sponsor = xFusion
tester = xFusion

# Operating system, file system
sw_mpi_other = None
sw_other = None

######################################################
# SUT Section
######################################################
#include: Example_SUT.inc

# General SUT info
system_vendor = xFusion
node_compute_sw_accel_driver = None
node_compute_hw_accel_vendor = None
node_compute_hw_accel_type = None
node_compute_hw_accel_model = None
node_compute_hw_accel_ecc = None
node_compute_hw_accel_desc = None
node_compute_hw_accel_count = 0
node_compute_hw_accel_connect = None
hw_vendor_list = xFusion
hw_total_accel = 0
hw_model_list = xFusion FusionServer 2288H V7
hw_cpu_name_list = Intel Xeon Platinum 8490H
# Numbered suffixes (000, 001, ...) continue one logical field across lines.
system_name000 = xFusion FusionServer 2288H V7 (Intel Xeon
system_name001 = Platinum 8490H)
hw_avail = Jan-2023
sw_avail = Apr-2023
prepared_by = Lu Xu

# Computation node info
# [Node_Description: Hardware]
node_compute_syslbl = xFusion FusionServer 2288H V7
node_compute_order = 1
node_compute_count = 2
node_compute_purpose = Compute Node
node_compute_hw_vendor = xFusion
node_compute_hw_model = xFusion FusionServer 2288H V7
node_compute_hw_cpu_name = Intel Xeon Platinum 8490H
node_compute_hw_ncpuorder = 1, 2 chips
node_compute_hw_nchips = 2
node_compute_hw_ncores = 120
node_compute_hw_ncoresperchip = 60
node_compute_hw_nthreadspercore = 2
node_compute_hw_cpu_char = Turbo Boost Technology up to 3.5 GHz
node_compute_hw_cpu_mhz = 1900
node_compute_hw_pcache = 32 KB I + 48 KB D on chip per core
node_compute_hw_scache = 2 MB I+D on chip per core
node_compute_hw_tcache = 112.5 MB I+D on chip per chip
node_compute_hw_ocache = None
node_compute_hw_memory = 512 GB (16 x 32 GB 2Rx8 PC5-4800B-R)
node_compute_hw_disk = 1 x 7.68 TB NVMe SSD
node_compute_hw_other = None

#[Node_Description: Accelerator]

#[Node_Description: Software]
node_compute_hw_adapter_fs_model = MCX653105A-EFAT
node_compute_hw_adapter_fs_count = 1
node_compute_hw_adapter_fs_slot_type = PCI-Express 4.0 x16
node_compute_hw_adapter_fs_data_rate = 100 Gb/s
node_compute_hw_adapter_fs_ports_used = 1
node_compute_hw_adapter_fs_interconnect = Mellanox HDR
node_compute_hw_adapter_fs_driver = 5.4-3.1.0
node_compute_hw_adapter_fs_firmware = 20.32.1010
node_compute_sw_os000 = Rocky Linux release 8.7 (Green Obsidian)
node_compute_sw_os001 = 4.18.0-425.3.1.el8.x86_64
node_compute_sw_localfile = xfs
node_compute_sw_sharedfile = NFS
node_compute_sw_state = Multi-user, run level 3
node_compute_sw_other = N/A

#[Fileserver]

#[Interconnect]
interconnect_fs_syslbl = Mellanox HDR
interconnect_fs_order = 0
interconnect_fs_purpose = MPI
interconnect_fs_hw_vendor = Mellanox
interconnect_fs_hw_model = Mellanox HDR
interconnect_fs_hw_switch_fs_model000 = Mellanox MQM8790-HS2F
interconnect_fs_hw_switch_fs_model001 = InfiniBand Switch
interconnect_fs_hw_switch_fs_count = 1
interconnect_fs_hw_switch_fs_ports = 40
interconnect_fs_hw_topo = Mesh
interconnect_fs_hw_switch_fs_data_rate = 200 Gbit/s
interconnect_fs_hw_switch_fs_firmware = 27.2010.1202

#######################################################################
# End of SUT section
######################################################################

######################################################################
# The header section of the config file.  Must appear
# before any instances of "section markers" (see below)
#
# label = how the binaries you generated will be identified
# tune = specify "base" or "peak" or "all"
label = %{label}_%{model}
tune = all
output_format = all
use_submit_for_speed = 1

# Compiler Settings
default:
AR = ar
ARFLAGS = cr
CC = mpiicc -cc=icx
CXX = mpiicpc -cxx=icpx
FC = mpiifort -fc=ifx
sw_compiler = Intel oneAPI Compiler 2023.0.0
sw_mpi_library = Intel MPI Library 2021.8 for Linux OS

# Compiler Version Flags
CC_VERSION_OPTION = --version
CXX_VERSION_OPTION = --version
FC_VERSION_OPTION = --version

# MPI options and binding environment, dependent upon Model being run
# Adjust to match your system
# $hostfile, $ranks, $ppn and $threads are the settings assigned in the
# default=base,peak=default section and the per-benchmark peak sections below.
submit = mpiexec.hydra -bootstrap ssh --bind-to core -hostfile $[top]/$hostfile -np $ranks -ppn $ppn -genv OMP_NUM_THREADS=$threads $command

#######################################################################
# Optimization
# Note that SPEC baseline rules require that all uses of a given compiler
# use the same flags in the same order. See the SPEChpc Run Rules
# for more details
# http://www.spec.org/hpc2021/Docs/runrules.html
#
# OPTIMIZE = flags applicable to all compilers
# FOPTIMIZE = flags applicable to the Fortran compiler
# COPTIMIZE = flags applicable to the C compiler
# CXXOPTIMIZE = flags applicable to the C++ compiler
#
# See your compiler manual for information on the flags available
# for your compiler

# Compiler flags applied to all models
# Named flag sets; only vec_avx512_high_exp8 is referenced below
# (by OPTIMIZE in the default=base,peak section).
vec_novec=-no-vec
vec_avx2=-xCORE-AVX2
vec_avx512=-xCORE-AVX512
vec_avx512_high=-xCORE-AVX512 -mprefer-vector-width=512
vec_avx512_streaming_stores=-xCORE-AVX512 -mllvm -hir-nontemporal-cacheline-count=0
vec_avx512_high_exp1=-xCORE-AVX512 -mprefer-vector-width=512 -ffast-math
vec_avx512_high_exp2=-xCORE-AVX512 -mprefer-vector-width=512 -flto
vec_avx512_high_exp3=-xCORE-AVX512 -mprefer-vector-width=512 -funroll-loops
vec_avx512_high_exp4=-xCORE-AVX512 -mprefer-vector-width=512 -ffast-math -flto -funroll-loops
vec_avx512_high_exp5=-xCORE-AVX512 -mprefer-vector-width=512 -ffinite-math-only
vec_avx512_high_exp6=-xCORE-AVX512 -mprefer-vector-width=512 -fimf-precision=low:sin,sqrt
vec_avx512_high_exp7=-xCORE-AVX512 -mprefer-vector-width=512 -ffinite-math-only -fimf-precision=low:sin,sqrt -ffast-math -flto -funroll-loops
vec_avx512_high_exp8=-xCORE-AVX512 -mprefer-vector-width=512 -qopt-multiple-gather-scatter-by-shuffles -ffast-math -flto -funroll-loops
vec_common512=-xCOMMON-AVX512
vec=-xCORE-AVX512 -mprefer-vector-width=512

# Flags shared by base and peak for every benchmark.
default=base,peak:
OPTIMIZE = -O3 -Ofast -ipo -fiopenmp ${vec_avx512_high_exp8}
FOPTIMIZE = -nostandard-realloc-lhs -align array64byte
CPORTABILITY = -lstdc++ -std=c++14 -Wno-incompatible-function-pointer-types
ENV_KMP_AFFINITY=compact,1,granularity=thread

# Launch geometry taken from the --define values on the command line.
default=base,peak=default:
ppn = %{PPN}
ranks = %{RANKS}
threads = %{THREADS}
hostfile = %{HOSTFILE}

# Per-benchmark peak overrides.
605.lbm_s=peak:
basepeak=1

613.soma_s=peak:
threads=120
ppn=2
ranks=4

618.tealeaf_s=peak:
CC = mpiicc
CXX = mpiicpc
OPTIMIZE = -O3 -Ofast -xCORE-AVX512 -ansi-alias -qopenmp -ipo -qopt-zmm-usage=high -qopt-multiple-gather-scatter-by-shuffles
threads=12
ppn=20
ranks=40

619.clvleaf_s=peak:
FC = mpiifort
OPTIMIZE = -O3 -Ofast -xCORE-AVX512 -ansi-alias -qopenmp -ipo -qopt-zmm-usage=high -qopt-multiple-gather-scatter-by-shuffles
threads=2
ppn=120
ranks=240

621.miniswp_s=peak:
CC = mpiicc
CXX = mpiicpc
OPTIMIZE = -O3 -Ofast -xCORE-AVX512 -ansi-alias -qopenmp -ipo -qopt-zmm-usage=high -qopt-multiple-gather-scatter-by-shuffles
threads=60
ppn=4
ranks=8

628.pot3d_s=peak:
threads=2
ppn=120
ranks=240

632.sph_exa_s=peak:
threads=12
ppn=20
ranks=40

634.hpgmgfv_s=peak:
CC = mpiicc
CXX = mpiicpc
OPTIMIZE = -O3 -Ofast -xCORE-AVX512 -ansi-alias -qopenmp -ipo -qopt-zmm-usage=high -qopt-multiple-gather-scatter-by-shuffles
threads=15
ppn=16
ranks=32

635.weather_s=peak:
OPTIMIZE += -qopt-streaming-stores=always
threads=8
ppn=30
ranks=60

# The following section was added automatically, and contains settings that
# did not appear in the original configuration file, but were added to the
# raw file after the run.
default:
flagsurl000 = http://www.spec.org/hpc2021/flags/Intel_compiler_flags.2023-08-16.xml
# NOTE(review): the sw_os_list value below looks garbled and names CentOS 8.2,
# while node_compute_sw_os000/001 report Rocky Linux 8.7 with kernel
# 4.18.0-425.3.1.el8.x86_64 - confirm which is correct before publishing.
sw_os_list000 = CentOS Linux release
sw_os_list001 = 8.2.20044.18.0-193.el8.x86_644