# Invocation command line:
# /share/app/hpc2021-1.1.8/bin/harness/runhpc --reportable --define model=omp --define EXPID=submission/omp_8490H/node2/small.omp.rank_24.ppn_12.thread_20 --iterations=2 -c xfusion.omp.small.node2.cfg -T base,peak --input ref --define HOSTFILE=node2 --define RANKS=24 --define PPN=12 --define THREADS=20 --flagsurl config/flags/Intel_compiler_flags.2023-08-16.xml small
# output_root was not used for this run
############################################################################
#!/bin/bash

############################################################################
# Header options read before any section marker.
allow_label_override=yes  # label controls srcalt: simd - for simd

# Experiment id: left empty unless the run is started with
# --define EXPID=<path>, in which case that value is used.
expid=
%ifdef %{EXPID}
    expid=%{EXPID}
%endif

build_in_build_dir = 0        # build in run dir
# NOTE(review): presumably needed so the ENV_KMP_AFFINITY setting further down
# in this file is applied to the benchmark environment - confirm in the
# harness documentation.
env_vars           = 1

# Fall back to the "xfusion" label when none is given with --define label=...
%ifndef %{label}
%   define label xfusion
%endif

# Fall back to the pure-MPI model when none is given with --define model=...
# The macro defined here has to be "model" (not "pmodel"): the %if tests below
# and the label setting (%{label}_%{model}) both expand %{model}, so leaving
# it undefined would skip every pmodel assignment and break the label.
%ifndef %{model}
%   define model mpi
%endif

# Pure MPI
%if %{model} eq 'mpi'
  pmodel=MPI
%endif

# OpenMP (CPU) flags
%if %{model} eq 'omp'
  pmodel=OMP
  OPTIMIZE += -fiopenmp
%endif

# Build/run behaviour.
teeout = yes
# NOTE(review): -j is given without a job count; presumably this lets make
# start an unlimited number of parallel jobs - confirm this is intended.
makeflags=-j

# System Description
system_class = Homogenous Cluster

# Tester description
license_num     = 6488
showtimer = 0
test_sponsor    = xFusion
tester          = xFusion

# Other MPI-related and general software (none used for this result)
sw_mpi_other    = None
sw_other        = None

######################################################
# SUT Section
######################################################
#include: Example_SUT.inc

# General SUT info
system_vendor      = xFusion
# Accelerator fields: no accelerators are present in this system, so every
# descriptive field is None and both counts are 0.
node_compute_sw_accel_driver = None
node_compute_hw_accel_vendor = None
node_compute_hw_accel_type = None
node_compute_hw_accel_model = None
node_compute_hw_accel_ecc = None
node_compute_hw_accel_desc = None
node_compute_hw_accel_count = 0
node_compute_hw_accel_connect = None
hw_vendor_list = xFusion
hw_total_accel = 0
hw_model_list = xFusion FusionServer 2288H V7
hw_cpu_name_list = Intel Xeon Platinum 8490H
# The system name is split over two numbered fields (000, 001); presumably the
# harness joins them in numeric order when formatting the report.
system_name000     = xFusion FusionServer 2288H V7 (Intel Xeon
system_name001 = Platinum 8490H)
# Availability dates for hardware and software, and the report author.
hw_avail           = Jan-2023
sw_avail           = Apr-2023
prepared_by        = Lu Xu <luxu@xfusion.com>

# Computation node info
# [Node_Description: Hardware]
# Two identical compute nodes are described (node_compute_count = 2).
node_compute_syslbl = xFusion FusionServer 2288H V7
node_compute_order = 1
node_compute_count = 2
node_compute_purpose = Compute Node
node_compute_hw_vendor = xFusion
node_compute_hw_model = xFusion FusionServer 2288H V7
node_compute_hw_cpu_name = Intel Xeon Platinum 8490H
node_compute_hw_ncpuorder = 1, 2 chips
# Per node: 2 chips x 60 cores per chip = 120 cores; with 2 hardware threads
# per core that is 240 hardware threads per node.
node_compute_hw_nchips = 2
node_compute_hw_ncores = 120
node_compute_hw_ncoresperchip = 60
node_compute_hw_nthreadspercore = 2
node_compute_hw_cpu_char = Turbo Boost Technology up to 3.5 GHz
node_compute_hw_cpu_mhz = 1900
# Cache hierarchy: primary and secondary are per core, tertiary is per chip.
node_compute_hw_pcache = 32 KB I + 48 KB D on chip per core
node_compute_hw_scache = 2 MB I+D on chip per core
node_compute_hw_tcache   = 112.5 MB I+D on chip per chip
node_compute_hw_ocache = None
# 16 DIMMs x 32 GB = 512 GB per node.
node_compute_hw_memory   = 512 GB (16 x 32 GB 2Rx8 PC5-4800B-R)
node_compute_hw_disk = 1 x 7.68 TB NVMe SSD
node_compute_hw_other = None

#[Node_Description: Accelerator]

#[Node_Description: Software]
# The hw_adapter_fs_* fields below describe the node's interconnect adapter
# (hardware) even though they sit under the Software tag; the "fs" suffix
# matches the interconnect_fs_* entries of the interconnect description.
node_compute_hw_adapter_fs_model = MCX653105A-EFAT
node_compute_hw_adapter_fs_count = 1
node_compute_hw_adapter_fs_slot_type = PCI-Express 4.0 x16
node_compute_hw_adapter_fs_data_rate = 100 Gb/s
node_compute_hw_adapter_fs_ports_used = 1
node_compute_hw_adapter_fs_interconnect = Mellanox HDR
node_compute_hw_adapter_fs_driver = 5.4-3.1.0
node_compute_hw_adapter_fs_firmware = 20.32.1010
# Operating system: 000 holds the distribution release, 001 the kernel version.
node_compute_sw_os000 = Rocky Linux release 8.7 (Green Obsidian)
node_compute_sw_os001 = 4.18.0-425.3.1.el8.x86_64
node_compute_sw_localfile = xfs
node_compute_sw_sharedfile = NFS
node_compute_sw_state = Multi-user, run level 3
node_compute_sw_other = N/A

#[Fileserver]
# No separate file server is described.

#[Interconnect]
# Single interconnect, labelled "fs", used for MPI traffic.
interconnect_fs_syslbl = Mellanox HDR
interconnect_fs_order = 0
interconnect_fs_purpose = MPI
interconnect_fs_hw_vendor = Mellanox
interconnect_fs_hw_model = Mellanox HDR
# Switch model text is split over two numbered fields (000, 001).
interconnect_fs_hw_switch_fs_model000= Mellanox MQM8790-HS2F
interconnect_fs_hw_switch_fs_model001 = InfiniBand Switch
interconnect_fs_hw_switch_fs_count = 1
interconnect_fs_hw_switch_fs_ports = 40
# NOTE(review): topology is given as Mesh although only one switch is listed
# (switch count = 1) - confirm the intended topology description.
interconnect_fs_hw_topo = Mesh
# NOTE(review): the switch rate is 200 Gbit/s while the node adapter
# (node_compute_hw_adapter_fs_data_rate) is listed at 100 Gb/s - confirm that
# the difference is intentional.
interconnect_fs_hw_switch_fs_data_rate = 200 Gbit/s
interconnect_fs_hw_switch_fs_firmware = 27.2010.1202

#######################################################################
# End of SUT section
######################################################################

######################################################################
# The header section of the config file.  Must appear
# before any instances of "section markers" (see below)
#
# label = how the binaries you generated will be identified; built here from
#         the label and model macros, e.g. xfusion_omp for the invocation
#         recorded at the top of this file
# tune = specify "base" or "peak" or "all"
label         = %{label}_%{model}
tune          = all
output_format = all
# NOTE(review): presumably makes the harness launch each benchmark through the
# "submit" command defined in this file - confirm in the harness documentation.
use_submit_for_speed = 1

# Compiler Settings
default:
AR           = ar
ARFLAGS      = cr
# Intel MPI compiler wrappers; the -cc= / -cxx= / -fc= options name the
# underlying compilers (icx, icpx, ifx) the wrappers invoke.
CC           = mpiicc -cc=icx
CXX          = mpiicpc -cxx=icpx
FC           = mpiifort -fc=ifx
# Report-only descriptions of the compiler and MPI library.
sw_compiler  = Intel oneAPI Compiler 2023.0.0
sw_mpi_library = Intel MPI Library 2021.8 for Linux OS

# Compiler Version Flags
CC_VERSION_OPTION  = --version
CXX_VERSION_OPTION = --version
FC_VERSION_OPTION  = --version

# MPI options and binding environment, dependent upon Model being run
# Adjust to match your system
#
# $hostfile, $ranks, $ppn and $threads are the config variables of the same
# names assigned in the default=base,peak=default section (and overridden in
# the per-benchmark peak sections). OMP_NUM_THREADS is passed to every rank
# through -genv.
# NOTE(review): $[top] is presumably the benchmark suite's top-level
# directory, so the host file is looked up relative to it - confirm.

submit = mpiexec.hydra -bootstrap ssh --bind-to core -hostfile $[top]/$hostfile -np $ranks -ppn $ppn -genv OMP_NUM_THREADS=$threads $command


#######################################################################
# Optimization

# Note that SPEC baseline rules require that all uses of a given compiler
# use the same flags in the same order. See the SPEChpc Run Rules
# for more details
#      http://www.spec.org/hpc2021/Docs/runrules.html
#
# OPTIMIZE    = flags applicable to all compilers
# FOPTIMIZE   = flags applicable to the Fortran compiler
# COPTIMIZE   = flags applicable to the C compiler
# CXXOPTIMIZE = flags applicable to the C++ compiler
#
# See your compiler manual for information on the flags available
# for your compiler

# Compiler flags applied to all models
#
# Named flag sets that can be pulled into OPTIMIZE with ${name}. In the part
# of this file shown here only vec_avx512_high_exp8 is referenced (by the
# default=base,peak OPTIMIZE line); the remaining entries are alternatives
# kept for experimentation.

    vec_novec=-no-vec
    vec_avx2=-xCORE-AVX2
    vec_avx512=-xCORE-AVX512
    vec_avx512_high=-xCORE-AVX512 -mprefer-vector-width=512
    vec_avx512_streaming_stores=-xCORE-AVX512 -mllvm -hir-nontemporal-cacheline-count=0
    # exp1..exp8: vec_avx512_high plus additional flags.
    vec_avx512_high_exp1=-xCORE-AVX512 -mprefer-vector-width=512 -ffast-math
    vec_avx512_high_exp2=-xCORE-AVX512 -mprefer-vector-width=512 -flto
    vec_avx512_high_exp3=-xCORE-AVX512 -mprefer-vector-width=512 -funroll-loops
    vec_avx512_high_exp4=-xCORE-AVX512 -mprefer-vector-width=512 -ffast-math -flto -funroll-loops
    vec_avx512_high_exp5=-xCORE-AVX512 -mprefer-vector-width=512 -ffinite-math-only
    vec_avx512_high_exp6=-xCORE-AVX512 -mprefer-vector-width=512 -fimf-precision=low:sin,sqrt
    vec_avx512_high_exp7=-xCORE-AVX512 -mprefer-vector-width=512 -ffinite-math-only -fimf-precision=low:sin,sqrt  -ffast-math -flto -funroll-loops
    vec_avx512_high_exp8=-xCORE-AVX512 -mprefer-vector-width=512 -qopt-multiple-gather-scatter-by-shuffles -ffast-math -flto -funroll-loops
    vec_common512=-xCOMMON-AVX512
    # Same flags as vec_avx512_high.
    vec=-xCORE-AVX512 -mprefer-vector-width=512

# Flags shared by base and peak for every benchmark.
default=base,peak:
# -fiopenmp is part of this line unconditionally, i.e. it does not depend on
# the model macro. ${vec_avx512_high_exp8} expands to the flag set of that
# name defined earlier in this file.
OPTIMIZE      = -O3 -Ofast -ipo -fiopenmp ${vec_avx512_high_exp8}
FOPTIMIZE     = -nostandard-realloc-lhs -align array64byte
CPORTABILITY  = -lstdc++ -std=c++14 -Wno-incompatible-function-pointer-types

# NOTE(review): presumably exported to the benchmarks as KMP_AFFINITY because
# env_vars = 1 is set in the header - confirm in the harness documentation.
ENV_KMP_AFFINITY=compact,1,granularity=thread

# Run layout consumed by the submit command ($ppn, $ranks, $threads,
# $hostfile). The values come from the --define PPN/RANKS/THREADS/HOSTFILE
# options on the runhpc command line; per-benchmark peak sections may
# override ppn, ranks and threads.
default=base,peak=default:
    ppn      = %{PPN}
    ranks    = %{RANKS}
    threads  = %{THREADS}
    hostfile = %{HOSTFILE}

# 605.lbm_s peak: no peak-specific flags or layout are given.
# NOTE(review): basepeak=1 presumably makes peak reuse the base build and
# result - confirm in the harness documentation.
605.lbm_s=peak:
basepeak=1

# 613.soma_s peak layout: 2 nodes x 2 ranks per node = 4 ranks;
# 2 ranks x 120 threads = 240 threads per node.
613.soma_s=peak:
    threads=120
    ppn=2
    ranks=4

# 618.tealeaf_s peak: the wrappers are used without the -cc=icx / -cxx=icpx
# selection from the default section (presumably falling back to the wrappers'
# default compilers - confirm), and OPTIMIZE is replaced with a -qopenmp
# flag set.
# Layout: 2 nodes x 20 ranks per node = 40 ranks;
# 20 ranks x 12 threads = 240 threads per node.
618.tealeaf_s=peak:
CC           = mpiicc
CXX          = mpiicpc
OPTIMIZE     = -O3 -Ofast -xCORE-AVX512 -ansi-alias -qopenmp -ipo -qopt-zmm-usage=high -qopt-multiple-gather-scatter-by-shuffles
    threads=12
    ppn=20
    ranks=40

# 619.clvleaf_s peak: the Fortran wrapper is used without the -fc=ifx
# selection from the default section (presumably falling back to the wrapper's
# default compiler - confirm), and OPTIMIZE is replaced with a -qopenmp
# flag set.
# Layout: 2 nodes x 120 ranks per node = 240 ranks;
# 120 ranks x 2 threads = 240 threads per node.
619.clvleaf_s=peak:
FC           = mpiifort
OPTIMIZE     = -O3 -Ofast -xCORE-AVX512 -ansi-alias -qopenmp -ipo -qopt-zmm-usage=high -qopt-multiple-gather-scatter-by-shuffles
    threads=2
    ppn=120
    ranks=240

# 621.miniswp_s peak: the wrappers are used without the -cc=icx / -cxx=icpx
# selection from the default section (presumably falling back to the wrappers'
# default compilers - confirm), and OPTIMIZE is replaced with a -qopenmp
# flag set.
# Layout: 2 nodes x 4 ranks per node = 8 ranks;
# 4 ranks x 60 threads = 240 threads per node.
621.miniswp_s=peak:
CC           = mpiicc
CXX          = mpiicpc
OPTIMIZE     = -O3 -Ofast -xCORE-AVX512 -ansi-alias -qopenmp -ipo -qopt-zmm-usage=high -qopt-multiple-gather-scatter-by-shuffles
    threads=60
    ppn=4
    ranks=8

# 628.pot3d_s peak layout only (compiler flags are inherited):
# 2 nodes x 120 ranks per node = 240 ranks;
# 120 ranks x 2 threads = 240 threads per node.
628.pot3d_s=peak:
    threads=2
    ppn=120
    ranks=240

# 632.sph_exa_s peak layout only (compiler flags are inherited):
# 2 nodes x 20 ranks per node = 40 ranks;
# 20 ranks x 12 threads = 240 threads per node.
632.sph_exa_s=peak:
    threads=12
    ppn=20
    ranks=40

# 634.hpgmgfv_s peak: the wrappers are used without the -cc=icx / -cxx=icpx
# selection from the default section (presumably falling back to the wrappers'
# default compilers - confirm), and OPTIMIZE is replaced with a -qopenmp
# flag set.
# Layout: 2 nodes x 16 ranks per node = 32 ranks;
# 16 ranks x 15 threads = 240 threads per node.
634.hpgmgfv_s=peak:
CC           = mpiicc
CXX          = mpiicpc
OPTIMIZE     = -O3 -Ofast -xCORE-AVX512 -ansi-alias -qopenmp -ipo -qopt-zmm-usage=high -qopt-multiple-gather-scatter-by-shuffles
    threads=15
    ppn=16
    ranks=32

# 635.weather_s peak: appends (+=) a streaming-stores flag to the inherited
# OPTIMIZE value instead of replacing it.
# Layout: 2 nodes x 30 ranks per node = 60 ranks;
# 30 ranks x 8 threads = 240 threads per node.
635.weather_s=peak:
OPTIMIZE     += -qopt-streaming-stores=always
    threads=8
    ppn=30
    ranks=60


# The following section was added automatically, and contains settings that
# did not appear in the original configuration file, but were added to the
# raw file after the run.
default:
flagsurl000 = http://www.spec.org/hpc2021/flags/Intel_compiler_flags.2023-08-16.xml
# NOTE(review): the OS recorded in the two fields below (CentOS Linux release
# 8.2...) disagrees with node_compute_sw_os000/001 earlier in this file
# (Rocky Linux release 8.7, kernel 4.18.0-425.3.1.el8.x86_64), and the
# sw_os_list001 text looks like a release number and a kernel string run
# together. These fields were added by the tools after the run; confirm which
# OS description is correct before reusing this file.
sw_os_list000 = CentOS Linux release
sw_os_list001 = 8.2.20044.18.0-193.el8.x86_644