# Invocation command line:
# /panfs/users/abobyr/SpecHPC2021/kits/v1.1.8/bin/harness/runhpc --rebuild --reportable --define model=omp --define ppn=16 --iterations=3 -c Intel.oneAPI.SPR_HBMcache.base-peak.small.16nodes.cfg -T base,peak --input ref --define RANKS=256 --define THREADS=7 --flagsurl config/flags/Intel-oneAPI2024.1-linux64.xml small
# output_root was not used for this run
############################################################################
############################################################################
#!/bin/bash

############################################################################
allow_label_override = yes  # label controls srcalt; e.g. label "simd" selects the simd source alternative

build_in_build_dir = 0        # build in run dir
env_vars           = 1

%ifndef %{label}         # IF label is not set use intel
%   define label intel
%endif

%ifndef %{model}         # IF model is not set use mpi
%   define pmodel MPI
%endif

%if %{model} eq 'mpi'
  pmodel=MPI
%endif

# OpenMP (CPU) flags
%if %{model} eq 'omp'
  pmodel=OMP
  OPTIMIZE += -fiopenmp
%endif
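
# For reference: the %{model}, %{ppn}, %{RANKS}, and %{THREADS} macros are
# set with --define on the runhpc command line. This run (see the invocation
# recorded at the top of this file) was started with
#
#   runhpc --define model=omp --define ppn=16 --define RANKS=256 --define THREADS=7 ...
#
# so pmodel is set to OMP and -fiopenmp is appended to OPTIMIZE; "..." stands
# for the remaining options shown in the full invocation above.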

teeout = yes
makeflags = -j

# System Description
system_class = Homogeneous Cluster

# Tester description
license_num     = 13
showtimer = 0
test_sponsor    = Intel
tester          = Intel

# Operating system, file system
sw_mpi_other    = None
sw_other        = None

######################################################
# SUT Section
######################################################
#include: Example_SUT.inc

# General SUT info
system_vendor      = Intel
system_name        = Endeavour: Intel Server D50DNP1SBB (Xeon Max 9480)
hw_vendor_list = Intel
hw_total_accel = 0
hw_model_list = Intel Server D50DNP1SBB (Xeon Max 9480)
hw_cpu_name_list = Intel Xeon Max 9480
hw_accel_vendor_list = None
hw_accel_model_list = None
hw_avail           = Jan-2023
sw_avail           = Mar-2024
prepared_by = Alexander Bobyr <alexander.bobyr@intel.com>

# Computation node info
# [Node_Description: Hardware]
node_compute_syslbl = Intel Server D50DNP1SBB (Xeon Max 9480)
node_compute_order = 1
node_compute_count = 16
node_compute_purpose = Compute
node_compute_hw_vendor = Intel
node_compute_hw_model = Intel Server D50DNP1SBB (Xeon Max 9480)
node_compute_hw_cpu_name = Intel Xeon Max 9480
node_compute_hw_ncpuorder = 1, 2 chips
node_compute_hw_nchips = 2
node_compute_hw_ncores = 112
node_compute_hw_ncoresperchip = 56
node_compute_hw_nthreadspercore = 2
node_compute_hw_cpu_char = Turbo Boost Technology up to 3.5 GHz
node_compute_hw_cpu_mhz = 1900
node_compute_hw_pcache = 32 KB I + 48 KB D on chip per core
node_compute_hw_scache = 2 MB I+D on chip per core
node_compute_hw_tcache = 112.5 MB I+D on chip per chip
node_compute_hw_ocache = None
node_compute_hw_memory000 = 640 GB (16 x 32GB 2Rx8 PC5-4800B-R +
node_compute_hw_memory001 = 8 x 16GB HBM2 3200 MT/s)
node_compute_hw_disk = 1 x 1 TB NVMe U.2 2.5" SSD
node_compute_hw_other = None

#[Node_Description: Accelerator]
node_compute_hw_accel_count   = None
node_compute_hw_accel_vendor  = None
node_compute_hw_accel_type    = None
node_compute_hw_accel_connect = None
node_compute_hw_accel_model   = None
node_compute_hw_accel_ecc     = None
node_compute_hw_accel_desc    = None

node_compute_hw_adapter_fs_model = Mellanox ConnectX-6 HDR
node_compute_hw_adapter_fs_count = 1
node_compute_hw_adapter_fs_slot_type = PCI-Express 4.0 x16
node_compute_hw_adapter_fs_data_rate = 200 Gbit/s
node_compute_hw_adapter_fs_ports_used = 1
node_compute_hw_adapter_fs_interconnect = Mellanox HDR
node_compute_hw_adapter_fs_driver = 23.04-0.5.3
node_compute_hw_adapter_fs_firmware = 20.37.1014

#[Node_Description: Software]
node_compute_sw_os000 = Rocky Linux 8.8 (Green Obsidian)
node_compute_sw_os001 = 4.18.0-477.15.1.el8_8.x86_64
node_compute_sw_localfile = xfs
node_compute_sw_sharedfile = PANASAS FS
node_compute_sw_state = Run level 5
node_compute_sw_other = None
node_compute_sw_accel_driver = None

#[Fileserver]

#[Interconnect]
interconnect_fs_syslbl = Mellanox HDR
interconnect_fs_order = 0
interconnect_fs_purpose = MPI Traffic
interconnect_fs_hw_vendor = Mellanox
interconnect_fs_hw_model = Mellanox HDR
interconnect_fs_hw_switch_fs_model000 = Mellanox MQM8790-HS2F Quantum HDR
interconnect_fs_hw_switch_fs_model001 = InfiniBand Switch
interconnect_fs_hw_switch_fs_count = 18
interconnect_fs_hw_switch_fs_ports = 40
interconnect_fs_hw_topo = Fat-tree
interconnect_fs_hw_switch_fs_data_rate = 200 Gbit/s
interconnect_fs_hw_switch_fs_firmware = 20.36.1010

#######################################################################
# End of SUT section
######################################################################

######################################################################
# The header section of the config file.  Must appear
# before any instances of "section markers" (see below)
#
# ext = how the binaries you generated will be identified
# tune = specify "base" or "peak" or "all"
label         = %{label}_%{model}
tune          = all
output_format = text
use_submit_for_speed = 1
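
# With the default label (intel) and model=omp from the command line, the
# label above expands to "intel_omp", which is how the binaries built for
# this run are identified.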

# Compiler Settings
default:
AR           = ar
ARFLAGS      = cr
CC           = mpiicc -cc=icx
CXX          = mpiicpc -cxx=icpx
FC           = mpiifort -fc=ifx
sw_compiler  = Intel oneAPI Compiler 2024.1.0
sw_mpi_library = Intel MPI Library 2021.12 for Linux OS
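
# Note: the -cc=/-cxx=/-fc= options tell the Intel MPI compiler wrappers
# (mpiicc/mpiicpc/mpiifort) to drive the LLVM-based oneAPI compilers
# icx/icpx/ifx rather than the classic Intel compilers.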

# Compiler Version Flags
CC_VERSION_OPTION  = --version
CXX_VERSION_OPTION = --version
FC_VERSION_OPTION  = --version

# MPI options and binding environment, dependent upon Model being run
# Adjust to match your system

mpicmd = mpiexec.hydra -bootstrap ssh -genv OMP_NUM_THREADS $threads -np $ranks -ppn $ppn $command
submit = $mpicmd
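
# Illustratively, with this run's values (ranks=256, ppn=16, threads=7) the
# submit command expands to
#
#   mpiexec.hydra -bootstrap ssh -genv OMP_NUM_THREADS 7 -np 256 -ppn 16 $command
#
# i.e. 16 ranks on each of the 16 compute nodes, 7 OpenMP threads per rank
# (16 x 7 = 112 threads per node, one per physical core).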

#######################################################################
# Optimization

# Note that SPEC baseline rules require that all uses of a given compiler
# use the same flags in the same order. See the SPEChpc Run Rules
# for more details
#      http://www.spec.org/hpc2021/Docs/runrules.html
#
# OPTIMIZE    = flags applicable to all compilers
# FOPTIMIZE   = flags applicable to the Fortran compiler
# COPTIMIZE   = flags applicable to the C compiler
# CXXOPTIMIZE = flags applicable to the C++ compiler
#
# See your compiler manual for information on the flags available
# for your compiler

# Compiler flags applied to all models
default=base,peak:
OPTIMIZE      = -O3 -Ofast -xCORE-AVX512 -mprefer-vector-width=512 -qopt-multiple-gather-scatter-by-shuffles -fiopenmp -ffast-math -flto -funroll-loops
FOPTIMIZE     = -nostandard-realloc-lhs -align array64byte
CPORTABILITY  = -lstdc++ -std=c++14 -Wno-incompatible-function-pointer-types

default=base,peak=default:
    threads = %{THREADS}
    ppn     = %{ppn}
    ranks   = %{RANKS}

605.lbm_s=peak:
OPTIMIZE     += -qopt-streaming-stores=always
    threads=14
    ppn=8
    ranks=128
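
# These peak overrides re-tile each node while keeping all 16 nodes in use:
# 128 ranks / 8 per node = 16 nodes, and 8 ranks x 14 threads = 112 threads
# per node. 613.soma_s below uses the same geometry; 621.miniswp_s uses
# 64 ranks / 4 per node with 28 threads per rank (4 x 28 = 112).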

613.soma_s=peak:
    threads=14
    ppn=8
    ranks=128

618.tealeaf_s=peak:
basepeak=1

619.clvleaf_s=peak:
basepeak=1

621.miniswp_s=peak:
    threads=28
    ppn=4
    ranks=64

628.pot3d_s=peak:
basepeak=1

632.sph_exa_s=peak:
basepeak=1

634.hpgmgfv_s=peak:
basepeak=1

635.weather_s=peak:
basepeak=1
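
# basepeak=1 reports the base result for peak as well, so the six benchmarks
# marked with it above run only with their base binaries and settings.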

# The following section was added automatically, and contains settings that
# did not appear in the original configuration file, but were added to the
# raw file after the run.
default:
notes_000 = The PANASAS filesystem as described on this result page was formerly
notes_005 = generally available.  At the time of this publication, it may
notes_010 = not be shipping, and/or may not be supported, and/or may fail
notes_015 = to meet other tests of General Availability described in the
notes_020 = SPEC HPG Policy document, http://www.spec.org/hpg/policy.html
notes_025 =
notes_030 =HBM is configured as HBM cache mode.



# The following section was added automatically, and contains settings that
# did not appear in the original configuration file, but were added to the
# raw file after the run.
default:
flagsurl000 = http://www.spec.org/hpc2021/flags/HBM.xml
flagsurl001 = http://www.spec.org/hpc2021/flags/Intel_compiler_flags.2024-12-11.xml