# Invocation command line:
# /store/mackey/specMPI/bin/runspec --ranks 3072 --reportable -a validate -o asc,csv,html -n 3 -T all -c default.cfg --define ICCV=16.0.3.210 --define MPI=MPT --define PPN=24 --define RUNPEAK --define TOTALHOSTLIST=r1i0n0,r1i0n1,r1i0n2,r1i0n3,r1i0n4,r1i0n5,r1i0n6,r1i0n7,r1i0n8,r1i0n9,r1i0n10,r1i0n11,r1i0n12,r1i0n13,r1i0n14,r1i0n15,r1i0n16,r1i0n17,r1i0n18,r1i0n19,r1i0n20,r1i0n21,r1i0n22,r1i0n23,r1i0n24,r1i0n25,r1i0n26,r1i0n27,r1i0n28,r1i0n29,r1i0n30,r1i0n31,r1i0n32,r1i0n33,r1i0n34,r1i0n35,r1i1n0,r1i1n1,r1i1n2,r1i1n3,r1i1n4,r1i1n5,r1i1n6,r1i1n7,r1i1n8,r1i1n9,r1i1n10,r1i1n11,r1i1n12,r1i1n13,r1i1n14,r1i1n15,r1i1n16,r1i1n17,r1i1n18,r1i1n19,r1i1n20,r1i1n21,r1i1n22,r1i1n23,r1i1n24,r1i1n25,r1i1n26,r1i1n27,r1i1n28,r1i1n29,r1i1n30,r1i1n31,r1i1n32,r1i1n33,r1i1n34,r1i1n35,r1i2n0,r1i2n1,r1i2n2,r1i2n3,r1i2n4,r1i2n5,r1i2n6,r1i2n7,r1i2n8,r1i2n9,r1i2n10,r1i2n11,r1i2n12,r1i2n13,r1i2n14,r1i2n15,r1i2n16,r1i2n17,r1i2n18,r1i2n19,r1i2n20,r1i2n21,r1i2n22,r1i2n23,r1i2n24,r1i2n25,r1i2n26,r1i2n27,r1i2n28,r1i2n29,r1i2n30,r1i2n31,r1i2n32,r1i2n33,r1i2n34,r1i2n35,r1i3n0,r1i3n1,r1i3n2,r1i3n3,r1i3n4,r1i3n5,r1i3n6,r1i3n7,r1i3n8,r1i3n9,r1i3n10,r1i3n11,r1i3n12,r1i3n13,r1i3n14,r1i3n15,r1i3n16,r1i3n17,r1i3n18,r1i3n19 --define PARTIALHOSTLIST=r1i0n0,r1i0n1,r1i0n2,r1i0n3,r1i0n4,r1i0n5,r1i0n6,r1i0n7,r1i0n8,r1i0n9,r1i0n10,r1i0n11,r1i0n12,r1i0n13,r1i0n14,r1i0n15,r1i0n16,r1i0n17,r1i0n18,r1i0n19,r1i0n20,r1i0n21,r1i0n22,r1i0n23,r1i0n24,r1i0n25,r1i0n26,r1i0n27,r1i0n28,r1i0n29,r1i0n30,r1i0n31,r1i0n32,r1i0n33,r1i0n34,r1i0n35,r1i1n0,r1i1n1,r1i1n2,r1i1n3,r1i1n4,r1i1n5,r1i1n6,r1i1n7,r1i1n8,r1i1n9,r1i1n10,r1i1n11,r1i1n12,r1i1n13,r1i1n14,r1i1n15,r1i1n16,r1i1n17,r1i1n18,r1i1n19,r1i1n20,r1i1n21,r1i1n22,r1i1n23,r1i1n24,r1i1n25,r1i1n26,r1i1n27,r1i1n28,r1i1n29,r1i1n30,r1i1n31,r1i1n32,r1i1n33,r1i1n34,r1i1n35,r1i2n0,r1i2n1,r1i2n2,r1i2n3,r1i2n4,r1i2n5,r1i2n6,r1i2n7,r1i2n8,r1i2n9,r1i2n10,r1i2n11,r1i2n12,r1i2n13,r1i2n14,r1i2n15,r1i2n16,r1i2n17,r1i2n18,r1i2n19,r1i2n20,r1i2n21,r1i2n22,r1i2n23,r1i2n24,r1i2n25,r1i2n26,r1i2n27,r1i2n28,r1i2n29,r1i2n30,r1i2n31,r1i2n32,r1i2n33,r1i2n34,r1i2n35,r1i3n0,r1i3n1,r1i3n2,r1i3n3,r1i3n4,r1i3n5,r1i3n6,r1i3n7,r1i3n8,r1i3n9,r1i3n10,r1i3n11,r1i3n12,r1i3n13,r1i3n14,r1i3n15,r1i3n16,r1i3n17,r1i3n18 --define LASTHOST=r1i3n19 --define HW_NODES=128 -i lref --define SIZE=large large
# output_root was not used for this run
############################################################################
####################################################################
#
# Platform Description
#
####################################################################
# Reporter metadata for the SPEC MPI2007 tools.  Every key = value pair
# below appears verbatim in the published report; none of it affects
# how the benchmarks are built or run.
# NOTE(review): TOTALHOSTLIST above lists 128 nodes (r1i0n0..r1i3n19);
# PARTIALHOSTLIST is the same list minus the final node, which is passed
# separately as LASTHOST -- this matches HW_NODES=128.
hw_avail = May-2016
license_num = 14
prepared_by = Mark Mackey
sw_avail = Jun-2016
sw_base_ptrsize = 64-bit
sw_other = None
sw_peak_ptrsize = 64-bit
system_vendor = SGI
# Interconnect used for both MPI and I/O traffic (a single InfiniBand
# fabric; see notes_100..notes_120 for the two-plane description).
interconnect_IOMPI_hw_topo = Enhanced Hypercube
interconnect_IOMPI_hw_switch_1_model = SGI P0002145
interconnect_IOMPI_purpose = MPI and I/O traffic
interconnect_IOMPI_order = 1
interconnect_IOMPI_label = InfiniBand (MPI and I/O)
interconnect_IOMPI_hw_vendor = Mellanox Technologies and SGI
interconnect_IOMPI_hw_switch_1_ports = 36
interconnect_IOMPI_hw_switch_1_firmware = 11.0350.0394
interconnect_IOMPI_hw_switch_1_data_rate = InfiniBand 4x EDR
interconnect_IOMPI_hw_switch_1_count = 30
interconnect_IOMPI_hw_model = None
test_sponsor = SGI
tester = SGI
system_class = Homogeneous
sw_preprocessors = None
sw_mpi_other = OFED 3.2.2
sw_mpi_library = SGI MPT 2.14 Patch 11333
# --- Compute nodes (128 x SGI ICE XA, dual 14-core Xeon E5-2690 v4) ---
node_compute_count = 128
node_compute_hw_adapter_IB_count = 2
node_compute_hw_adapter_IB_data_rate = InfiniBand 4X EDR
node_compute_hw_adapter_IB_driver = OFED-3.2.1.5.3
node_compute_hw_adapter_IB_firmware = 12.14.0114
node_compute_hw_adapter_IB_interconnect = InfiniBand
node_compute_hw_adapter_IB_model000 = Mellanox MT27700 with ConnectX-4
node_compute_hw_adapter_IB_model001 = ASIC (PCIe x16 Gen3 8 GT/s)
node_compute_hw_adapter_IB_ports_used = 1
node_compute_hw_adapter_IB_slot_type = PCIe x16 Gen3
node_compute_hw_cpu_char000= 14 Core, 2.60 GHz, 9.6 GT/s QPI
node_compute_hw_cpu_char001 = Intel Turbo Boost Technology up to 3.50 GHz
node_compute_hw_cpu_char002 = Hyper-Threading Technology enabled
node_compute_hw_cpu_mhz = 2600
node_compute_hw_cpu_name = Intel Xeon E5-2690 v4
node_compute_hw_disk = None
node_compute_hw_memory = 128 GB (8 x 16 GB 2Rx4 PC4-2400T-R)
node_compute_hw_model = SGI ICE XA (Intel Xeon E5-2690 v4, 2.6 GHz)
node_compute_hw_nchips = 2
node_compute_hw_ncores = 28
node_compute_hw_ncoresperchip = 14
node_compute_hw_ncpuorder = 1-2 chips
node_compute_hw_nthreadspercore = 2
node_compute_hw_ocache = None
node_compute_hw_other = None
node_compute_hw_pcache = 32 KB I + 32 KB D on chip per core
node_compute_hw_scache = 256 KB I+D on chip per core
node_compute_hw_tcache = 35 MB I+D on chip per chip
node_compute_hw_vendor = SGI
node_compute_label = SGI ICE XA IP-125 CS
node_compute_order = 2
node_compute_purpose = compute
node_compute_sw_localfile = NFSv3
node_compute_sw_os000 = SUSE Linux Enterprise Server 11 SP4 (x86_64),
node_compute_sw_os001 = Kernel 3.0.101-71.1.10690.1.PTF-default
node_compute_sw_other000= SGI Tempo Compute Node 3.3.0,
node_compute_sw_other001 = Build 714r18.sles11sp4-1604041900
node_compute_sw_sharedfile = NFSv3 IPoIB
node_compute_sw_state = Multi-user, run level 3
# --- Fileserver node (single SGI MIS server, FDR InfiniBand attach) ---
node_fileserver_count = 1
node_fileserver_hw_adapter_IB_count = 2
node_fileserver_hw_adapter_IB_data_rate = InfiniBand 4X FDR
node_fileserver_hw_adapter_IB_driver = OFED-3.2.0.1.1
node_fileserver_hw_adapter_IB_firmware = 2.36.5000
node_fileserver_hw_adapter_IB_interconnect = InfiniBand
node_fileserver_hw_adapter_IB_model000 = Mellanox MT27500 with ConnectX-3 ASIC
node_fileserver_hw_adapter_IB_ports_used = 2
node_fileserver_hw_adapter_IB_slot_type = PCIe x8 Gen3
node_fileserver_hw_cpu_char000= Intel Turbo Boost Technology up to 3.30 GHz
node_fileserver_hw_cpu_char001 = Hyper-Threading Technology disabled
node_fileserver_hw_cpu_mhz = 1200
node_fileserver_hw_cpu_name = Intel Xeon E5-2670
node_fileserver_hw_disk000 = 45 TB RAID 6
node_fileserver_hw_disk001 = 8 x 6+2 900GB (WD, 10K RPM)
node_fileserver_hw_memory = 128 GB (12 * 8 GB 2Rx4 PC3-12800R-11, ECC)
node_fileserver_hw_model000= SGI MIS Server
node_fileserver_hw_nchips = 2
node_fileserver_hw_ncores = 16
node_fileserver_hw_ncoresperchip = 8
node_fileserver_hw_ncpuorder = 1-2 chips
node_fileserver_hw_nthreadspercore = 1
node_fileserver_hw_ocache = None
node_fileserver_hw_other = None
node_fileserver_hw_pcache = 32 KB I + 32 KB D on chip per core
node_fileserver_hw_scache = 256 KB I+D on chip per core
node_fileserver_hw_tcache = 20 MB I+D on chip per chip
node_fileserver_hw_vendor = SGI
node_fileserver_label = SGI MIS Server
node_fileserver_order = 3
node_fileserver_purpose = fileserver
node_fileserver_sw_localfile = xfs
node_fileserver_sw_os000 = SUSE Linux Enterprise Server 11 (x86_64),
node_fileserver_sw_os001 = Kernel 3.0.101-0.46-default
node_fileserver_sw_other000= SGI Foundation Software 2.9,
node_fileserver_sw_other001 = Build 711r2.sles11sp3-1411192056
node_fileserver_sw_state = Multi-user, run level 3
# --- Compilers (Intel Composer XE 2016, version 16.0.3.210) ---
# NOTE(review): the C compiler reports Build 20160415 while the C++ and
# Fortran compilers report Build 20160405 -- verify against the installed
# toolchain; this may be accurate but looks like a possible typo.
sw_c_compiler000 = Intel C++ Composer XE 2016 for Linux,
sw_c_compiler001 = Version 16.0.3.210 Build 20160415
sw_cxx_compiler000 = Intel C++ Composer XE 2016 for Linux
sw_cxx_compiler001 = Version 16.0.3.210 Build 20160405
sw_f_compiler000 = Intel Fortran Composer XE 2016 for Linux,
sw_f_compiler001 = Version 16.0.3.210 Build 20160405
system_name000 = SGI ICE XA
system_name001 = (Intel Xeon E5-2690 v4, 2.6 GHz)
# --- Free-form report notes.  The reporter orders these by numeric
# suffix, so notes_016 sorts before notes_035 regardless of file order.
# NOTE(review): notes_016 duplicates notes_020 in the auto-appended
# section at the end of this file (both say
# "export MPI_IB_UPGRADE_SENDS=50") -- confirm whether one should be
# removed from the raw file.
notes_035 = export MPI_CONNECTIONS_THRESHOLD=0
notes_040 = export MPI_IB_MTU=4096
notes_045 = ulimit -s unlimited
notes_050 =BIOS settings:
notes_016 = export MPI_IB_UPGRADE_SENDS=50
notes_055 = AMI BIOS version HA012036
notes_060 = Hyper-Threading Technology enabled
notes_065 = Intel Turbo Boost Technology enabled (default)
notes_070 = Transparent Hugepages Enabled
notes_075 =Job Placement:
notes_080 = Each MPI job was assigned to a topologically compact set
notes_085 = of nodes. The base run used 12 ranks per socket and peak
notes_090 = runs varied between 8 and 14 ranks per socket. The total
notes_095 = number of sockets and nodes used was constant.
notes_100 =Additional notes regarding interconnect:
notes_105 = The Infiniband network consists of two independent planes,
notes_110 = with half the switches in the system allocated to each plane.
notes_115 = I/O traffic is restricted to one plane, while MPI traffic can
notes_120 = use both planes.
####################################################################
#
# defaults
#
####################################################################
# NOTE(review): the flags file is the Intel14 one even though this run
# uses ICCV=16.0.3.210 -- presumably the flag descriptions still apply;
# confirm against the published flags files.
flagsurl000= http://www.spec.org/mpi2007/flags/SGI_x86_64_Intel14_flags.20140908.xml
# SIZE and ICCV must both be given on the runspec command line
# (--define SIZE=..., --define ICCV=...).
%if !defined(%{ICCV}) || !defined(%{SIZE})
% error must define both SIZE and ICCV
%endif
# Select the binary extension (ext) from the MPI library, compiler
# version and dataset size.  MPT is the default when MPI is undefined.
# NOTE(review): the INTEL and OMPI branches test %{ICC} (14 vs 16), but
# only ICCV is validated above and the recorded invocation defines only
# ICCV -- confirm that ICC is also defined whenever MPI=INTEL or
# MPI=OMPI, otherwise those comparisons operate on an undefined macro.
%if !defined(%{MPI}) || '%{MPI}' eq 'MPT'
ext=sgimpi.intel.%{ICCV}.avx2.%{SIZE}
%elif '%{MPI}' eq 'INTEL'
% if %{ICC} == 14
% if '%{SIZE}' eq 'medium'
ext=intel-mpi.intel.%{ICCV}.avx2.med
% elif '%{SIZE}' eq 'large'
ext=intel-mpi.intel.%{ICCV}.avx2.large
% else
% error invalid SIZE '%{SIZE}'
% endif
% elif %{ICC} == 16 && '%{SIZE}' eq 'medium'
ext=intel-mpi.intel.%{ICCV}.msse2
% elif %{ICC} == 16 && '%{SIZE}' eq 'large'
ext=intel-mpi.intel.%{ICCV}.avx2
% else
% error invalid combination of SIZE and ICC
% endif
%elif '%{MPI}' eq 'OMPI'
% if %{ICC} == 14
% if '%{SIZE}' eq 'medium'
ext=openmpi.intel.%{ICCV}.avx2.med
% elif '%{SIZE}' eq 'large'
ext=openmpi.intel.%{ICCV}.avx2.large
% else
% error invalid SIZE '%{SIZE}'
% endif
% elif %{ICC} == 16 && '%{SIZE}' eq 'medium'
ext=openmpi.intel.%{ICCV}.msse2
% elif %{ICC} == 16 && '%{SIZE}' eq 'large'
ext=openmpi.intel.%{ICCV}.avx2
% else
% error invalid combination of SIZE and ICC
% endif
%endif
# Global run controls for a reportable run.
action=validate
tune=base
input=ref
teeout=no
env_vars=1
no_input_handler=null
mean_anyway=1
strict_rundir_verify = 1
makeflags=-j 16
# Compiler drivers: MPI-wrapper compilers for Intel MPI and Open MPI;
# plain Intel compilers for MPT (MPT links via EXTRA_LIBS = -lmpi below).
%if '%{MPI}' eq 'INTEL'
FC = mpiifort
CC = mpiicc
CXX = mpiicpc
%elif '%{MPI}' eq 'OMPI'
FC = mpif90
CC = mpicc
CXX = mpic++
%else
FC = ifort
CC = icc
CXX = icpc
%endif
####################################################################
#
# Base flags
#
####################################################################
default=default=default=default:
FOPTIMIZE = -O3 -xCORE-AVX2 -no-prec-div
%if '%{MPI}' eq 'INTEL'
OPTIMIZE = -O3 -xAVX -no-prec-div -ipo
%else
COPTIMIZE = -O3 -xCORE-AVX2 -no-prec-div
CXXOPTIMIZE = -O3 -xCORE-AVX2 -no-prec-div -ansi-alias
# MPT binaries link the SGI MPT library explicitly.
EXTRA_LIBS = -lmpi
%endif
# With ICC 16 and the medium dataset, drop back to -O2 -msse2
# (overrides the AVX2 base flags above).
%if %{ICC} == 16 && '%{SIZE}' eq 'medium'
COPTIMIZE = -O2 -msse2 -no-prec-div
CXXOPTIMIZE = -O2 -msse2 -no-prec-div
FOPTIMIZE = -O2 -msse2 -no-prec-div
%endif
default=default=default=default:
use_submit_for_speed=1
# --- submit command selection ---
# MPT (default): if HOSTLIST/RPH (and optionally SPILLHOST/SPILL) are
# defined, build an explicit per-host mpirun invocation with
# MPI_DSM_CPULIST pinning; otherwise fall back to mpiexec_mpt with an
# optional -ppn.  MPINSIDE wraps the command with the MPInside profiler.
% if !defined(%{MPI}) || '%{MPI}' eq 'MPT'
% if defined(%{HOSTLIST}) && defined(%{RPH}) && defined(%{SPILLHOST}) && defined(%{SPILL})
% ifdef %{MPINSIDE}
submit=MPI_DSM_CPULIST=%{CPULIST} mpirun %{HOSTLIST} %{RPH} MPInside $command : %{SPILLHOST} %{SPILL} MPInside $command
% else
submit=MPI_DSM_CPULIST=%{CPULIST} mpirun %{HOSTLIST} %{RPH} $command : %{SPILLHOST} %{SPILL} $command
% endif
% elif defined(%{HOSTLIST}) && defined(%{RPH}) && %{SPILL} == 0
% ifdef %{MPINSIDE}
submit=MPI_DSM_CPULIST=%{CPULIST} mpirun %{HOSTLIST} %{RPH} MPInside $command
% else
submit=MPI_DSM_CPULIST=%{CPULIST} mpirun %{HOSTLIST} %{RPH} $command
% endif
% else
% ifdef %{MPINSIDE}
% ifndef %{PPN}
submit=mpiexec_mpt -n $ranks MPInside $command
% else
submit=mpiexec_mpt -ppn %{PPN} -n $ranks MPInside $command
% endif
% else
% ifndef %{PPN}
submit=mpiexec_mpt -n $ranks $command
% else
submit=mpiexec_mpt -ppn %{PPN} -n $ranks $command
% endif
% endif
% endif
# Intel MPI: launch through mpiexec.hydra using the PBS node file.
% elif '%{MPI}' eq 'INTEL'
% ifndef %{PPN}
submit=mpiexec.hydra -machinefile \$PBS_NODEFILE -genv I_MPI_PIN 1 -genv I_MPI_OFA_ADAPTER_NAME mlx5_0 -n $ranks $command
% else
submit=mpiexec.hydra -machinefile \$PBS_NODEFILE -ppn %{PPN} -genv I_MPI_PIN 1 -genv I_MPI_OFA_ADAPTER_NAME mlx5_0 -n $ranks $command
% endif
# Open MPI: the %define macros below are alternative MCA/MXM option
# sets; the submit line uses KNEM + NOHCOLL + MLX50 + MXM + DC.
% elif '%{MPI}' eq 'OMPI'
%define NOMXM -mca pml ob1 -mca btl self,sm,openib
%define MXM -mca pml yalla
%define HCOLL # on by default
%define NOHCOLL -mca coll_hcoll_enable 0
%define MLX50 -x MXM_RDMA_PORTS=mlx5_0:1 -mca btl_openib_if_include mlx5_0:1
%define KNEM -mca btl_sm_use_knem 1 -x MXM_SHM_KCOPY_MODE=knem
%define NOKNEM -mca btl_sm_use_knem 0 -x MXM_SHM_KCOPY_MODE=off
%define UD -x MXM_TLS=self,shm,ud
%define RC -x MXM_TLS=self,shm,rc -x MXM_RC_QP_LIMIT=-1
%define DC -x MXM_TLS=self,shm,dc
submit=mpirun %{KNEM} %{NOHCOLL} %{MLX50} %{MXM} %{DC} -x LD_LIBRARY_PATH=\$LD_LIBRARY_PATH -np $ranks --hostfile \$PBS_NODEFILE --rankfile %{RANKFILE} $command
% else
submit=mpirun -np $ranks $command
% endif
# Retained (commented-out) alternative launch lines from tuning runs:
#mpiexec.hydra -machinefile $PBS_NODEFILE -genv I_MPI_PIN_PROCESSOR_LIST=14-25,0-11 -ppn 24 -genv I_MPI_PIN 1 -genv I_MPI_OFA_ADAPTER_NAME=mlx5_0 -n 3072
#submit=mpiexec_mpt -n $ranks dplace -s1 -c2-5,8-11 $command
####################################################################
#
# Peak flags
#
####################################################################
# Medium Dataset
####################################################################
# Per-benchmark peak overrides: each section sets a rank count and a
# matching mpirun line (CPU list + ranks-per-host).  Benchmarks where
# peak equals base use basepeak=yes.  Where ranks is not a multiple of
# the full node count, PARTIALHOSTLIST carries most ranks and LASTHOST
# takes the remainder.
%if '%{SIZE}' eq 'medium'
104.milc=peak=default=default:
basepeak=yes
107.leslie3d=peak=default=default:
ranks=1120
submit=MPI_DSM_CPULIST=14-27,0-13:allhosts mpirun %{TOTALHOSTLIST} 28 $command
113.GemsFDTD=peak=default=default:
ranks=320
submit=MPI_DSM_CPULIST=14-17,0-3:allhosts mpirun %{TOTALHOSTLIST} 8 $command
115.fds4=peak=default=default:
basepeak=yes
121.pop2=peak=default=default:
ranks=512
submit=MPI_DSM_CPULIST=14-20,0-5:allhosts mpirun %{PARTIALHOSTLIST} 13 $command : %{LASTHOST} 5 $command
122.tachyon=peak=default=default:
ranks=1120
submit=MPI_DSM_CPULIST=14-27,0-13:allhosts mpirun %{TOTALHOSTLIST} 28 $command
126.lammps=peak=default=default:
ranks=320
submit=MPI_DSM_CPULIST=14-17,0-3:allhosts mpirun %{TOTALHOSTLIST} 8 $command
127.wrf2=peak=default=default:
basepeak=yes
128.GAPgeofem=peak=default=default:
ranks=1024
submit=MPI_DSM_CPULIST=14-26,0-12:allhosts mpirun %{PARTIALHOSTLIST} 26 $command : %{LASTHOST} 10 $command
129.tera_tf=peak=default=default:
ranks=1024
submit=MPI_DSM_CPULIST=14-26,0-12:allhosts mpirun %{PARTIALHOSTLIST} 26 $command : %{LASTHOST} 10 $command
130.socorro=peak=default=default:
ranks=640
submit=MPI_DSM_CPULIST=14-21,0-7:allhosts mpirun %{TOTALHOSTLIST} 16 $command
132.zeusmp2=peak=default=default:
ranks=512
submit=MPI_DSM_CPULIST=14-20,0-5:allhosts mpirun %{PARTIALHOSTLIST} 13 $command : %{LASTHOST} 5 $command
137.lu=peak=default=default:
ranks=512
submit=MPI_DSM_CPULIST=14-20,0-5:allhosts mpirun %{PARTIALHOSTLIST} 13 $command : %{LASTHOST} 5 $command
%endif
####################################################################
# Large Dataset
####################################################################
%if '%{SIZE}' eq 'large'
121.pop2=peak=default=default:
ranks=2048
submit=MPI_DSM_CPULIST=14-21,0-7:allhosts mpirun %{TOTALHOSTLIST} 16 $command
122.tachyon=peak=default=default:
ranks=3584
submit=MPI_DSM_CPULIST=14-27,0-13:allhosts mpirun %{TOTALHOSTLIST} 28 $command
125.RAxML=peak=default=default:
ranks=3584
submit=MPI_DSM_CPULIST=14-27,0-13:allhosts mpirun %{TOTALHOSTLIST} 28 $command
126.lammps=peak=default=default:
basepeak=yes
128.GAPgeofem=peak=default=default:
ranks=3584
submit=MPI_DSM_CPULIST=14-27,0-13:allhosts mpirun %{TOTALHOSTLIST} 28 $command
129.tera_tf=peak=default=default:
basepeak=yes
132.zeusmp2=peak=default=default:
ranks=2048
submit=MPI_DSM_CPULIST=14-21,0-7:allhosts mpirun %{TOTALHOSTLIST} 16 $command
137.lu=peak=default=default:
ranks=2048
submit=MPI_DSM_CPULIST=14-21,0-7:allhosts mpirun %{TOTALHOSTLIST} 16 $command
142.dmilc=peak=default=default:
basepeak=yes
143.dleslie=peak=default=default:
basepeak=yes
145.lGemsFDTD=peak=default=default:
basepeak=yes
147.l2wrf2=peak=default=default:
ranks=3584
submit=MPI_DSM_CPULIST=14-27,0-13:allhosts mpirun %{TOTALHOSTLIST} 28 $command
# Empty section header; the experimental env/submit overrides for
# 115.fds4 below were left commented out.
115.fds4=default=default=default:
#%if '%{MPI}' eq 'MPT' && defined(%{HOSTLIST})
#ENV_MPI_DSM_CPULIST='14-27,0-13:allhosts'
#ENV_MPI_DSM_CPULIST='0-27:allhosts'
#ENV_MPI_DSM_VERBOSE=1
#ENV_MPI_VERBOSE=1
#submit=mpiexec_mpt -v -n $ranks $command
#%endif
%endif
####################################################################
#
# Portability flags
#
####################################################################
121.pop2=default=default=default:
CPORTABILITY = -DSPEC_MPI_CASE_FLAG
127.wrf2=default=default=default:
CPORTABILITY = -DSPEC_MPI_CASE_FLAG -DSPEC_MPI_LINUX
# 130.socorro uses the approved nullify_ptrs src.alt.
130.socorro=default=default=default:
notes_base_130=src.alt used: 130.socorro->nullify_ptrs
srcalt=nullify_ptrs
FPORTABILITY=-assume nostd_intent_in
# 129.tera_tf needs the add_rank_support src.alt only for the medium
# dataset.
129.tera_tf=default=default=default:
%if '%{SIZE}' eq 'medium'
srcalt=add_rank_support
notes_base_129=src.alt used: 129.tera_tf->add_rank_support
%endif
143.dleslie=default=default=default:
srcalt=integer_overflow
# The following section was added automatically, and contains settings that
# did not appear in the original configuration file, but were added to the
# raw file after the run.
default:
notes_000 =Software environment:
notes_005 = export MPI_REQUEST_MAX=65536
notes_010 = export MPI_TYPE_MAX=32768
notes_015 = export MPI_IB_RAILS=2
notes_020 = export MPI_IB_UPGRADE_SENDS=50
notes_025 = export MPI_IB_IMM_UPGRADE=false
notes_030 = export MPI_IB_DCIS=2