Adds stand alone test_MOM_EOS and time_MOM_EOS #516

Merged
12 changes: 9 additions & 3 deletions .github/workflows/coverage.yml
@@ -19,12 +19,18 @@ jobs:

- uses: ./.github/actions/testing-setup

- name: Compile unit testing
run: make -j build/unit/MOM_unit_tests
- name: Compile file parser unit tests
run: make -j build/unit/test_MOM_file_parser

- name: Run unit tests
- name: Run file parser unit tests
run: make run.cov.unit

- name: Compile unit tests
run: make -j build.unit

- name: Run (single processor) unit tests
run: make run.unit

- name: Report unit test coverage to CI (PR)
if: github.event_name == 'pull_request'
run: make report.cov.unit REQUIRE_COVERAGE_UPLOAD=true
17 changes: 16 additions & 1 deletion .github/workflows/perfmon.yml
@@ -1,6 +1,6 @@
name: Performance Monitor

on: [pull_request]
on: [push, pull_request]

jobs:
build-test-perfmon:
@@ -20,19 +20,34 @@ jobs:
- uses: ./.github/actions/testing-setup

- name: Compile optimized models
if: ${{ github.event_name == 'pull_request' }}
run: >-
make -j build.prof
MOM_TARGET_SLUG=$GITHUB_REPOSITORY
MOM_TARGET_LOCAL_BRANCH=$GITHUB_BASE_REF
DO_REGRESSION_TESTS=true

- name: Generate profile data
if: ${{ github.event_name == 'pull_request' }}
run: >-
pip install f90nml &&
make profile
DO_REGRESSION_TESTS=true

- name: Generate perf data
if: ${{ github.event_name == 'pull_request' }}
run: |
sudo sysctl -w kernel.perf_event_paranoid=2
make perf DO_REGRESSION_TESTS=true

- name: Compile timing tests
run: |
make -j build.timing

- name: Run timing tests
run: |
make -j run.timing

- name: Display timing results
run: |
make -j show.timing
71 changes: 55 additions & 16 deletions .testing/Makefile
@@ -116,6 +116,9 @@ DO_PROFILE ?=
# Enable code coverage runs
DO_COVERAGE ?=

# Enable unit test runs
DO_UNIT_TESTS ?=

# Report failure if coverage report is not uploaded
REQUIRE_COVERAGE_UPLOAD ?=

@@ -151,10 +154,16 @@ ifeq ($(DO_PROFILE), true)
BUILDS += opt/MOM6 opt_target/MOM6
endif

# Unit testing
UNIT_EXECS ?= MOM_unit_tests
# Coverage
ifeq ($(DO_COVERAGE), true)
BUILDS += cov/MOM6 $(foreach e, $(UNIT_EXECS), unit/$(e))
BUILDS += cov/MOM6
endif

# Unit testing (or coverage)
UNIT_EXECS ?= $(basename $(notdir $(wildcard ../config_src/drivers/unit_tests/*.F90) ) )
TIMING_EXECS ?= $(basename $(notdir $(wildcard ../config_src/drivers/timing_tests/*.F90) ) )
ifneq (X$(DO_COVERAGE)$(DO_UNIT_TESTS)X, XX)
BUILDS += $(foreach e, $(UNIT_EXECS), unit/$(e))
endif

ifeq ($(DO_PROFILE), false)
@@ -258,13 +267,15 @@ build/coupled/Makefile: MOM_ENV += $(SYMMETRIC_FCFLAGS) $(MOM_LDFLAGS)
build/nuopc/Makefile: MOM_ENV += $(SYMMETRIC_FCFLAGS) $(MOM_LDFLAGS)
build/cov/Makefile: MOM_ENV += $(COV_FCFLAGS) $(COV_LDFLAGS)
build/unit/Makefile: MOM_ENV += $(COV_FCFLAGS) $(COV_LDFLAGS)
build/timing/Makefile: MOM_ENV += $(OPT_FCFLAGS) $(MOM_LDFLAGS)

# Configure script flags
MOM_ACFLAGS := --with-framework=$(FRAMEWORK)
build/openmp/Makefile: MOM_ACFLAGS += --enable-openmp
build/coupled/Makefile: MOM_ACFLAGS += --with-driver=FMS_cap
build/nuopc/Makefile: MOM_ACFLAGS += --with-driver=nuopc_cap
build/unit/Makefile: MOM_ACFLAGS += --with-driver=unit_tests
build/timing/Makefile: MOM_ACFLAGS += --with-driver=timing_tests

# Fetch regression target source code
build/target/Makefile: | $(TARGET_CODEBASE)
@@ -276,10 +287,15 @@ build/target_codebase/configure: $(TARGET_SOURCE)


# Build executables
$(foreach e,$(UNIT_EXECS),build/unit/$(e)): build/unit/Makefile $(MOM_SOURCE)
cd $(@D) && $(TIME) $(MAKE) -j
build/%/MOM6: build/%/Makefile $(MOM_SOURCE)
cd $(@D) && $(TIME) $(MAKE) -j
build/unit/test_%: build/unit/Makefile FORCE
cd $(@D) && $(TIME) $(MAKE) $(@F) -j
build/unit/Makefile: $(foreach e,$(UNIT_EXECS),../config_src/drivers/unit_tests/$(e).F90)
build/timing/time_%: build/timing/Makefile FORCE
cd $(@D) && $(TIME) $(MAKE) $(@F) -j
build/timing/Makefile: $(foreach e,$(TIMING_EXECS),../config_src/drivers/timing_tests/$(e).F90)
build/%/MOM6: build/%/Makefile FORCE
cd $(@D) && $(TIME) $(MAKE) $(@F) -j
FORCE: ;


# Use autoconf to construct the Makefile for each target
@@ -655,28 +671,47 @@ test.summary:
.PHONY: run.cov.unit
run.cov.unit: build/unit/MOM_file_parser_tests.F90.gcov

$(WORKSPACE)/work/unit/std.out: build/unit/MOM_unit_tests
.PHONY: build.unit
build.unit: $(foreach f, $(UNIT_EXECS), build/unit/$(f))
.PHONY: run.unit
run.unit: $(foreach f, $(UNIT_EXECS), work/unit/$(f).out)
.PHONY: build.timing
build.timing: $(foreach f, $(TIMING_EXECS), build/timing/$(f))
.PHONY: run.timing
run.timing: $(foreach f, $(TIMING_EXECS), work/timing/$(f).out)
.PHONY: show.timing
show.timing: $(foreach f, $(TIMING_EXECS), work/timing/$(f).show)
$(WORKSPACE)/work/timing/%.show:
./tools/disp_timing.py $(@:.show=.out)

# General rule to run a unit test executable
# Pattern is to run build/unit/executable and direct output to executable.out
$(WORKSPACE)/work/unit/%.out: build/unit/%
@mkdir -p $(@D)
cd $(@D) ; $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> >(tee $*.err) > $*.out

$(WORKSPACE)/work/unit/test_MOM_file_parser.out: build/unit/test_MOM_file_parser
if [ $(REPORT_COVERAGE) ]; then \
find build/unit -name *.gcda -exec rm -f '{}' \; ; \
fi
rm -rf $(@D)
mkdir -p $(@D)
cd $(@D) \
&& $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> std.err > std.out \
&& rm -f input.nml logfile.0000*.out *_input MOM_parameter_doc.* \
&& $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> test_MOM_file_parser.err > test_MOM_file_parser.out \
|| !( \
cat std.out | tail -n 100 ; \
cat std.err | tail -n 100 ; \
cat test_MOM_file_parser.out | tail -n 100 ; \
cat test_MOM_file_parser.err | tail -n 100 ; \
)
cd $(@D) \
&& $(TIME) $(MPIRUN) -n 2 $(abspath $<) 2> p2.std.err > p2.std.out \
&& $(TIME) $(MPIRUN) -n 2 $(abspath $<) 2> p2.test_MOM_file_parser.err > p2.test_MOM_file_parser.out \
|| !( \
cat p2.std.out | tail -n 100 ; \
cat p2.std.err | tail -n 100 ; \
cat p2.test_MOM_file_parser.out | tail -n 100 ; \
cat p2.test_MOM_file_parser.err | tail -n 100 ; \
)

# NOTE: .gcov actually depends on .gcda, but .gcda is produced with std.out
# TODO: Replace $(WORKSPACE)/work/unit/std.out with *.gcda?
build/unit/MOM_file_parser_tests.F90.gcov: $(WORKSPACE)/work/unit/std.out
build/unit/MOM_file_parser_tests.F90.gcov: $(WORKSPACE)/work/unit/test_MOM_file_parser.out
cd $(@D) \
&& gcov -b *.gcda > gcov.unit.out
find $(@D) -name "*.gcov" -exec sed -i -r 's/^( *[0-9]*)\*:/ \1:/g' {} \;
@@ -693,6 +728,10 @@ report.cov.unit: build/unit/MOM_file_parser_tests.F90.gcov codecov
if [ "$(REQUIRE_COVERAGE_UPLOAD)" = true ] ; then false ; fi ; \
}

$(WORKSPACE)/work/timing/%.out: build/timing/% FORCE
@mkdir -p $(@D)
@echo Running $< in $(@D)
@cd $(@D) ; $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> $*.err > $*.out

#---
# Profiling based on FMS clocks
11 changes: 11 additions & 0 deletions .testing/README.rst
@@ -22,6 +22,17 @@ Usage
``make clean``
Delete the MOM6 test executables and dependency builds (FMS).

``make -j build.unit``
Build the unit test programs in config_src/drivers/unit_tests.

``make -j run.unit``
Run the unit test programs from config_src/drivers/unit_tests in $(WORKSPACE)/work/unit.

``make -j build.timing``
Build the timing test programs in config_src/drivers/timing_tests.

``make -j run.timing``
Run the timing test programs from config_src/drivers/timing_tests in $(WORKSPACE)/work/timing.

Configuration
=============
133 changes: 133 additions & 0 deletions .testing/tools/disp_timing.py
@@ -0,0 +1,133 @@
#!/usr/bin/env python3

from __future__ import print_function

import argparse
import json
import math

scale = 1e6  # micro-seconds (should make this dynamic)


def display_timing_file(file, show_all):
    """Parse a JSON file of timing results and pretty-print the results"""

    with open(file) as json_file:
        timing_dict = json.load(json_file)

    print("(Times measured in %5.0e seconds)" % (1./scale))
    print(" Min time Module & function")
    for sub in timing_dict.keys():
        tmin = timing_dict[sub]['min'] * scale
        print("%10.4e %s" % (tmin, sub))

        if show_all:
            tmean = timing_dict[sub]['mean'] * scale
            tmax = timing_dict[sub]['max'] * scale
            tstd = timing_dict[sub]['std'] * scale
            nsamp = timing_dict[sub]['n_samples']
            tsstd = tstd / math.sqrt(nsamp)
            print(" (" +
                  "mean = %10.4e " % (tmean) +
                  "±%7.1e, " % (tsstd) +
                  "max = %10.4e, " % (tmax) +
                  "std = %8.2e, " % (tstd) +
                  "# = %d)" % (nsamp))


def compare_timing_files(file, ref, show_all, significance_threshold):
    """Read and compare two JSON files of timing results"""

    with open(file) as json_file:
        timing_dict = json.load(json_file)

    with open(ref) as json_file:
        ref_dict = json.load(json_file)

    print("(Times measured in %5.0e seconds)" % (1./scale))
    print(" Delta (%) Module & function")
    for sub in {**ref_dict, **timing_dict}.keys():
        T1 = ref_dict.get(sub)
        T2 = timing_dict.get(sub)
        if T1 is not None:
            # stats for reference (old)
            tmin1 = T1['min'] * scale
            tmean1 = T1['mean'] * scale
        if T2 is not None:
            # stats for current run (new)
            tmin2 = T2['min'] * scale
            tmean2 = T2['mean'] * scale
        if (T1 is not None) and (T2 is not None):
            # change in actual minimum as percentage of old
            dt = (tmin2 - tmin1) * 100 / tmin1
            if dt < -significance_threshold:
                color = '\033[92m'
            elif dt > significance_threshold:
                color = '\033[91m'
            else:
                color = ''
            print("%s%+10.4f%%\033[0m %s" % (color, dt, sub))
        else:
            if T2 is None:
                print(" removed %s" % (sub))
            else:
                print(" added %s" % (sub))

        if show_all:
            if T2 is None:
                print(" --")
            else:
                tmax2 = T2['max'] * scale
                tstd2 = T2['std'] * scale
                n2 = T2['n_samples']
                tsstd2 = tstd2 / math.sqrt(n2)
                print(" %10.4e (" % (tmin2) +
                      "mean = %10.4e " % (tmean2) +
                      "±%7.1e, " % (tsstd2) +
                      "max=%10.4e, " % (tmax2) +
                      "std=%8.2e, " % (tstd2) +
                      "# = %d)" % (n2))
            if T1 is None:
                print(" --")
            else:
                tmax1 = T1['max'] * scale
                tstd1 = T1['std'] * scale
                n1 = T1['n_samples']
                tsstd1 = tstd1 / math.sqrt(n1)
                print(" %10.4e (" % (tmin1) +
                      "mean = %10.4e " % (tmean1) +
                      "±%7.1e, " % (tsstd1) +
                      "max=%10.4e, " % (tmax1) +
                      "std=%8.2e, " % (tstd1) +
                      "# = %d)" % (n1))


# Parse arguments
parser = argparse.ArgumentParser(
    description="Beautify timing output from MOM6 timing tests."
)
parser.add_argument(
    'file',
    help="File to process."
)
parser.add_argument(
    '-a', '--all',
    action='store_true',
    help="Display all metrics rather than just the minimum time."
)
parser.add_argument(
    '-t', '--threshold',
    default=6.0, type=float,
    help="Significance threshold to flag (percentage)."
)
parser.add_argument(
    '-r', '--reference',
    help="Reference file to compare against."
)
args = parser.parse_args()

# Do the thing
if args.reference is None:
    display_timing_file(args.file, args.all)
else:
    compare_timing_files(args.file, args.reference, args.all, args.threshold)
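
For context, a minimal sketch of the timing-file layout that disp_timing.py appears to assume, based only on the keys it reads ('min', 'mean', 'max', 'std', 'n_samples', with times in seconds): the subroutine label, the numbers, and the file name below are illustrative assumptions, not actual output of time_MOM_EOS.

#!/usr/bin/env python3
# Sketch: write a toy timing file in the shape disp_timing.py reads and
# show how it would be displayed. Only the key names come from the script;
# the label, values, and file names are made up for illustration.
import json

toy_timing = {
    "MOM_EOS: calculate_density": {   # hypothetical module & function label
        "min": 1.2e-6,                # seconds; rescaled by 1e6 for display
        "mean": 1.5e-6,
        "max": 3.0e-6,
        "std": 2.0e-7,
        "n_samples": 1000,
    },
}

with open("time_MOM_EOS.out", "w") as f:  # name mirrors the work/timing/%.out rule
    json.dump(toy_timing, f, indent=2)

# Display one file (similar to what `make show.timing` runs per test):
#   ./tools/disp_timing.py -a time_MOM_EOS.out
# Compare against a reference run:
#   ./tools/disp_timing.py -r reference/time_MOM_EOS.out time_MOM_EOS.out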