Revert 3430 and 3425 #3433

Merged 1 commit on Nov 12, 2024
333 changes: 6 additions & 327 deletions .buildkite/pipeline.yml

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions config/default_configs/default_config.yml
@@ -217,6 +217,9 @@ non_orographic_gravity_wave:
nh_poly:
  help: "Horizontal polynomial degree. Note: The number of quadrature points in 1D within each horizontal element is then Nq = <--nh_poly> + 1"
  value: 3
reproducibility_test:
  help: "(Bool) perform reproducibility test"
  value: false
check_conservation:
  help: "Check conservation of mass and energy [`false` (default), `true`]"
  value: false
@@ -1,5 +1,6 @@
precip_model: "0M"
dt_save_state_to_disk: "2days"
reproducibility_test: true
initial_condition: "MoistBaroclinicWave"
dt: "450secs"
t_end: "10days"
1 change: 1 addition & 0 deletions config/model_configs/diagnostic_edmfx_aquaplanet.yml
@@ -19,5 +19,6 @@ cloud_model: "quadrature_sgs"
precip_model: 1M
dt: 120secs
t_end: 3hours
reproducibility_test: true
toml: [toml/diagnostic_edmfx.toml]
ode_algo: ARS343
1 change: 1 addition & 0 deletions config/model_configs/single_column_precipitation_test.yml
@@ -14,6 +14,7 @@ precip_model: "1M"
vert_diff: "FriersonDiffusion"
implicit_diffusion: true
approximate_linear_solve_iters: 2
reproducibility_test: false
toml: [toml/single_column_precipitation_test.toml]
diagnostics:
- short_name: [hus, clw, cli, husra, hussn, ta, wa]
@@ -16,6 +16,7 @@ cloud_model: "grid_scale"
surface_temperature: "ZonallyAsymmetric"
moist: "equil"
albedo_model: "RegressionFunctionAlbedo"
reproducibility_test: true
aerosol_radiation: true
prescribed_aerosols: ["CB1", "CB2", "DST01", "DST02", "DST03", "DST04", "OC1", "OC2", "SO4"]
toml: [toml/sphere_aquaplanet.toml]
@@ -1,5 +1,6 @@
precip_model: "0M"
dt_save_state_to_disk: "2days"
reproducibility_test: true
initial_condition: "MoistBaroclinicWave"
dt: "450secs"
t_end: "10days"
@@ -9,5 +9,6 @@ t_end: "4days"
vert_diff: true
forcing: "held_suarez"
precip_model: "0M"
reproducibility_test: true
moist: "equil"
toml: [toml/sphere_held_suarez.toml]
15 changes: 11 additions & 4 deletions examples/hybrid/driver.jl
@@ -87,8 +87,8 @@ end
include(
joinpath(@__DIR__, "..", "..", "reproducibility_tests", "mse_tables.jl"),
)
if get(ENV, "test_reproducibility", "false") == "true"
# Export reproducibility results, to later test against the main branch
if config.parsed_args["reproducibility_test"]
# Test results against main branch
include(
joinpath(
@__DIR__,
@@ -98,10 +98,17 @@ if get(ENV, "test_reproducibility", "false") == "true"
"reproducibility_tests.jl",
),
)
export_reproducibility_results(
config.comms_ctx,
@testset "Test reproducibility table entries" begin
mse_keys = sort(collect(keys(all_best_mse[simulation.job_id])))
pcs = collect(Fields.property_chains(sol.u[end]))
for prop_chain in mse_keys
@test prop_chain in pcs
end
end
perform_reproducibility_tests(
simulation.job_id,
sol.u[end],
all_best_mse,
simulation.output_dir,
)
end
Expand Down
17 changes: 10 additions & 7 deletions reproducibility_tests/README.md
@@ -64,8 +64,9 @@ To update the mse tables:

To add a new reproducibility test:

- Add `julia --color=yes --project=examples reproducibility_tests/test_mse.jl --job_id [job_id] --out_dir [job_id]` as a separate command for the new (or existing) job, and set the `test_reproducibility` environment flag. For example: `test_reproducibility: "true"`.
- Add the job's `job_id` into the `reproducibility_test_job_ids` vector in `reproducibility_test/mse_tables.jl`.
- Set the command-line option `reproducibility_test` to `true`, and add `julia --color=yes --project=examples reproducibility_tests/test_mse.jl --job_id [job_id] --out_dir [job_id]` as a separate command for the new (or existing) job.
- Copy the `all_best_mse` dict template from the job's log
- Paste the `all_best_mse` dict template into `reproducibility_test/mse_tables.jl`
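Concretely, a new job's YAML config would enable the flag alongside its other options; a hypothetical example (invented `job_id` and values), mirroring the configs touched in this PR:

```yaml
# config/model_configs/my_new_job.yml (hypothetical example)
initial_condition: "MoistBaroclinicWave"
dt: "450secs"
t_end: "10days"
reproducibility_test: true  # opt this job into the reproducibility test
```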

<!-- TODO: improve names / mark off sections for all_best_mse dict -->

@@ -89,17 +90,19 @@ We cannot (easily) compare the output with a reference if we change the spatial

## A detailed procedure of how reproducibility tests are performed

Reproducibility results are computed at the end of the `examples/hybrid/driver.jl` script, and tested in `reproducibility_tests/test_mse.jl`. This separation helps us delay the testing until all results have been computed. Here is an outline of the reproducibility test procedure:
Reproducibility tests are performed at the end of `examples/hybrid/driver.jl`, after a simulation completes, and rely on a unique job id (`job_id`). Here is an outline of the reproducibility test procedure:

0) Run a simulation, with a particular `job_id`, to the final time.
1) Export the solution (a `FieldVector`) at the final simulation time to an HDF5 file.
2) Compute the mean squared errors (MSE) against all other comparable references (which are saved in dedicated folders on the Caltech Central cluster) for all `FieldVector` variables in the prognostic state.
3) Convert this set of MSEs to a dictionary (called `computed_mse`), and export it to a file in the output folder.
1) Load a dictionary, `all_best_mse`, of previous "best" mean squared errors from `mse_tables.jl`, and extract the mean squared errors for the given `job_id` (stored in a job-specific dictionary, `best_mse`).
2) Export the solution (a `FieldVector`) at the final simulation time to an `NCDataset` file.
3) Compute the errors between the exported solution and the reference `NCDataset` files (which are saved in dedicated folders on the Caltech Central cluster) and save them into a dictionary called `computed_mse`.
4) Export this dictionary (`computed_mse`) to the output folder.
5) Test that `computed_mse` is no worse than `best_mse` (this determines whether the reproducibility test passes).

After these steps are performed at the end of the driver, additional jobs are run:

1) Print `computed_mse` for all jobs to make updating `reproducibility_tests/mse_tables.jl` easy.
2) If we're on the GitHub queue merging branch (all tests have passed, and the PR is effectively merging), move the HDF5 files from the scratch directory onto the dedicated folder on the Caltech Central cluster.
2) If we're on the GitHub queue merging branch (all tests have passed, and the PR is effectively merging), move the `NCDataset`s from the scratch directory onto the dedicated folder on the Caltech Central cluster.
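The comparison in steps 3 and 5 can be sketched in a few lines (a minimal illustration with hypothetical helper names; the real logic lives in `reproducibility_tests/compute_mse.jl` and `test_mse.jl`):

```julia
# Sketch of steps 3 and 5: `dict_computed` and `dict_reference` map
# variable keys to flat arrays (as produced by `to_dict`).
function sketch_mse(dict_computed, dict_reference)
    computed_mse = Dict{String, Float64}()
    for key in keys(dict_reference)
        a, b = dict_computed[key], dict_reference[key]
        # normalize by the reference magnitude so the metric is dimensionless
        computed_mse[key] = sum(abs2, a .- b) / max(sum(abs2, b), eps())
    end
    return computed_mse
end

# Step 5: pass only if every variable's MSE is no worse than the best so far.
test_passes(computed_mse, best_mse) =
    all(k -> computed_mse[k] <= best_mse[k], keys(best_mse))
```

Identical solutions give an MSE of exactly zero, so bitwise-reproducible runs always pass.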

## How we track which dataset to compare against

184 changes: 121 additions & 63 deletions reproducibility_tests/compute_mse.jl
@@ -5,67 +5,67 @@ import ClimaCoreTempestRemap as CCTR

include("latest_comparable_paths.jl")

"""
to_dict(filename::String, comms_ctx)

Convert the HDF5 file containing the
prognostic field `Y` into a `Dict`
using ClimaCore's `property_chains` and
`single_field` functions.
"""
function to_dict(filename::String, comms_ctx)
dict = Dict{Any, AbstractArray}()
reader = InputOutput.HDF5Reader(filename, comms_ctx)
Y = InputOutput.read_field(reader, "Y")
Base.close(reader)
for prop_chain in Fields.property_chains(Y)
dict[prop_chain] =
vec(Array(parent(Fields.single_field(Y, prop_chain))))
function get_nc_data(ds, var::String)
if haskey(ds, var)
return ds[var]
else
for key in keys(ds.group)
if haskey(ds.group[key], var)
return ds.group[key][var]
end
end
end
return dict
error("No key $var for mse computation.")
return nothing
end
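The lookup order of `get_nc_data` (top-level variables first, then each group) can be illustrated with nested `Dict`s standing in for an `NCDataset` and its groups — a sketch under that stand-in assumption, not the NCDatasets API:

```julia
# Nested Dicts mimic an NCDataset: top-level variables plus a "group" member.
function get_data_sketch(ds, var::String)
    haskey(ds, var) && return ds[var]          # top-level hit
    for key in keys(ds["group"])               # fall back to each group
        haskey(ds["group"][key], var) && return ds["group"][key][var]
    end
    error("No key $var for mse computation.")
end

ds = Dict{String, Any}(
    "ta" => [300.0],
    "group" => Dict("edmf" => Dict("hus" => [0.01])),
)
get_data_sketch(ds, "hus")  # found inside the "edmf" group: [0.01]
```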

"""
zero_dict(filename::String, comms_ctx)
to_dict(nc_filename::String, reference_keys::Vector{String})

Return a dict of zeros for all `ClimaCore.Fields.property_chains`
in the fieldvector `Y` contained in the HDF5 file `filename`.
Convert an `NCDataset` file to a `Dict`, keeping only the variables in `reference_keys`.
"""
function zero_dict(filename::String, comms_ctx)
dict = Dict{Any, AbstractArray}()
reader = InputOutput.HDF5Reader(filename, comms_ctx)
Y = InputOutput.read_field(reader, "Y")
Base.close(reader)
for prop_chain in Fields.property_chains(Y)
dict[prop_chain] =
vec(Array(parent(Fields.single_field(Y, prop_chain)))) .* 0
function to_dict(nc_filename::String, reference_keys::Vector{String})
dict = Dict{String, AbstractArray}()
NCDatasets.Dataset(nc_filename, "r") do ds
for key in reference_keys
dict[key] = vec(Array(get_nc_data(ds, key)))
end
end
return dict
end

"""
reproducibility_results(
comms_ctx;
reproducibility_test(;
job_id,
reference_mse,
ds_filename_computed,
ds_filename_reference = nothing,
varname,
)

Returns a `Dict` of mean-squared errors between
datasets `ds_filename_computed` and
`ds_filename_reference` for all variables.
`NCDataset`s `ds_filename_computed` and
`ds_filename_reference` for all keys in `reference_mse`.
Keys in `reference_mse` may directly map to keys in
the `NCDataset`s, or they may be mapped to the keys
via `varname`.

If running on buildkite, we get `ds_filename_reference`
from the latest merged dataset on Caltech central.
"""
function reproducibility_results(comms_ctx; job_id, ds_filename_computed)
function reproducibility_test(;
job_id,
reference_mse,
ds_filename_computed,
varname,
)
local ds_filename_reference
reference_keys = map(k -> varname(k), collect(keys(reference_mse)))
paths = String[] # initialize for later handling

if haskey(ENV, "BUILDKITE_COMMIT")
paths = latest_comparable_paths(10)
isempty(paths) &&
return (zero_dict(ds_filename_computed, comms_ctx), paths)
isempty(paths) && return (reference_mse, paths)
@info "`ds_filename_computed`: `$ds_filename_computed`"
ds_filename_references =
map(p -> joinpath(p, ds_filename_computed), paths)
@@ -94,41 +94,40 @@ function reproducibility_results(comms_ctx; job_id, ds_filename_computed)
@warn "There is no reference dataset, and no NC tar file."
end
end
end
non_existent_files = filter(x -> !isfile(x), ds_filename_references)
if !isempty(non_existent_files)
msg = "\n\n"
msg *= "Pull request author:\n"
msg *= " It seems that a new dataset,\n"
msg *= "\n"
msg *= "dataset file(s):`$(non_existent_files)`,"
msg *= "\n"
msg *= " was created, or the name of the dataset\n"
msg *= " has changed. Please increment the reference\n"
msg *= " counter in `reproducibility_tests/ref_counter.jl`.\n"
msg *= "\n"
msg *= " If this is not the case, then please\n"
msg *= " open an issue with a link pointing to this\n"
msg *= " PR and build.\n"
msg *= "\n"
msg *= "For more information, please find\n"
msg *= "`reproducibility_tests/README.md` and read the section\n\n"
msg *= " `How to merge pull requests (PR) that get approved\n"
msg *= " but *break* reproducibility tests`\n\n"
msg *= "for how to merge this PR."
error(msg)
if !isfile(ds_filename_reference)
msg = "\n\n"
msg *= "Pull request author:\n"
msg *= " It seems that a new dataset,\n"
msg *= "\n"
msg *= "dataset file:`$(ds_filename_computed)`,"
msg *= "\n"
msg *= " was created, or the name of the dataset\n"
msg *= " has changed. Please increment the reference\n"
msg *= " counter in `reproducibility_tests/ref_counter.jl`.\n"
msg *= "\n"
msg *= " If this is not the case, then please\n"
msg *= " open an issue with a link pointing to this\n"
msg *= " PR and build.\n"
msg *= "\n"
msg *= "For more information, please find\n"
msg *= "`reproducibility_tests/README.md` and read the section\n\n"
msg *= " `How to merge pull requests (PR) that get approved\n"
msg *= " but *break* reproducibility tests`\n\n"
msg *= "for how to merge this PR."
error(msg)
end
end
else
@warn "Buildkite not detected. Skipping reproducibility tests."
@info "Please review output results before merging."
return (zero_dict(ds_filename_computed, comms_ctx), paths)
return (reference_mse, paths)
end

local computed_mse
dict_computed = to_dict(ds_filename_computed, comms_ctx)
dict_references = map(ds -> to_dict(ds, comms_ctx), ds_filename_references)
reference_keys = keys(first(dict_references))
@info "Reference keys $reference_keys"
@info "Prescribed reference keys $reference_keys"
dict_computed = to_dict(ds_filename_computed, reference_keys)
dict_references =
map(ds -> to_dict(ds, reference_keys), ds_filename_references)
@info "Computed keys $(collect(keys(dict_computed)))"
@info "Reference keys $(collect(keys(first(dict_references))))"
if all(dr -> keys(dict_computed) == keys(dr), dict_references) && all(
@@ -154,3 +153,62 @@ function reproducibility_results(comms_ctx; job_id, ds_filename_computed)
return (computed_mses, paths)

end


##### TODO: move below functions to ClimaCore

function first_center_space(fv::Fields.FieldVector)
for prop_chain in Fields.property_chains(fv)
f = Fields.single_field(fv, prop_chain)
space = axes(f)
if space isa Spaces.CenterExtrudedFiniteDifferenceSpace
return space
end
end
error("Unfound space")
end

function first_face_space(fv::Fields.FieldVector)
for prop_chain in Fields.property_chains(fv)
f = Fields.single_field(fv, prop_chain)
space = axes(f)
if space isa Spaces.FaceExtrudedFiniteDifferenceSpace
return space
end
end
error("Unfound space")
end

function export_nc(
Y::Fields.FieldVector;
nc_filename,
t_now = 0.0,
center_space = first_center_space,
face_space = first_face_space,
filter_prop_chain = pn -> true, # use all fields
varname::Function,
)
prop_chains = Fields.property_chains(Y)
filter!(filter_prop_chain, prop_chains)
cspace = center_space(Y)
fspace = face_space(Y)
# create a temporary dir for intermediate data
FT = eltype(Y)
NCDatasets.NCDataset(nc_filename, "c") do nc
# defines the appropriate dimensions and variables for a space coordinate
# defines the appropriate dimensions and variables for a time coordinate (by default, unlimited size)
nc_time = CCTR.def_time_coord(nc)
CCTR.def_space_coord(nc, cspace, type = "cgll")
CCTR.def_space_coord(nc, fspace, type = "cgll")
# define variables for the prognostic states
for prop_chain in Fields.property_chains(Y)
f = Fields.single_field(Y, prop_chain)
space = axes(f)
nc_var = CCTR.defVar(nc, varname(prop_chain), FT, space, ("time",))
nc_var[:, 1] = f
end
# TODO: interpolate w onto center space and save it the same way as the other vars
nc_time[1] = t_now
end
return nothing
end
10 changes: 7 additions & 3 deletions reproducibility_tests/move_output.jl
@@ -2,8 +2,12 @@
include(joinpath(@__DIR__, "latest_comparable_paths.jl"))
paths = latest_comparable_paths()

include(joinpath(@__DIR__, "mse_tables.jl"))
job_ids = reproducibility_test_job_ids
all_lines = readlines(joinpath(@__DIR__, "mse_tables.jl"))
lines = deepcopy(all_lines)
filter!(x -> occursin("] = OrderedCollections", x), lines)
job_ids = getindex.(split.(lines, "\""), 2)
@assert count(x -> occursin("OrderedDict", x), all_lines) == length(job_ids) + 1
@assert length(job_ids) ≠ 0 # safety net
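The quote-splitting above can be checked against a single hypothetical `mse_tables.jl` line — the job id is the first double-quoted string, so it lands at index 2 after splitting on `"`:

```julia
# Hypothetical table line; real lines live in reproducibility_tests/mse_tables.jl.
line = "all_best_mse[\"sphere_held_suarez_rhoe\"] = OrderedCollections.OrderedDict()"
job_id = split(line, "\"")[2]  # "sphere_held_suarez_rhoe"
```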

# Note: cluster_data_prefix is also defined in compute_mse.jl
cluster_data_prefix = "/central/scratch/esm/slurm-buildkite/climaatmos-main"
@@ -17,7 +21,7 @@ if buildkite_ci
@info "commit = $(commit)"

using Glob
# @show readdir(joinpath(@__DIR__, ".."))
@show readdir(joinpath(@__DIR__, ".."))
# if a contributor manually merged, we still want to move data
# from scratch to `cluster_data_prefix`. So, let's also try moving
# data if this is running on the main branch.