diff --git a/examples/hybrid/driver.jl b/examples/hybrid/driver.jl index 796f2b86d4..b11af3bf27 100644 --- a/examples/hybrid/driver.jl +++ b/examples/hybrid/driver.jl @@ -148,7 +148,7 @@ if ClimaComms.iamroot(config.comms_ctx) joinpath( pkgdir(CA), "reproducibility_tests", - "latest_comparable_paths.jl", + "reproducibility_utils.jl", ), ) @info "Plotting" diff --git a/reproducibility_tests/compute_mse.jl b/reproducibility_tests/compute_mse.jl index 33c2322913..09d3dd99ae 100644 --- a/reproducibility_tests/compute_mse.jl +++ b/reproducibility_tests/compute_mse.jl @@ -3,7 +3,7 @@ import NCDatasets import Tar import ClimaCoreTempestRemap as CCTR -include("latest_comparable_paths.jl") +include("reproducibility_utils.jl") function get_nc_data(ds, var::String) if haskey(ds, var) diff --git a/reproducibility_tests/move_output.jl b/reproducibility_tests/move_output.jl index 157251285e..457089e230 100644 --- a/reproducibility_tests/move_output.jl +++ b/reproducibility_tests/move_output.jl @@ -1,5 +1,5 @@ -include(joinpath(@__DIR__, "latest_comparable_paths.jl")) +include(joinpath(@__DIR__, "reproducibility_utils.jl")) paths = latest_comparable_paths() all_lines = readlines(joinpath(@__DIR__, "mse_tables.jl")) @@ -66,61 +66,6 @@ else @info "ENV keys: $(keys(ENV))" end -function reason(path) - f = joinpath(path, "ref_counter.jl") - if !isfile(f) - return "ref_counter.jl does not exist" - else - ref_counter = parse(Int, first(readlines(f))) - return "ref_counter: $ref_counter" - end -end - -function cleanup_central(cluster_data_prefix) - @warn "Cleaning up old files on central" - # Get (sorted) array of paths, `pop!(sorted_paths)` - # is the most recent merged folder. - sorted_paths = sorted_dataset_folder(; dir = cluster_data_prefix) - keep_latest_n = 0 - keep_latest_ref_counters = 5 - if !isempty(sorted_paths) - N = length(sorted_paths) - keep_latest_n - paths_to_delete = [] - ref_counters_main = ref_counters_per_path(sorted_paths) - i_largest_reference = argmax(ref_counters_main) - path = sorted_paths[i_largest_reference] - ref_counter_file_main = joinpath(path, "ref_counter.jl") - @assert isfile(ref_counter_file_main) - ref_counter_main = parse(Int, first(readlines(ref_counter_file_main))) - - for i in 1:N - path = sorted_paths[i] - ref_counter_file = joinpath(path, "ref_counter.jl") - if !isfile(ref_counter_file) - push!(paths_to_delete, path) - else - ref_counter = parse(Int, first(readlines(ref_counter_file))) - # Just to be safe, let's also make sure that we don't delete - # any paths with recent (let's say 5) ref counter increments ago. - if ref_counter + keep_latest_ref_counters < ref_counter_main - push!(paths_to_delete, path) - end - end - end - @show ref_counter_main - @show length(sorted_paths) - @show length(paths_to_delete) - @info "Deleting files:" - for i in 1:length(paths_to_delete) - f = paths_to_delete[i] - @info " (File, date): ($(f), $(Dates.unix2datetime(stat(f).mtime))). Reason: $(reason(f))" - end - for i in 1:length(paths_to_delete) - rm(paths_to_delete[i]; recursive = true, force = true) - end - end -end - if buildkite_ci && in_merge_queue cleanup_central(cluster_data_prefix) end diff --git a/reproducibility_tests/print_new_mse.jl b/reproducibility_tests/print_new_mse.jl index f7bb2f8a54..72bff33ae8 100644 --- a/reproducibility_tests/print_new_mse.jl +++ b/reproducibility_tests/print_new_mse.jl @@ -2,7 +2,7 @@ import OrderedCollections import JSON # Get cases from JobIDs in mse_tables file: -include(joinpath(@__DIR__, "latest_comparable_paths.jl")) +include(joinpath(@__DIR__, "reproducibility_utils.jl")) paths = latest_comparable_paths() all_lines = readlines(joinpath(@__DIR__, "mse_tables.jl")) diff --git a/reproducibility_tests/latest_comparable_paths.jl b/reproducibility_tests/reproducibility_utils.jl similarity index 65% rename from reproducibility_tests/latest_comparable_paths.jl rename to reproducibility_tests/reproducibility_utils.jl index 27bbbf3aee..1b7fca4a19 100644 --- a/reproducibility_tests/latest_comparable_paths.jl +++ b/reproducibility_tests/reproducibility_utils.jl @@ -117,3 +117,58 @@ function latest_comparable_paths(; return comparable_paths end + +function reason(path) + f = joinpath(path, "ref_counter.jl") + if !isfile(f) + return "ref_counter.jl does not exist" + else + ref_counter = parse(Int, first(readlines(f))) + return "ref_counter: $ref_counter" + end +end + +function cleanup_central(cluster_data_prefix) + @warn "Cleaning up old files on central" + # Get (sorted) array of paths, `pop!(sorted_paths)` + # is the most recent merged folder. + sorted_paths = sorted_dataset_folder(; dir = cluster_data_prefix) + keep_latest_n = 0 + keep_latest_ref_counters = 5 + if !isempty(sorted_paths) + N = length(sorted_paths) - keep_latest_n + paths_to_delete = [] + ref_counters_main = ref_counters_per_path(sorted_paths) + i_largest_reference = argmax(ref_counters_main) + path = sorted_paths[i_largest_reference] + ref_counter_file_main = joinpath(path, "ref_counter.jl") + @assert isfile(ref_counter_file_main) + ref_counter_main = parse(Int, first(readlines(ref_counter_file_main))) + + for i in 1:N + path = sorted_paths[i] + ref_counter_file = joinpath(path, "ref_counter.jl") + if !isfile(ref_counter_file) + push!(paths_to_delete, path) + else + ref_counter = parse(Int, first(readlines(ref_counter_file))) + # Just to be safe, let's also make sure that we don't delete + # any paths with recent (let's say 5) ref counter increments ago. + if ref_counter + keep_latest_ref_counters < ref_counter_main + push!(paths_to_delete, path) + end + end + end + @show ref_counter_main + @show length(sorted_paths) + @show length(paths_to_delete) + @info "Deleting files:" + for i in 1:length(paths_to_delete) + f = paths_to_delete[i] + @info " (File, date): ($(f), $(Dates.unix2datetime(stat(f).mtime))). Reason: $(reason(f))" + end + for i in 1:length(paths_to_delete) + rm(paths_to_delete[i]; recursive = true, force = true) + end + end +end diff --git a/reproducibility_tests/test_reset.jl b/reproducibility_tests/test_reset.jl index 783ece2412..14f8a1c877 100644 --- a/reproducibility_tests/test_reset.jl +++ b/reproducibility_tests/test_reset.jl @@ -1,7 +1,7 @@ import OrderedCollections # Get cases from JobIDs in mse_tables file: -include(joinpath(@__DIR__, "latest_comparable_paths.jl")) +include(joinpath(@__DIR__, "reproducibility_utils.jl")) paths = latest_comparable_paths() include(joinpath(@__DIR__, "mse_tables.jl")) diff --git a/test/unit_reproducibility_infra.jl b/test/unit_reproducibility_infra.jl index fa9b11f214..3b3562c4f4 100644 --- a/test/unit_reproducibility_infra.jl +++ b/test/unit_reproducibility_infra.jl @@ -4,7 +4,7 @@ using Revise; include("test/unit_reproducibility_infra.jl") using Test using Dates -include(joinpath("..", "reproducibility_tests/latest_comparable_paths.jl")) +include(joinpath("..", "reproducibility_tests/reproducibility_utils.jl")) function make_ref_file_counter(dir, pathname, i) d = mkdir(pathname)