Skip to content

Commit

Permalink
Merge pull request #3438 from CliMA/ck/repro_refactor2
Browse files Browse the repository at this point in the history
Improve names, prep for more repro unit tests
  • Loading branch information
charleskawczynski authored Nov 14, 2024
2 parents 68480cf + 351a0c9 commit 1d372ce
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 61 deletions.
2 changes: 1 addition & 1 deletion examples/hybrid/driver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ if ClimaComms.iamroot(config.comms_ctx)
joinpath(
pkgdir(CA),
"reproducibility_tests",
"latest_comparable_paths.jl",
"reproducibility_utils.jl",
),
)
@info "Plotting"
Expand Down
2 changes: 1 addition & 1 deletion reproducibility_tests/compute_mse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import NCDatasets
import Tar
import ClimaCoreTempestRemap as CCTR

include("latest_comparable_paths.jl")
include("reproducibility_utils.jl")

function get_nc_data(ds, var::String)
if haskey(ds, var)
Expand Down
57 changes: 1 addition & 56 deletions reproducibility_tests/move_output.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

include(joinpath(@__DIR__, "latest_comparable_paths.jl"))
include(joinpath(@__DIR__, "reproducibility_utils.jl"))
paths = latest_comparable_paths()

all_lines = readlines(joinpath(@__DIR__, "mse_tables.jl"))
Expand Down Expand Up @@ -66,61 +66,6 @@ else
@info "ENV keys: $(keys(ENV))"
end

"""
    reason(path)

Return a short, human-readable string describing the `ref_counter.jl` file
located in `path`: either a note that the file does not exist, or the integer
parsed from the file's first line.
"""
function reason(path)
    counter_file = joinpath(path, "ref_counter.jl")
    # Guard clause: nothing to parse when the counter file is absent.
    isfile(counter_file) || return "ref_counter.jl does not exist"
    counter = parse(Int, first(readlines(counter_file)))
    return string("ref_counter: ", counter)
end

"""
    cleanup_central(
        cluster_data_prefix;
        keep_latest_n = 0,
        keep_latest_ref_counters = 5,
    )

Delete old dataset folders found under `cluster_data_prefix`.

The folders are obtained from
`sorted_dataset_folder(; dir = cluster_data_prefix)`. All but the last
`keep_latest_n` entries of that list are candidates, and a candidate is
removed (recursively) when either

  - it contains no `ref_counter.jl` file, or
  - the integer on the first line of its `ref_counter.jl` satisfies
    `ref_counter + keep_latest_ref_counters < ref_counter_main`, where
    `ref_counter_main` is read from the folder selected by
    `argmax(ref_counters_per_path(sorted_paths))`.

When no folders are found, only the initial warning is logged.

# Keywords
  - `keep_latest_n`: number of trailing entries of the sorted folder list that
    are never considered for deletion. Must be non-negative.
  - `keep_latest_ref_counters`: safety margin; folders whose counter is within
    this many increments of the largest counter are kept.

# Throws
  - `ArgumentError` if `keep_latest_n` is negative.
  - An `ErrorException` if the folder with the largest reference counter has no
    `ref_counter.jl` file.
"""
function cleanup_central(
    cluster_data_prefix;
    keep_latest_n = 0,
    keep_latest_ref_counters = 5,
)
    keep_latest_n >= 0 || throw(
        ArgumentError("keep_latest_n must be non-negative, got $keep_latest_n"),
    )
    @warn "Cleaning up old files on central"
    # Get (sorted) array of paths, `pop!(sorted_paths)`
    # is the most recent merged folder.
    sorted_paths = sorted_dataset_folder(; dir = cluster_data_prefix)
    isempty(sorted_paths) && return nothing

    # The counter is stored as an integer on the first line of the file.
    read_ref_counter(file) = parse(Int, first(readlines(file)))

    # The folder with the largest reference counter defines the baseline
    # against which all other folders are compared.
    ref_counters_main = ref_counters_per_path(sorted_paths)
    path_main = sorted_paths[argmax(ref_counters_main)]
    ref_counter_file_main = joinpath(path_main, "ref_counter.jl")
    isfile(ref_counter_file_main) ||
        error("ref_counter.jl does not exist in $path_main")
    ref_counter_main = read_ref_counter(ref_counter_file_main)

    # Typed accumulator instead of `[]` (which would be a `Vector{Any}`).
    paths_to_delete = eltype(sorted_paths)[]
    N = length(sorted_paths) - keep_latest_n
    for i in 1:N
        path = sorted_paths[i]
        ref_counter_file = joinpath(path, "ref_counter.jl")
        if !isfile(ref_counter_file)
            push!(paths_to_delete, path)
        else
            ref_counter = read_ref_counter(ref_counter_file)
            # Just to be safe, never delete paths whose reference counter is
            # within `keep_latest_ref_counters` increments of the largest one.
            if ref_counter + keep_latest_ref_counters < ref_counter_main
                push!(paths_to_delete, path)
            end
        end
    end

    @show ref_counter_main
    @show length(sorted_paths)
    @show length(paths_to_delete)
    @info "Deleting files:"
    # Log everything first, then delete, so the full list is reported even if
    # a removal fails part-way through.
    for f in paths_to_delete
        @info "     (File, date): ($(f), $(Dates.unix2datetime(stat(f).mtime))). Reason: $(reason(f))"
    end
    for f in paths_to_delete
        rm(f; recursive = true, force = true)
    end
    return nothing
end

# Run `cleanup_central` on `cluster_data_prefix` only when both
# `buildkite_ci` and `in_merge_queue` are true.
if buildkite_ci && in_merge_queue
cleanup_central(cluster_data_prefix)
end
2 changes: 1 addition & 1 deletion reproducibility_tests/print_new_mse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import OrderedCollections
import JSON

# Get cases from JobIDs in mse_tables file:
include(joinpath(@__DIR__, "latest_comparable_paths.jl"))
include(joinpath(@__DIR__, "reproducibility_utils.jl"))
paths = latest_comparable_paths()

all_lines = readlines(joinpath(@__DIR__, "mse_tables.jl"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,58 @@ function latest_comparable_paths(;

return comparable_paths
end

"""
    reason(path)

Return a short, human-readable string describing the `ref_counter.jl` file
located in `path`: either a note that the file does not exist, or the integer
parsed from the file's first line.
"""
function reason(path)
    counter_file = joinpath(path, "ref_counter.jl")
    # Guard clause: nothing to parse when the counter file is absent.
    isfile(counter_file) || return "ref_counter.jl does not exist"
    counter = parse(Int, first(readlines(counter_file)))
    return string("ref_counter: ", counter)
end

"""
    cleanup_central(
        cluster_data_prefix;
        keep_latest_n = 0,
        keep_latest_ref_counters = 5,
    )

Delete old dataset folders found under `cluster_data_prefix`.

The folders are obtained from
`sorted_dataset_folder(; dir = cluster_data_prefix)`. All but the last
`keep_latest_n` entries of that list are candidates, and a candidate is
removed (recursively) when either

  - it contains no `ref_counter.jl` file, or
  - the integer on the first line of its `ref_counter.jl` satisfies
    `ref_counter + keep_latest_ref_counters < ref_counter_main`, where
    `ref_counter_main` is read from the folder selected by
    `argmax(ref_counters_per_path(sorted_paths))`.

When no folders are found, only the initial warning is logged.

# Keywords
  - `keep_latest_n`: number of trailing entries of the sorted folder list that
    are never considered for deletion. Must be non-negative.
  - `keep_latest_ref_counters`: safety margin; folders whose counter is within
    this many increments of the largest counter are kept.

# Throws
  - `ArgumentError` if `keep_latest_n` is negative.
  - An `ErrorException` if the folder with the largest reference counter has no
    `ref_counter.jl` file.
"""
function cleanup_central(
    cluster_data_prefix;
    keep_latest_n = 0,
    keep_latest_ref_counters = 5,
)
    keep_latest_n >= 0 || throw(
        ArgumentError("keep_latest_n must be non-negative, got $keep_latest_n"),
    )
    @warn "Cleaning up old files on central"
    # Get (sorted) array of paths, `pop!(sorted_paths)`
    # is the most recent merged folder.
    sorted_paths = sorted_dataset_folder(; dir = cluster_data_prefix)
    isempty(sorted_paths) && return nothing

    # The counter is stored as an integer on the first line of the file.
    read_ref_counter(file) = parse(Int, first(readlines(file)))

    # The folder with the largest reference counter defines the baseline
    # against which all other folders are compared.
    ref_counters_main = ref_counters_per_path(sorted_paths)
    path_main = sorted_paths[argmax(ref_counters_main)]
    ref_counter_file_main = joinpath(path_main, "ref_counter.jl")
    isfile(ref_counter_file_main) ||
        error("ref_counter.jl does not exist in $path_main")
    ref_counter_main = read_ref_counter(ref_counter_file_main)

    # Typed accumulator instead of `[]` (which would be a `Vector{Any}`).
    paths_to_delete = eltype(sorted_paths)[]
    N = length(sorted_paths) - keep_latest_n
    for i in 1:N
        path = sorted_paths[i]
        ref_counter_file = joinpath(path, "ref_counter.jl")
        if !isfile(ref_counter_file)
            push!(paths_to_delete, path)
        else
            ref_counter = read_ref_counter(ref_counter_file)
            # Just to be safe, never delete paths whose reference counter is
            # within `keep_latest_ref_counters` increments of the largest one.
            if ref_counter + keep_latest_ref_counters < ref_counter_main
                push!(paths_to_delete, path)
            end
        end
    end

    @show ref_counter_main
    @show length(sorted_paths)
    @show length(paths_to_delete)
    @info "Deleting files:"
    # Log everything first, then delete, so the full list is reported even if
    # a removal fails part-way through.
    for f in paths_to_delete
        @info "     (File, date): ($(f), $(Dates.unix2datetime(stat(f).mtime))). Reason: $(reason(f))"
    end
    for f in paths_to_delete
        rm(f; recursive = true, force = true)
    end
    return nothing
end
2 changes: 1 addition & 1 deletion reproducibility_tests/test_reset.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import OrderedCollections

# Get cases from JobIDs in mse_tables file:
include(joinpath(@__DIR__, "latest_comparable_paths.jl"))
include(joinpath(@__DIR__, "reproducibility_utils.jl"))
paths = latest_comparable_paths()
include(joinpath(@__DIR__, "mse_tables.jl"))

Expand Down
2 changes: 1 addition & 1 deletion test/unit_reproducibility_infra.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ using Revise; include("test/unit_reproducibility_infra.jl")
using Test
using Dates

include(joinpath("..", "reproducibility_tests/latest_comparable_paths.jl"))
include(joinpath("..", "reproducibility_tests/reproducibility_utils.jl"))

function make_ref_file_counter(dir, pathname, i)
d = mkdir(pathname)
Expand Down

0 comments on commit 1d372ce

Please sign in to comment.