From d14c99eb57e62aeba178b2455c9482db931fea5a Mon Sep 17 00:00:00 2001 From: mishaschwartz <4380924+mishaschwartz@users.noreply.github.com> Date: Tue, 18 Mar 2025 15:14:41 -0400 Subject: [PATCH 1/7] delete old files scheduler job --- CHANGES.md | 7 ++++ birdhouse/components/README.rst | 15 +++++++++ birdhouse/components/thredds/default.env | 6 ++++ .../components/wps_outputs-volume/default.env | 6 ++++ birdhouse/env.local.example | 16 +++++++++ .../scheduler-job-clean_old_files/.gitignore | 1 + .../clean-old-files.sh | 33 +++++++++++++++++++ .../config.yml.template | 9 +++++ .../config/scheduler/docker-compose-extra.yml | 4 +++ .../scheduler-job-clean_old_files/default.env | 24 ++++++++++++++ .../pre-docker-compose-up.include | 3 ++ 11 files changed, 124 insertions(+) create mode 100644 birdhouse/optional-components/scheduler-job-clean_old_files/.gitignore create mode 100755 birdhouse/optional-components/scheduler-job-clean_old_files/clean-old-files.sh create mode 100644 birdhouse/optional-components/scheduler-job-clean_old_files/config.yml.template create mode 100644 birdhouse/optional-components/scheduler-job-clean_old_files/config/scheduler/docker-compose-extra.yml create mode 100644 birdhouse/optional-components/scheduler-job-clean_old_files/default.env create mode 100644 birdhouse/optional-components/scheduler-job-clean_old_files/pre-docker-compose-up.include diff --git a/CHANGES.md b/CHANGES.md index 8c79e37e7..a36b9577d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -60,6 +60,13 @@ - Answer: This is a hack that would work based on the specific way that the docker-crontab image sets schedules. However, this is not obvious to the user and is unreliable since it is not documented. +- Introduce a scheduler job to delete old files that may accumulate over time + + Creates the `optional-component-clean_old_files` job that deletes old THREDDS log files and WPS output files. 
+ To set the oldest file that will be kept for each of these options, set the `THREDDS_DELETE_FILES_OLDER_THAN_DAYS` + and/or the `WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS` variables in the local environment files (see + `env.local.example` or the `scheduler` documentation for details). + [2.10.1](https://github.com/bird-house/birdhouse-deploy/tree/2.10.1) (2025-03-10) ------------------------------------------------------------------------------------------------------------------ diff --git a/birdhouse/components/README.rst b/birdhouse/components/README.rst index 92e23a34a..d860b900f 100644 --- a/birdhouse/components/README.rst +++ b/birdhouse/components/README.rst @@ -59,6 +59,21 @@ component directory to the ``BIRDHOUSE_EXTRA_CONF_DIRS`` variable in your local * component location: ``optional-components/scheduler-job-deploy_raven_testdata`` +* Automatically remove old files + + * Removes THREDDS log files and WPS output files older than a specific number of days + + * In order to remove THREDDS log files the ``thredds`` component needs to be enabled. + + * Set the ``THREDDS_DELETE_FILES_OLDER_THAN_DAYS`` variable in the local environment file to an integer specifying + how old a THREDDS log file needs to be before it is deleted. + + * In order to remove WPS output files, at least one of the WPS components needs to be enabled. + + * Set the ``WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS`` variable in the local environment file to an integer specifying + how old a WPS output file needs to be before it is deleted. + + For additional configuration options for all these jobs see the ``env.local.example`` file as well as the individual ``default.env`` files in each of the component directories. 
diff --git a/birdhouse/components/thredds/default.env b/birdhouse/components/thredds/default.env index ff97522d2..63b0a38f7 100644 --- a/birdhouse/components/thredds/default.env +++ b/birdhouse/components/thredds/default.env @@ -38,6 +38,12 @@ export THREDDS_DATASET_DATASETSCAN_BODY=' ' +export THREDDS_DELETE_FILES_OLDER_THAN_DAYS= +export SCHEDULER_JOB_CLEAN_OLD_FILES_OPTIONS=" + ${SCHEDULER_JOB_CLEAN_OLD_FILES_OPTIONS} + \$([ -n \"\${THREDDS_DELETE_FILES_OLDER_THAN_DAYS}\" ] && echo \"thredds_persistence:/thredds|/thredds/logs/threddsServlet.*.log|\${THREDDS_DELETE_FILES_OLDER_THAN_DAYS}\") +" + # add any new variables not already in 'VARS' or 'OPTIONAL_VARS' that must be replaced in templates here VARS=" $VARS diff --git a/birdhouse/components/wps_outputs-volume/default.env b/birdhouse/components/wps_outputs-volume/default.env index d9516ca65..2c39860ec 100644 --- a/birdhouse/components/wps_outputs-volume/default.env +++ b/birdhouse/components/wps_outputs-volume/default.env @@ -3,6 +3,12 @@ OPTIONAL_VARS=" \$SECURE_DATA_PROXY_AUTH_INCLUDE " +export WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= +export SCHEDULER_JOB_CLEAN_OLD_FILES_OPTIONS=" + ${SCHEDULER_JOB_CLEAN_OLD_FILES_OPTIONS} + \$([ -n \"\${WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}\" ] && echo \"\${COMPOSE_PROJECT_NAME:-birdhouse}_wps_outputs:/wps_outputs|/wps_outputs|\${WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}\") +" + # add any new variables not already in 'VARS' or 'OPTIONAL_VARS' that must be replaced in templates here # single quotes are important in below list to keep variable names intact until 'birdhouse-compose' parses them EXTRA_VARS=' diff --git a/birdhouse/env.local.example b/birdhouse/env.local.example index 3a123a9b2..726280a36 100644 --- a/birdhouse/env.local.example +++ b/birdhouse/env.local.example @@ -204,6 +204,22 @@ export GEOSERVER_ADMIN_PASSWORD="${__DEFAULT__GEOSERVER_ADMIN_PASSWORD}" # (note: if using 'BIRDHOUSE_DATA_PERSIST_ROOT', it must be defined earlier, either in this file or 
from 'default.env') #export BIRDHOUSE_LOGROTATE_DATA_DIR='${BIRDHOUSE_DATA_PERSIST_ROOT}/logrotate' +# These variables configure the scheduler-job-clean_old_files component +# +# Delete THREDDS log files older than X days (e.g. X=20): +#export THREDDS_DELETE_FILES_OLDER_THAN_DAYS=20 +# +# Delete WPS output files older than X days (e.g. X=90): +#export WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS=90 +# +# Set cron schedule for the clean old files job (how often the job runs). +# By default it runs weekly on Sunday at 2:05 am: +#export SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="5 2 * * 0" + +############################################################# +# Proxy variables +############################################################# + # Content of "location /" in file config/proxy/conf.d/all-services.include.template # Useful to have a custom homepage. # Default: diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/.gitignore b/birdhouse/optional-components/scheduler-job-clean_old_files/.gitignore new file mode 100644 index 000000000..1d3ed4c17 --- /dev/null +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/.gitignore @@ -0,0 +1 @@ +config.yml diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/clean-old-files.sh b/birdhouse/optional-components/scheduler-job-clean_old_files/clean-old-files.sh new file mode 100755 index 000000000..66a9c7ba6 --- /dev/null +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/clean-old-files.sh @@ -0,0 +1,33 @@ +#!/bin/sh + +################################################################ +# Deletes old files as determined by the CLEAN_OLD_FILES_OPTIONS +# environment variable. +# +# This variable contains space delimited fields, each +# representing a group of files to be deleted. 
+# The format of these fields are as follows: +# +# || +# +# - docker-volume-mounts is not used by this script +# - find-location is an argument passed to `find` which will +# recursively search for files to delete based on that argument +# - age-in-days is an integer that represents a number of days, +# all files found by `find` that were modified more than this +# number of days ago will be deleted +# +# Example call to delete all files in /tmp older than 20 days and +# all files in /var/log older than 90 days: +# +# $ export CLEAN_OLD_FILES_OPTIONS='xxx|/tmp|20 yyy|/var/log|90' +# $ sh clean-old-files.sh +################################################################## + + +for opt in ${CLEAN_OLD_FILES_OPTIONS}; do + loc="$(echo $opt | cut -d\| -f 2)" + age="$(echo $opt | cut -d\| -f 3)" + echo "Removing files in ${loc} that have not been modified in ${age} days" + [ -n "$loc" ] && [ -n "$age" ] && find ${loc} -type f -mtime +"${age}" -print -delete +done diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config.yml.template b/birdhouse/optional-components/scheduler-job-clean_old_files/config.yml.template new file mode 100644 index 000000000..3c3956436 --- /dev/null +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config.yml.template @@ -0,0 +1,9 @@ +- name: clean_old_files + comment: clean old files generated by the stack + schedule: '${SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY}' + command: 'bash /clean-old-files.sh' + dockerargs: >- + --rm --name scheduler-job-clean_old_files + --volume ${COMPOSE_DIR}/optional-components/scheduler-job-clean_old_files/clean-old-files.sh:/clean-old-files.sh:ro + --env CLEAN_OLD_FILES_OPTIONS="${SCHEDULER_JOB_CLEAN_OLD_FILES_OPTIONS}" ${SCHEDULER_JOB_CLEAN_OLD_FILES_VOLUMES} + image: '${SCHEDULER_JOB_CLEAN_OLD_FILES_IMAGE}' diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/scheduler/docker-compose-extra.yml 
b/birdhouse/optional-components/scheduler-job-clean_old_files/config/scheduler/docker-compose-extra.yml new file mode 100644 index 000000000..327190ff3 --- /dev/null +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/scheduler/docker-compose-extra.yml @@ -0,0 +1,4 @@ +services: + scheduler: + volumes: + - ./optional-components/scheduler-job-clean_old_files/config.yml:/scheduler-job-configs/clean_old_files.yml:ro diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/default.env b/birdhouse/optional-components/scheduler-job-clean_old_files/default.env new file mode 100644 index 000000000..bd0537304 --- /dev/null +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/default.env @@ -0,0 +1,24 @@ +export SCHEDULER_JOB_CLEAN_OLD_FILES_IMAGE='${BASH_IMAGE}' + +export SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="5 2 * * 0" # weekly on Sunday at 2:05 + +export SCHEDULER_JOB_CLEAN_OLD_FILES_VOLUMES='$(for opt in ${SCHEDULER_JOB_CLEAN_OLD_FILES_OPTIONS}; do printf " --volume %s:rw " "$(echo $opt | cut -d\| -f 1)"; done)' + +export DELAYED_EVAL=" + $DELAYED_EVAL + SCHEDULER_JOB_CLEAN_OLD_FILES_IMAGE + SCHEDULER_JOB_CLEAN_OLD_FILES_OPTIONS + SCHEDULER_JOB_CLEAN_OLD_FILES_VOLUMES +" + +OPTIONAL_VARS=" + $OPTIONAL_VARS + \$SCHEDULER_JOB_CLEAN_OLD_FILES_OPTIONS + \$SCHEDULER_JOB_CLEAN_OLD_FILES_VOLUMES +" + +VARS=" + $VARS + \$SCHEDULER_JOB_CLEAN_OLD_FILES_IMAGE + \$SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY +" diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/pre-docker-compose-up.include b/birdhouse/optional-components/scheduler-job-clean_old_files/pre-docker-compose-up.include new file mode 100644 index 000000000..0003e2fca --- /dev/null +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/pre-docker-compose-up.include @@ -0,0 +1,3 @@ +if [ -z "$SCHEDULER_JOB_CLEAN_OLD_FILES_OPTIONS" ]; then + log WARN 'The scheduler-job-clean_old_files component is enabled but no files are scheduled to be 
deleted. Please reconfigure this component or disable it.' +fi From 354e74c81bc5b1bcb3a6f1d5839f704887fab37c Mon Sep 17 00:00:00 2001 From: mishaschwartz <4380924+mishaschwartz@users.noreply.github.com> Date: Thu, 20 Mar 2025 15:36:30 -0400 Subject: [PATCH 2/7] make every job a separate file --- birdhouse/birdhouse-compose.sh | 1 + birdhouse/components/README.rst | 1 + birdhouse/default.env | 4 ++ .../blank.config.yml | 1 + .../clean-old-files.sh | 51 +++++++++---------- .../config.yml.template | 9 ---- .../config/finch/config.yml.template | 9 ++++ .../config/finch/docker-compose-extra.yml | 5 ++ .../config/hummingbird/config.yml.template | 9 ++++ .../hummingbird/docker-compose-extra.yml | 5 ++ .../config/raven/config.yml.template | 9 ++++ .../config/raven/docker-compose-extra.yml | 5 ++ .../config/scheduler/docker-compose-extra.yml | 4 -- .../config/thredds/config.yml.template | 9 ++++ .../config/thredds/docker-compose-extra.yml | 5 ++ .../scheduler-job-clean_old_files/default.env | 51 ++++++++++++++++--- .../pre-docker-compose-up.include | 24 ++++++++- 17 files changed, 153 insertions(+), 49 deletions(-) create mode 100644 birdhouse/optional-components/scheduler-job-clean_old_files/blank.config.yml delete mode 100644 birdhouse/optional-components/scheduler-job-clean_old_files/config.yml.template create mode 100644 birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/config.yml.template create mode 100644 birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/docker-compose-extra.yml create mode 100644 birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/config.yml.template create mode 100644 birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/docker-compose-extra.yml create mode 100644 birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/config.yml.template create mode 100644 
birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/docker-compose-extra.yml delete mode 100644 birdhouse/optional-components/scheduler-job-clean_old_files/config/scheduler/docker-compose-extra.yml create mode 100644 birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/config.yml.template create mode 100644 birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/docker-compose-extra.yml diff --git a/birdhouse/birdhouse-compose.sh b/birdhouse/birdhouse-compose.sh index 7c90c443b..a20ba3371 100755 --- a/birdhouse/birdhouse-compose.sh +++ b/birdhouse/birdhouse-compose.sh @@ -23,6 +23,7 @@ VARS=' $BIRDHOUSE_LOCAL_ENV $BIRDHOUSE_LOG_DIR $COMPOSE_DIR + $COMPOSE_PROJECT_NAME ' # list of vars to be substituted in template but they do not have to be set in env.local diff --git a/birdhouse/components/README.rst b/birdhouse/components/README.rst index d860b900f..0ee3efbcc 100644 --- a/birdhouse/components/README.rst +++ b/birdhouse/components/README.rst @@ -73,6 +73,7 @@ component directory to the ``BIRDHOUSE_EXTRA_CONF_DIRS`` variable in your local * Set the ``WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS`` variable in the local environment file to an integer specifying how old a WPS output file needs to be before it is deleted. + * component location: ``optional-components/scheduler-job-clean_old_files`` For additional configuration options for all these jobs see the ``env.local.example`` file as well as the individual ``default.env`` files in each of the component directories. diff --git a/birdhouse/default.env b/birdhouse/default.env index 2868f2828..b1871bb64 100644 --- a/birdhouse/default.env +++ b/birdhouse/default.env @@ -32,6 +32,10 @@ export BIRDHOUSE_LOG_DIR=/var/log/birdhouse # Must use single-quote for delayed eval. 
export BIRDHOUSE_FQDN_PUBLIC='${BIRDHOUSE_FQDN}' +# Ensure that the compose project name is set as a variable so that other scripts can refer +# to it easily +export COMPOSE_PROJECT_NAME="${COMPOSE_PROJECT_NAME:-birdhouse}" + # Append to DELAYED_EVAL list. export DELAYED_EVAL=" $DELAYED_EVAL diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/blank.config.yml b/birdhouse/optional-components/scheduler-job-clean_old_files/blank.config.yml new file mode 100644 index 000000000..03b83d2d7 --- /dev/null +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/blank.config.yml @@ -0,0 +1 @@ +# this file intentionally contains no content and is mounted to the scheduler directory if a clean_old_files job is not enabled. diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/clean-old-files.sh b/birdhouse/optional-components/scheduler-job-clean_old_files/clean-old-files.sh index 66a9c7ba6..37d30032d 100755 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/clean-old-files.sh +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/clean-old-files.sh @@ -1,33 +1,32 @@ #!/bin/sh ################################################################ -# Deletes old files as determined by the CLEAN_OLD_FILES_OPTIONS -# environment variable. +# Example call to delete all files in /tmp last modified longer +# than 20 days ago # -# This variable contains space delimited fields, each -# representing a group of files to be deleted. 
-# The format of these fields are as follows: -# -# || -# -# - docker-volume-mounts is not used by this script -# - find-location is an argument passed to `find` which will -# recursively search for files to delete based on that argument -# - age-in-days is an integer that represents a number of days, -# all files found by `find` that were modified more than this -# number of days ago will be deleted -# -# Example call to delete all files in /tmp older than 20 days and -# all files in /var/log older than 90 days: -# -# $ export CLEAN_OLD_FILES_OPTIONS='xxx|/tmp|20 yyy|/var/log|90' -# $ sh clean-old-files.sh +# $ sh clean-old-files.sh 20 mtime /tmp ################################################################## +AGE="$1" +MODE="$2" +LOCATION="$3" + +ACCEPTABLE_MODES='|mtime|ctime|atime|' + +if ! echo "$AGE" | grep -q '^[0-9][0-9]*$'; then + >&2 echo "AGE argument set to '${AGE}'. It must be an unsigned integer" + exit 1 +fi + +if [ "${ACCEPTABLE_MODES#*"|${MODE}|"}" = "${ACCEPTABLE_MODES}" ]; then + >&2 echo "MODE argument set to '${MODE}'. It must be one of 'mtime', 'ctime', or 'atime'" + exit 1 +fi + +if [ -z "${LOCATION}" ]; then + >&2 echo "LOCATION argument is blank or unset. It must refer to a path on disk." 
+ exit 1 +fi -for opt in ${CLEAN_OLD_FILES_OPTIONS}; do - loc="$(echo $opt | cut -d\| -f 2)" - age="$(echo $opt | cut -d\| -f 3)" - echo "Removing files in ${loc} that have not been modified in ${age} days" - [ -n "$loc" ] && [ -n "$age" ] && find ${loc} -type f -mtime +"${age}" -print -delete -done +echo "Removing files in ${LOCATION} that have a ${MODE} value greater than ${AGE} days" +find "${LOCATION}" -type f "-${MODE}" +"${AGE}" -print -delete diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config.yml.template b/birdhouse/optional-components/scheduler-job-clean_old_files/config.yml.template deleted file mode 100644 index 3c3956436..000000000 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/config.yml.template +++ /dev/null @@ -1,9 +0,0 @@ -- name: clean_old_files - comment: clean old files generated by the stack - schedule: '${SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY}' - command: 'bash /clean-old-files.sh' - dockerargs: >- - --rm --name scheduler-job-clean_old_files - --volume ${COMPOSE_DIR}/optional-components/scheduler-job-clean_old_files/clean-old-files.sh:/clean-old-files.sh:ro - --env CLEAN_OLD_FILES_OPTIONS="${SCHEDULER_JOB_CLEAN_OLD_FILES_OPTIONS}" ${SCHEDULER_JOB_CLEAN_OLD_FILES_VOLUMES} - image: '${SCHEDULER_JOB_CLEAN_OLD_FILES_IMAGE}' diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/config.yml.template b/birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/config.yml.template new file mode 100644 index 000000000..c60d61d5d --- /dev/null +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/config.yml.template @@ -0,0 +1,9 @@ +- name: clean_old_files_finch + comment: clean old WPS output files generated by Finch + schedule: '${FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY}' + command: 'bash /clean-old-files.sh "${FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}" "${FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}" /wps_outputs/finch' + 
dockerargs: >- + --rm --name scheduler-job-clean_old_files_finch + --volume ${COMPOSE_DIR}/optional-components/scheduler-job-clean_old_files/clean-old-files.sh:/clean-old-files.sh:ro + --volume "${COMPOSE_PROJECT_NAME}_wps_outputs:/wps_outputs:rw" + image: '${SCHEDULER_JOB_CLEAN_OLD_FILES_IMAGE}' diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/docker-compose-extra.yml b/birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/docker-compose-extra.yml new file mode 100644 index 000000000..43cf7218c --- /dev/null +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/docker-compose-extra.yml @@ -0,0 +1,5 @@ +services: + scheduler: + volumes: + - ./optional-components/scheduler-job-clean_old_files/${__SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_CONFIG_LOC:-blank.config.yml}:/scheduler-job-configs/clean_old_files_finch.yml:ro} +# TODO: mount an empty config file instead \ No newline at end of file diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/config.yml.template b/birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/config.yml.template new file mode 100644 index 000000000..d9ec53b31 --- /dev/null +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/config.yml.template @@ -0,0 +1,9 @@ +- name: clean_old_files_hummingbird + comment: clean old WPS output files generated by Hummingbird + schedule: '${HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY}' + command: 'bash /clean-old-files.sh "${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}" "${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}" /wps_outputs/hummingbird' + dockerargs: >- + --rm --name scheduler-job-clean_old_files_hummingbird + --volume ${COMPOSE_DIR}/optional-components/scheduler-job-clean_old_files/clean-old-files.sh:/clean-old-files.sh:ro + --volume "${COMPOSE_PROJECT_NAME}_wps_outputs:/wps_outputs:rw" + image: 
'${SCHEDULER_JOB_CLEAN_OLD_FILES_IMAGE}' diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/docker-compose-extra.yml b/birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/docker-compose-extra.yml new file mode 100644 index 000000000..d85ffeeee --- /dev/null +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/docker-compose-extra.yml @@ -0,0 +1,5 @@ +services: + scheduler: + volumes: + - ./optional-components/scheduler-job-clean_old_files/${__SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_CONFIG_LOC:-blank.config.yml}:/scheduler-job-configs/clean_old_files_hummingbird.yml:ro} +# TODO: mount an empty config file instead \ No newline at end of file diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/config.yml.template b/birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/config.yml.template new file mode 100644 index 000000000..097eff201 --- /dev/null +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/config.yml.template @@ -0,0 +1,9 @@ +- name: clean_old_files_raven + comment: clean old WPS output files generated by Raven + schedule: '${RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY}' + command: 'bash /clean-old-files.sh "${RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}" "${RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}" /wps_outputs/raven' + dockerargs: >- + --rm --name scheduler-job-clean_old_files_raven + --volume ${COMPOSE_DIR}/optional-components/scheduler-job-clean_old_files/clean-old-files.sh:/clean-old-files.sh:ro + --volume "${COMPOSE_PROJECT_NAME}_wps_outputs:/wps_outputs:rw" + image: '${SCHEDULER_JOB_CLEAN_OLD_FILES_IMAGE}' diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/docker-compose-extra.yml b/birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/docker-compose-extra.yml new file mode 100644 index 000000000..ad4ced035 --- 
/dev/null +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/docker-compose-extra.yml @@ -0,0 +1,5 @@ +services: + scheduler: + volumes: + - ./optional-components/scheduler-job-clean_old_files/${__SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_CONFIG_LOC:-blank.config.yml}:/scheduler-job-configs/clean_old_files_raven.yml:ro} +# TODO: mount an empty config file instead \ No newline at end of file diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/scheduler/docker-compose-extra.yml b/birdhouse/optional-components/scheduler-job-clean_old_files/config/scheduler/docker-compose-extra.yml deleted file mode 100644 index 327190ff3..000000000 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/config/scheduler/docker-compose-extra.yml +++ /dev/null @@ -1,4 +0,0 @@ -services: - scheduler: - volumes: - - ./optional-components/scheduler-job-clean_old_files/config.yml:/scheduler-job-configs/clean_old_files.yml:ro diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/config.yml.template b/birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/config.yml.template new file mode 100644 index 000000000..6f8763e74 --- /dev/null +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/config.yml.template @@ -0,0 +1,9 @@ +- name: clean_old_files_thredds + comment: clean old log files generated by Thredds + schedule: '${THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY}' + command: 'bash /clean-old-files.sh "${THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS}" "${THREDDS_LOGS_DELETE_FILES_TIME_MODE}" /thredds' + dockerargs: >- + --rm --name scheduler-job-clean_old_files_thredds + --volume ${COMPOSE_DIR}/optional-components/scheduler-job-clean_old_files/clean-old-files.sh:/clean-old-files.sh:ro + --volume "thredds_persistence:/thredds:rw" + image: '${SCHEDULER_JOB_CLEAN_OLD_FILES_IMAGE}' diff --git 
a/birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/docker-compose-extra.yml b/birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/docker-compose-extra.yml new file mode 100644 index 000000000..b3c25ea74 --- /dev/null +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/docker-compose-extra.yml @@ -0,0 +1,5 @@ +services: + scheduler: + volumes: + - ./optional-components/scheduler-job-clean_old_files/${__SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_CONFIG_LOC:-blank.config.yml}:/scheduler-job-configs/clean_old_files_thredds.yml:ro} +# TODO: mount an empty config file instead \ No newline at end of file diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/default.env b/birdhouse/optional-components/scheduler-job-clean_old_files/default.env index bd0537304..b5f23dc1d 100644 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/default.env +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/default.env @@ -1,24 +1,59 @@ -export SCHEDULER_JOB_CLEAN_OLD_FILES_IMAGE='${BASH_IMAGE}' +export SCHEDULER_JOB_CLEAN_OLD_FILES_DOCKER=alpine # alpine contains find with -ctime -mtime and -atime options (busybox based containers do not) +export SCHEDULER_JOB_CLEAN_OLD_FILES_VERSION=3.2.1 +export SCHEDULER_JOB_CLEAN_OLD_FILES_IMAGE='${SCHEDULER_JOB_CLEAN_OLD_FILES_DOCKER}:${SCHEDULER_JOB_CLEAN_OLD_FILES_VERSION}' -export SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="5 2 * * 0" # weekly on Sunday at 2:05 +if echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -qv 'scheduler[[:space:]]*$'; then + export FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= + export FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime + export FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="5 4 * * 0" # weekly on Sunday at 4:05 + export FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false + export __SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_CONFIG_LOC='$( [ "${FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo 
"config/finch/config.yml" )' -export SCHEDULER_JOB_CLEAN_OLD_FILES_VOLUMES='$(for opt in ${SCHEDULER_JOB_CLEAN_OLD_FILES_OPTIONS}; do printf " --volume %s:rw " "$(echo $opt | cut -d\| -f 1)"; done)' + export HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= + export HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime + export HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="10 4 * * 0" # weekly on Sunday at 4:10 + export HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false + export __SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_CONFIG_LOC='$( [ "${HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "config/hummingbird/config.yml" )' + + export RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= + export RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime + export RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="15 4 * * 0" # weekly on Sunday at 4:15 + export RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false + export __SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_CONFIG_LOC='$( [ "${RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "config/raven/config.yml" )' + + export THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS= + export THREDDS_LOGS_DELETE_FILES_TIME_MODE=mtime + export THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="20 4 * * 0" # weekly on Sunday at 4:20 + export THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false + export __SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_CONFIG_LOC='$( [ "${THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "config/thredds/config.yml" )' +fi export DELAYED_EVAL=" $DELAYED_EVAL SCHEDULER_JOB_CLEAN_OLD_FILES_IMAGE - SCHEDULER_JOB_CLEAN_OLD_FILES_OPTIONS - SCHEDULER_JOB_CLEAN_OLD_FILES_VOLUMES + __SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_CONFIG_LOC + __SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_CONFIG_LOC + __SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_CONFIG_LOC + __SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_CONFIG_LOC " OPTIONAL_VARS=" $OPTIONAL_VARS - \$SCHEDULER_JOB_CLEAN_OLD_FILES_OPTIONS - 
\$SCHEDULER_JOB_CLEAN_OLD_FILES_VOLUMES + \$FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS + \$FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE + \$FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY + \$HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS + \$HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE + \$HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY + \$RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS + \$RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE + \$RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY + \$THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS + \$THREDDS_LOGS_DELETE_FILES_TIME_MODE + \$THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY " VARS=" $VARS \$SCHEDULER_JOB_CLEAN_OLD_FILES_IMAGE - \$SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY " diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/pre-docker-compose-up.include b/birdhouse/optional-components/scheduler-job-clean_old_files/pre-docker-compose-up.include index 0003e2fca..8b5520fdf 100644 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/pre-docker-compose-up.include +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/pre-docker-compose-up.include @@ -1,3 +1,23 @@ -if [ -z "$SCHEDULER_JOB_CLEAN_OLD_FILES_OPTIONS" ]; then - log WARN 'The scheduler-job-clean_old_files component is enabled but no files are scheduled to be deleted. Please reconfigure this component or disable it.' +if echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -qv 'scheduler[[:space:]]*$'; then + _acceptable_modes='|mtime|ctime|atime|' + + if [ "${FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -qv 'finch[[:space:]]*$'; then + echo "$FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS variable must be an integer not '${FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the finch file cleaning job. This job will not run properly!" 
+ [ "${_acceptable_modes#*"|${FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}|"}" = "${_acceptable_modes}" ] && log WARN "FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}'. Please set this variable to a valid option or disable the finch file cleaning job. This job will not run properly!" + fi + + if [ "${HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -qv 'hummingbird[[:space:]]*$'; then + echo "$HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS variable must be an integer not '${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the hummingbird file cleaning job. This job will not run properly!" + [ "${_acceptable_modes#*"|${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}|"}" = "${_acceptable_modes}" ] && log WARN "HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}'. Please set this variable to a valid option or disable the hummingbird file cleaning job. This job will not run properly!" + fi + + if [ "${RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -qv 'raven[[:space:]]*$'; then + echo "$RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS variable must be an integer not '${RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the raven file cleaning job. This job will not run properly!" 
+ [ "${_acceptable_modes#*"|${RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}|"}" = "${_acceptable_modes}" ] && log WARN "RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}'. Please set this variable to a valid option or disable the raven file cleaning job. This job will not run properly!" + fi + + if [ "${THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -qv 'thredds[[:space:]]*$'; then + echo "$THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS variable must be an integer not '${THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the thredds file cleaning job. This job will not run properly!" + [ "${_acceptable_modes#*"|${THREDDS_LOGS_DELETE_FILES_TIME_MODE}|"}" = "${_acceptable_modes}" ] && log WARN "THREDDS_LOGS_DELETE_FILES_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${THREDDS_LOGS_DELETE_FILES_TIME_MODE}'. Please set this variable to a valid option or disable the thredds file cleaning job. This job will not run properly!" 
+ fi fi From 2d198e8dc4cdbf82965fc66386d32d58660a95ea Mon Sep 17 00:00:00 2001 From: mishaschwartz <4380924+mishaschwartz@users.noreply.github.com> Date: Thu, 20 Mar 2025 16:25:33 -0400 Subject: [PATCH 3/7] update docs, fix alpine version, fix command executable --- birdhouse/components/README.rst | 15 ++----- birdhouse/env.local.example | 43 +++++++++++++++---- .../config/finch/config.yml.template | 2 +- .../config/hummingbird/config.yml.template | 2 +- .../config/raven/config.yml.template | 2 +- .../config/thredds/config.yml.template | 2 +- .../scheduler-job-clean_old_files/default.env | 10 ++--- 7 files changed, 47 insertions(+), 29 deletions(-) diff --git a/birdhouse/components/README.rst b/birdhouse/components/README.rst index 0ee3efbcc..da243e581 100644 --- a/birdhouse/components/README.rst +++ b/birdhouse/components/README.rst @@ -61,18 +61,11 @@ component directory to the ``BIRDHOUSE_EXTRA_CONF_DIRS`` variable in your local * Automatically remove old files - * Removes THREDDS log files and WPS output files older than a specific number of days - - * In order to remove THREDDS log files the ``thredds`` component needs to be enabled. - - * Set the ``THREDDS_DELETE_FILES_OLDER_THAN_DAYS`` variable in the local environment file to an integer specifying - how old a THREDDS log file needs to be before it is deleted. - - * In order to remove WPS output files, at least one of the WPS components needs to be enabled. - - * Set the ``WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS`` variable in the local environment file to an integer specifying - how old a WPS output file needs to be before it is deleted. + * Removes files generated by other components that may accumulate over time and are not managed automatically by those components. + * Currently supports removing WPS output files from the ``finch``, ``raven``, and ``hummingbird`` components as well as log files + from the ``thredds`` component. 
+ * component location: ``optional-components/scheduler-job-clean_old_files`` For additional configuration options for all these jobs see the ``env.local.example`` file diff --git a/birdhouse/env.local.example b/birdhouse/env.local.example index 726280a36..fdb2ed99f 100644 --- a/birdhouse/env.local.example +++ b/birdhouse/env.local.example @@ -206,15 +206,40 @@ export GEOSERVER_ADMIN_PASSWORD="${__DEFAULT__GEOSERVER_ADMIN_PASSWORD}" # These variables configure the scheduler-job-clean_old_files component # -# Delete THREDDS log files older than X days (e.g. X=20): -#export THREDDS_DELETE_FILES_OLDER_THAN_DAYS=20 -# -# Delete WPS output files older than X days (e.g. X=90): -#export WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS=90 -# -# Set cron schedule for the clean old files job (how often the job runs). -# By default it runs weekly on Sunday at 2:05 am: -#export SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="5 2 * * 0" +# For all options below: +# - variables that end with SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED will enable a clean_old_files job for the relevant component if set to 'true' +# - variables that end with DELETE_FILES_OLDER_THAN_DAYS set a number of days. Files older than this number of days will be deleted every time +# the scheduler-job-clean_old_files scheduler job runs. +# - variables that end with DELETE_FILES_TIME_MODE are used by the find command to calculate the age of a file: +# - atime: delete files that haven't been accessed in X days +# - mtime: delete files that haven't been modified in X days +# - ctime: delete files whose status (inode metadata) last changed more than X days ago (note: this is not the creation time) +# - variables that end with SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY set the frequency at which the relevant clean old files job should be run. This +# value is a string that conforms to the cron schedule format. 
+# +# Delete old WPS output files generated by the finch WPS component +#export FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer +#export FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime +#export FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="5 4 * * 0" # weekly on Sunday at 4:05 +#export FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false +# +# Delete old WPS output files generated by the hummingbird WPS component +#export HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer +#export HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime +#export HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="10 4 * * 0" # weekly on Sunday at 4:10 +#export HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false +# +# Delete old WPS output files generated by the raven WPS component +#export RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer +#export RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime +#export RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="10 4 * * 0" # weekly on Sunday at 4:10 +#export RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false +# +# Delete old log files generated by the thredds component +#export THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer +#export THREDDS_LOGS_DELETE_FILES_TIME_MODE=mtime +#export THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="20 4 * * 0" # weekly on Sunday at 4:20 +#export THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false ############################################################# # Proxy variables diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/config.yml.template b/birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/config.yml.template index c60d61d5d..6915a03f0 100644 --- 
a/birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/config.yml.template +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/config.yml.template @@ -1,7 +1,7 @@ - name: clean_old_files_finch comment: clean old WPS output files generated by Finch schedule: '${FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY}' - command: 'bash /clean-old-files.sh "${FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}" "${FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}" /wps_outputs/finch' + command: 'sh /clean-old-files.sh "${FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}" "${FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}" /wps_outputs/finch' dockerargs: >- --rm --name scheduler-job-clean_old_files_finch --volume ${COMPOSE_DIR}/optional-components/scheduler-job-clean_old_files/clean-old-files.sh:/clean-old-files.sh:ro diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/config.yml.template b/birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/config.yml.template index d9ec53b31..ccc47ec51 100644 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/config.yml.template +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/config.yml.template @@ -1,7 +1,7 @@ - name: clean_old_files_hummingbird comment: clean old WPS output files generated by Hummingbird schedule: '${HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY}' - command: 'bash /clean-old-files.sh "${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}" "${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}" /wps_outputs/hummingbird' + command: 'sh /clean-old-files.sh "${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}" "${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}" /wps_outputs/hummingbird' dockerargs: >- --rm --name scheduler-job-clean_old_files_hummingbird --volume 
${COMPOSE_DIR}/optional-components/scheduler-job-clean_old_files/clean-old-files.sh:/clean-old-files.sh:ro diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/config.yml.template b/birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/config.yml.template index 097eff201..a4bd80e18 100644 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/config.yml.template +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/config.yml.template @@ -1,7 +1,7 @@ - name: clean_old_files_raven comment: clean old WPS output files generated by Raven schedule: '${RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY}' - command: 'bash /clean-old-files.sh "${RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}" "${RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}" /wps_outputs/raven' + command: 'sh /clean-old-files.sh "${RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}" "${RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}" /wps_outputs/raven' dockerargs: >- --rm --name scheduler-job-clean_old_files_raven --volume ${COMPOSE_DIR}/optional-components/scheduler-job-clean_old_files/clean-old-files.sh:/clean-old-files.sh:ro diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/config.yml.template b/birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/config.yml.template index 6f8763e74..2b19688ad 100644 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/config.yml.template +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/config.yml.template @@ -1,7 +1,7 @@ - name: clean_old_files_thredds comment: clean old log files generated by Thredds schedule: '${THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY}' - command: 'bash /clean-old-files.sh "${THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS}" "${THREDDS_LOGS_DELETE_FILES_TIME_MODE}" /thredds' + command: 'sh /clean-old-files.sh 
"${THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS}" "${THREDDS_LOGS_DELETE_FILES_TIME_MODE}" /thredds' dockerargs: >- --rm --name scheduler-job-clean_old_files_thredds --volume ${COMPOSE_DIR}/optional-components/scheduler-job-clean_old_files/clean-old-files.sh:/clean-old-files.sh:ro diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/default.env b/birdhouse/optional-components/scheduler-job-clean_old_files/default.env index b5f23dc1d..5276acd64 100644 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/default.env +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/default.env @@ -1,27 +1,27 @@ export SCHEDULER_JOB_CLEAN_OLD_FILES_DOCKER=alpine # alpine contains find with -ctime -mtime and -atime options (busybox based containers do not) -export SCHEDULER_JOB_CLEAN_OLD_FILES_VERSION=3.2.1 +export SCHEDULER_JOB_CLEAN_OLD_FILES_VERSION=3.21 export SCHEDULER_JOB_CLEAN_OLD_FILES_IMAGE='${SCHEDULER_JOB_CLEAN_OLD_FILES_DOCKER}:${SCHEDULER_JOB_CLEAN_OLD_FILES_VERSION}' if echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -qv 'scheduler[[:space:]]*$'; then - export FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= + export FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer export FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime export FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="5 4 * * 0" # weekly on Sunday at 4:05 export FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false export __SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_CONFIG_LOC='$( [ "${FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "config/finch/config.yml" )' - export HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= + export HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer export HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime export HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="10 4 * * 0" # weekly on 
Sunday at 4:10 export HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false export __SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_CONFIG_LOC='$( [ "${HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "config/hummingbird/config.yml" )' - export RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= + export RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer export RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime export RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="15 4 * * 0" # weekly on Sunday at 4:15 export RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false export __SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_CONFIG_LOC='$( [ "${RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "config/raven/config.yml" )' - export THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS= + export THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer export THREDDS_LOGS_DELETE_FILES_TIME_MODE=mtime export THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="20 4 * * 0" # weekly on Sunday at 4:20 export THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false From 84b0ef6e7b24c8c8d58e2ca1863e5fefe04d4054 Mon Sep 17 00:00:00 2001 From: mishaschwartz <4380924+mishaschwartz@users.noreply.github.com> Date: Thu, 20 Mar 2025 16:32:33 -0400 Subject: [PATCH 4/7] cleanup old code and comments --- birdhouse/components/thredds/default.env | 6 ------ birdhouse/components/wps_outputs-volume/default.env | 6 ------ .../config/finch/docker-compose-extra.yml | 1 - .../config/hummingbird/docker-compose-extra.yml | 1 - .../config/raven/docker-compose-extra.yml | 1 - .../config/thredds/docker-compose-extra.yml | 1 - 6 files changed, 16 deletions(-) diff --git a/birdhouse/components/thredds/default.env b/birdhouse/components/thredds/default.env index 63b0a38f7..ff97522d2 100644 --- a/birdhouse/components/thredds/default.env +++ b/birdhouse/components/thredds/default.env @@ 
-38,12 +38,6 @@ export THREDDS_DATASET_DATASETSCAN_BODY=' ' -export THREDDS_DELETE_FILES_OLDER_THAN_DAYS= -export SCHEDULER_JOB_CLEAN_OLD_FILES_OPTIONS=" - ${SCHEDULER_JOB_CLEAN_OLD_FILES_OPTIONS} - \$([ -n \"\${THREDDS_DELETE_FILES_OLDER_THAN_DAYS}\" ] && echo \"thredds_persistence:/thredds|/thredds/logs/threddsServlet.*.log|\${THREDDS_DELETE_FILES_OLDER_THAN_DAYS}\") -" - # add any new variables not already in 'VARS' or 'OPTIONAL_VARS' that must be replaced in templates here VARS=" $VARS diff --git a/birdhouse/components/wps_outputs-volume/default.env b/birdhouse/components/wps_outputs-volume/default.env index 2c39860ec..d9516ca65 100644 --- a/birdhouse/components/wps_outputs-volume/default.env +++ b/birdhouse/components/wps_outputs-volume/default.env @@ -3,12 +3,6 @@ OPTIONAL_VARS=" \$SECURE_DATA_PROXY_AUTH_INCLUDE " -export WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= -export SCHEDULER_JOB_CLEAN_OLD_FILES_OPTIONS=" - ${SCHEDULER_JOB_CLEAN_OLD_FILES_OPTIONS} - \$([ -n \"\${WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}\" ] && echo \"\${COMPOSE_PROJECT_NAME:-birdhouse}_wps_outputs:/wps_outputs|/wps_outputs|\${WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}\") -" - # add any new variables not already in 'VARS' or 'OPTIONAL_VARS' that must be replaced in templates here # single quotes are important in below list to keep variable names intact until 'birdhouse-compose' parses them EXTRA_VARS=' diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/docker-compose-extra.yml b/birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/docker-compose-extra.yml index 43cf7218c..fa8610e1c 100644 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/docker-compose-extra.yml +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/docker-compose-extra.yml @@ -2,4 +2,3 @@ services: scheduler: volumes: - 
./optional-components/scheduler-job-clean_old_files/${__SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_CONFIG_LOC:-blank.config.yml}:/scheduler-job-configs/clean_old_files_finch.yml:ro} -# TODO: mount an empty config file instead \ No newline at end of file diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/docker-compose-extra.yml b/birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/docker-compose-extra.yml index d85ffeeee..f9b06024c 100644 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/docker-compose-extra.yml +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/docker-compose-extra.yml @@ -2,4 +2,3 @@ services: scheduler: volumes: - ./optional-components/scheduler-job-clean_old_files/${__SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_CONFIG_LOC:-blank.config.yml}:/scheduler-job-configs/clean_old_files_hummingbird.yml:ro} -# TODO: mount an empty config file instead \ No newline at end of file diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/docker-compose-extra.yml b/birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/docker-compose-extra.yml index ad4ced035..8e85addbd 100644 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/docker-compose-extra.yml +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/docker-compose-extra.yml @@ -2,4 +2,3 @@ services: scheduler: volumes: - ./optional-components/scheduler-job-clean_old_files/${__SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_CONFIG_LOC:-blank.config.yml}:/scheduler-job-configs/clean_old_files_raven.yml:ro} -# TODO: mount an empty config file instead \ No newline at end of file diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/docker-compose-extra.yml b/birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/docker-compose-extra.yml index 
b3c25ea74..61440f7e9 100644 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/docker-compose-extra.yml +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/docker-compose-extra.yml @@ -2,4 +2,3 @@ services: scheduler: volumes: - ./optional-components/scheduler-job-clean_old_files/${__SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_CONFIG_LOC:-blank.config.yml}:/scheduler-job-configs/clean_old_files_thredds.yml:ro} -# TODO: mount an empty config file instead \ No newline at end of file From 796766e16310b5c0cad0f8e015a856d8f60b80ba Mon Sep 17 00:00:00 2001 From: mishaschwartz <4380924+mishaschwartz@users.noreply.github.com> Date: Thu, 20 Mar 2025 16:35:58 -0400 Subject: [PATCH 5/7] update CHANGES --- CHANGES.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 6ed61258b..caf2305e9 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -101,9 +101,11 @@ - Introduce a scheduler job to delete old files that may accumulate over time Creates the `optional-component-clean_old_files` job that deletes old THREDDS log files and WPS output files. - To set the oldest file that will be kept for each of these options, set the `THREDDS_DELETE_FILES_OLDER_THAN_DAYS` - and/or the `WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS` variables in the local environment files (see - `env.local.example` or the `scheduler` documentation for details). + Allows individual cleanup jobs to be enabled for each of `raven`, `finch`, `hummingbird`, and `thredds` components. + Allows the user to configure how old a file must be before it is deleted (age in days) and how to calculate the age + of the file (time since last modified, time since last accessed, time since created). + + (see `env.local.example` or the `scheduler` documentation for details). 
[2.10.1](https://github.com/bird-house/birdhouse-deploy/tree/2.10.1) (2025-03-10) ------------------------------------------------------------------------------------------------------------------ From 1059ffe58d28203f7351ffc2fa853d6870e47d96 Mon Sep 17 00:00:00 2001 From: mishaschwartz <4380924+mishaschwartz@users.noreply.github.com> Date: Wed, 26 Mar 2025 13:48:07 -0400 Subject: [PATCH 6/7] reconfigure warnings if dependent components are not enabled --- .../scheduler-job-clean_old_files/default.env | 42 +++++++++---------- .../pre-docker-compose-up.include | 38 +++++++++-------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/default.env b/birdhouse/optional-components/scheduler-job-clean_old_files/default.env index 5276acd64..a9d81814a 100644 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/default.env +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/default.env @@ -2,31 +2,29 @@ export SCHEDULER_JOB_CLEAN_OLD_FILES_DOCKER=alpine # alpine contains find with - export SCHEDULER_JOB_CLEAN_OLD_FILES_VERSION=3.21 export SCHEDULER_JOB_CLEAN_OLD_FILES_IMAGE='${SCHEDULER_JOB_CLEAN_OLD_FILES_DOCKER}:${SCHEDULER_JOB_CLEAN_OLD_FILES_VERSION}' -if echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -qv 'scheduler[[:space:]]*$'; then - export FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer - export FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime - export FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="5 4 * * 0" # weekly on Sunday at 4:05 - export FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false - export __SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_CONFIG_LOC='$( [ "${FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "config/finch/config.yml" )' +export FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer +export 
FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime +export FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="5 4 * * 0" # weekly on Sunday at 4:05 +export FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false +export __SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_CONFIG_LOC='$( [ "${FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "config/finch/config.yml" )' - export HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer - export HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime - export HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="10 4 * * 0" # weekly on Sunday at 4:10 - export HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false - export __SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_CONFIG_LOC='$( [ "${HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "config/hummingbird/config.yml" )' +export HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer +export HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime +export HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="10 4 * * 0" # weekly on Sunday at 4:10 +export HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false +export __SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_CONFIG_LOC='$( [ "${HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "config/hummingbird/config.yml" )' - export RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer - export RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime - export RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="15 4 * * 0" # weekly on Sunday at 4:15 - export RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false - export __SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_CONFIG_LOC='$( [ "${RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "config/raven/config.yml" )' +export RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= # 
unset by default if this job is enabled this must be set to an integer +export RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime +export RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="15 4 * * 0" # weekly on Sunday at 4:15 +export RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false +export __SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_CONFIG_LOC='$( [ "${RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "config/raven/config.yml" )' - export THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer - export THREDDS_LOGS_DELETE_FILES_TIME_MODE=mtime - export THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="20 4 * * 0" # weekly on Sunday at 4:20 - export THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false - export __SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_CONFIG_LOC='$( [ "${THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "config/thredds/config.yml" )' -fi +export THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer +export THREDDS_LOGS_DELETE_FILES_TIME_MODE=mtime +export THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="20 4 * * 0" # weekly on Sunday at 4:20 +export THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false +export __SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_CONFIG_LOC='$( [ "${THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "config/thredds/config.yml" )' export DELAYED_EVAL=" $DELAYED_EVAL diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/pre-docker-compose-up.include b/birdhouse/optional-components/scheduler-job-clean_old_files/pre-docker-compose-up.include index 8b5520fdf..406d39530 100644 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/pre-docker-compose-up.include +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/pre-docker-compose-up.include @@ -1,23 +1,25 @@ -if echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -qv 'scheduler[[:space:]]*$'; then - 
_acceptable_modes='|mtime|ctime|atime|' +if ! echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -q 'scheduler[[:space:]]*$'; then + log WARN 'The scheduler-job-clean_old_files component is enabled but the scheduler component is not. This WILL cause problems. Please disable the scheduler-job-clean_old_files component.' +fi - if [ "${FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -qv 'finch[[:space:]]*$'; then - echo "$FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS variable must be an integer not '${FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the finch file cleaning job. This job will not run properly!" - [ "${_acceptable_modes#*"|${FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}|"}" = "${_acceptable_modes}" ] && log WARN "FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}'. Please set this variable to a valid option or disable the finch file cleaning job. This job will not run properly!" - fi +_acceptable_modes='|mtime|ctime|atime|' - if [ "${HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -qv 'hummingbird[[:space:]]*$'; then - echo "$HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS variable must be an integer not '${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the hummingbird file cleaning job. This job will not run properly!" 
- [ "${_acceptable_modes#*"|${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}|"}" = "${_acceptable_modes}" ] && log WARN "HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}'. Please set this variable to a valid option or disable the hummingbird file cleaning job. This job will not run properly!" - fi +if [ "${FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && ! echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -q 'finch[[:space:]]*$'; then + echo "$FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS variable must be an integer not '${FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the finch file cleaning job. This job will not run properly!" + [ "${_acceptable_modes#*"${FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}"}" = "${_acceptable_modes}" ] && log WARN "FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}'. Please set this variable to a valid option or disable the finch file cleaning job. This job will not run properly!" +fi - if [ "${RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -qv 'raven[[:space:]]*$'; then - echo "$RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS variable must be an integer not '${RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the raven file cleaning job. This job will not run properly!" 
- [ "${_acceptable_modes#*"|${RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}|"}" = "${_acceptable_modes}" ] && log WARN "RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}'. Please set this variable to a valid option or disable the raven file cleaning job. This job will not run properly!" - fi +if [ "${HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && ! echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -q 'hummingbird[[:space:]]*$'; then + echo "$HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS variable must be an integer not '${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the hummingbird file cleaning job. This job will not run properly!" + [ "${_acceptable_modes#*"${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}"}" = "${_acceptable_modes}" ] && log WARN "HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}'. Please set this variable to a valid option or disable the hummingbird file cleaning job. This job will not run properly!" +fi + +if [ "${RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && ! echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -q 'raven[[:space:]]*$'; then + echo "$RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS variable must be an integer not '${RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the raven file cleaning job. This job will not run properly!" 
+ [ "${_acceptable_modes#*"${RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}"}" = "${_acceptable_modes}" ] && log WARN "RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}'. Please set this variable to a valid option or disable the raven file cleaning job. This job will not run properly!" +fi - if [ "${THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -qv 'thredds[[:space:]]*$'; then - echo "$THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS variable must be an integer not '${THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the thredds file cleaning job. This job will not run properly!" - [ "${_acceptable_modes#*"|${THREDDS_LOGS_DELETE_FILES_TIME_MODE}|"}" = "${_acceptable_modes}" ] && log WARN "THREDDS_LOGS_DELETE_FILES_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${THREDDS_LOGS_DELETE_FILES_TIME_MODE}'. Please set this variable to a valid option or disable the thredds file cleaning job. This job will not run properly!" - fi +if [ "${THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && ! echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -q 'thredds[[:space:]]*$'; then + echo "$THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS variable must be an integer not '${THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the thredds file cleaning job. This job will not run properly!" + [ "${_acceptable_modes#*"${THREDDS_LOGS_DELETE_FILES_TIME_MODE}"}" = "${_acceptable_modes}" ] && log WARN "THREDDS_LOGS_DELETE_FILES_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${THREDDS_LOGS_DELETE_FILES_TIME_MODE}'. 
Please set this variable to a valid option or disable the thredds file cleaning job. This job will not run properly!" fi From f36262fdc6c63526cf627c7615d1008946ed574d Mon Sep 17 00:00:00 2001 From: mishaschwartz <4380924+mishaschwartz@users.noreply.github.com> Date: Fri, 12 Sep 2025 09:54:02 -0400 Subject: [PATCH 7/7] review comments --- CHANGES.md | 4 +- birdhouse/env.local.example | 32 +++++----- .../config/finch/config.yml.template | 4 +- .../config/hummingbird/config.yml.template | 4 +- .../config/raven/config.yml.template | 4 +- .../config/thredds/config.yml.template | 4 +- .../scheduler-job-clean_old_files/default.env | 64 +++++++++---------- .../pre-docker-compose-up.include | 24 +++---- 8 files changed, 70 insertions(+), 70 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 862f686cd..8d00b7b07 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -17,7 +17,7 @@ - Introduce a scheduler job to delete old files that may accumulate over time - Creates the `optional-component-clean_old_files` job that deletes old THREDDS log files and WPS output files. + Creates the `optional-component/clean_old_files` job that deletes old THREDDS log files and WPS output files. Allows individual cleanup jobs to be enabled for each of `raven`, `finch`, `hummingbird`, and `thredds` components. Allows the user to configure how old a file must be before it is deleted (age in days) and how to calculate the age of the file (time since last modified, time since last accessed, time since created). @@ -434,7 +434,7 @@ create additional jobs by adding them as custom components instead. What about... ? - - just schedule these jobs for a non-existant day linvalid arg that triggers usage messageike February 31st? + - just schedule these jobs for a non-existent day like February 31st? - Answer: This would technically work but is not obvious to the user. It is better to make this explicit. - just set the schedule to the `'#'` string?
- Answer: This is a hack that would work based on the specific way that the docker-crontab image sets schedules. diff --git a/birdhouse/env.local.example b/birdhouse/env.local.example index 2a447079d..05d032293 100644 --- a/birdhouse/env.local.example +++ b/birdhouse/env.local.example @@ -218,28 +218,28 @@ export GEOSERVER_ADMIN_PASSWORD="${__DEFAULT__GEOSERVER_ADMIN_PASSWORD}" # value is a string that conforms to the cron schedule format. # # Delete old WPS output files generated by the finch WPS component -#export FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer -#export FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime -#export FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="5 4 * * 0" # weekly on Sunday at 4:05 -#export FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false +#export SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer +#export SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_TIME_MODE=atime +#export SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_FREQUENCY="5 4 * * 0" # weekly on Sunday at 4:05 +#export SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_ENABLED=false # # Delete old WPS output files generated by the hummingbird WPS component -#export HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer -#export HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime -#export HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="10 4 * * 0" # weekly on Sunday at 4:10 -#export HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false +#export SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer +#export SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_TIME_MODE=atime +#export SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_FREQUENCY="10 4 * * 0" # weekly on Sunday at 4:10 +#export 
SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_ENABLED=false # # Delete old WPS output files generated by the raven WPS component -#export RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer -#export RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime -#export RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="10 4 * * 0" # weekly on Sunday at 4:10 -#export RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false +#export SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer +#export SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_TIME_MODE=atime +#export SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_FREQUENCY="10 4 * * 0" # weekly on Sunday at 4:10 +#export SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_ENABLED=false # # Delete old log files generated by the thredds component -#export THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer -#export THREDDS_LOGS_DELETE_FILES_TIME_MODE=mtime -#export THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="20 4 * * 0" # weekly on Sunday at 4:20 -#export THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false +#export SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer +#export SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_TIME_MODE=mtime +#export SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_FREQUENCY="20 4 * * 0" # weekly on Sunday at 4:20 +#export SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_ENABLED=false # These variables configure the scheduler-job-backup component diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/config.yml.template b/birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/config.yml.template index 6915a03f0..5c71c2308 100644 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/config.yml.template +++ 
b/birdhouse/optional-components/scheduler-job-clean_old_files/config/finch/config.yml.template @@ -1,7 +1,7 @@ - name: clean_old_files_finch comment: clean old WPS output files generated by Finch - schedule: '${FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY}' - command: 'sh /clean-old-files.sh "${FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}" "${FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}" /wps_outputs/finch' + schedule: '${SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_FREQUENCY}' + command: 'sh /clean-old-files.sh "${SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_OLDER_THAN_DAYS}" "${SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_TIME_MODE}" /wps_outputs/finch' dockerargs: >- --rm --name scheduler-job-clean_old_files_finch --volume ${COMPOSE_DIR}/optional-components/scheduler-job-clean_old_files/clean-old-files.sh:/clean-old-files.sh:ro diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/config.yml.template b/birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/config.yml.template index ccc47ec51..bd971b8df 100644 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/config.yml.template +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/hummingbird/config.yml.template @@ -1,7 +1,7 @@ - name: clean_old_files_hummingbird comment: clean old WPS output files generated by Hummingbird - schedule: '${HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY}' - command: 'sh /clean-old-files.sh "${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}" "${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}" /wps_outputs/hummingbird' + schedule: '${SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_FREQUENCY}' + command: 'sh /clean-old-files.sh "${SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_OLDER_THAN_DAYS}" "${SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_TIME_MODE}" /wps_outputs/hummingbird' dockerargs: >- --rm --name scheduler-job-clean_old_files_hummingbird --volume 
${COMPOSE_DIR}/optional-components/scheduler-job-clean_old_files/clean-old-files.sh:/clean-old-files.sh:ro diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/config.yml.template b/birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/config.yml.template index a4bd80e18..22b1c85f8 100644 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/config.yml.template +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/raven/config.yml.template @@ -1,7 +1,7 @@ - name: clean_old_files_raven comment: clean old WPS output files generated by Raven - schedule: '${RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY}' - command: 'sh /clean-old-files.sh "${RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}" "${RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}" /wps_outputs/raven' + schedule: '${SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_FREQUENCY}' + command: 'sh /clean-old-files.sh "${SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_OLDER_THAN_DAYS}" "${SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_TIME_MODE}" /wps_outputs/raven' dockerargs: >- --rm --name scheduler-job-clean_old_files_raven --volume ${COMPOSE_DIR}/optional-components/scheduler-job-clean_old_files/clean-old-files.sh:/clean-old-files.sh:ro diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/config.yml.template b/birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/config.yml.template index 2b19688ad..f7adb08c3 100644 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/config.yml.template +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/config/thredds/config.yml.template @@ -1,7 +1,7 @@ - name: clean_old_files_thredds comment: clean old log files generated by Thredds - schedule: '${THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY}' - command: 'sh /clean-old-files.sh "${THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS}" "${THREDDS_LOGS_DELETE_FILES_TIME_MODE}" /thredds' 
+ schedule: '${SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_FREQUENCY}' + command: 'sh /clean-old-files.sh "${SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_OLDER_THAN_DAYS}" "${SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_TIME_MODE}" /thredds' dockerargs: >- --rm --name scheduler-job-clean_old_files_thredds --volume ${COMPOSE_DIR}/optional-components/scheduler-job-clean_old_files/clean-old-files.sh:/clean-old-files.sh:ro diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/default.env b/birdhouse/optional-components/scheduler-job-clean_old_files/default.env index a9d81814a..9e0bde48b 100644 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/default.env +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/default.env @@ -2,29 +2,29 @@ export SCHEDULER_JOB_CLEAN_OLD_FILES_DOCKER=alpine # alpine contains find with - export SCHEDULER_JOB_CLEAN_OLD_FILES_VERSION=3.21 export SCHEDULER_JOB_CLEAN_OLD_FILES_IMAGE='${SCHEDULER_JOB_CLEAN_OLD_FILES_DOCKER}:${SCHEDULER_JOB_CLEAN_OLD_FILES_VERSION}' -export FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer -export FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime -export FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="5 4 * * 0" # weekly on Sunday at 4:05 -export FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false -export __SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_CONFIG_LOC='$( [ "${FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "config/finch/config.yml" )' +export SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer +export SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_TIME_MODE=atime +export SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_FREQUENCY="5 4 * * 0" # weekly on Sunday at 4:05 +export SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_ENABLED=false +export __SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_CONFIG_LOC='$( [ "${SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_ENABLED}" = "true" ] && echo 
"config/finch/config.yml" )' -export HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer -export HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime -export HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="10 4 * * 0" # weekly on Sunday at 4:10 -export HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false -export __SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_CONFIG_LOC='$( [ "${HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "config/hummingbird/config.yml" )' +export SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer +export SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_TIME_MODE=atime +export SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_FREQUENCY="10 4 * * 0" # weekly on Sunday at 4:10 +export SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_ENABLED=false +export __SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_CONFIG_LOC='$( [ "${SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_ENABLED}" = "true" ] && echo "config/hummingbird/config.yml" )' -export RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer -export RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE=atime -export RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="15 4 * * 0" # weekly on Sunday at 4:15 -export RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false -export __SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_CONFIG_LOC='$( [ "${RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "config/raven/config.yml" )' +export SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer +export SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_TIME_MODE=atime +export SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_FREQUENCY="15 4 * * 0" # weekly on Sunday at 4:15 +export SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_ENABLED=false +export 
__SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_CONFIG_LOC='$( [ "${SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_ENABLED}" = "true" ] && echo "config/raven/config.yml" )' -export THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer -export THREDDS_LOGS_DELETE_FILES_TIME_MODE=mtime -export THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY="20 4 * * 0" # weekly on Sunday at 4:20 -export THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED=false -export __SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_CONFIG_LOC='$( [ "${THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && echo "config/thredds/config.yml" )' +export SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_OLDER_THAN_DAYS= # unset by default if this job is enabled this must be set to an integer +export SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_TIME_MODE=mtime +export SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_FREQUENCY="20 4 * * 0" # weekly on Sunday at 4:20 +export SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_ENABLED=false +export __SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_CONFIG_LOC='$( [ "${SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_ENABLED}" = "true" ] && echo "config/thredds/config.yml" )' export DELAYED_EVAL=" $DELAYED_EVAL @@ -37,18 +37,18 @@ export DELAYED_EVAL=" OPTIONAL_VARS=" $OPTIONAL_VARS - \$FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS - \$FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE - \$FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY - \$HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS - \$HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE - \$HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY - \$RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS - \$RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE - \$RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY - \$THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS - \$THREDDS_LOGS_DELETE_FILES_TIME_MODE - \$THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_FREQUENCY + \$SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_OLDER_THAN_DAYS + \$SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_TIME_MODE + 
\$SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_FREQUENCY + \$SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_OLDER_THAN_DAYS + \$SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_TIME_MODE + \$SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_FREQUENCY + \$SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_OLDER_THAN_DAYS + \$SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_TIME_MODE + \$SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_FREQUENCY + \$SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_OLDER_THAN_DAYS + \$SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_TIME_MODE + \$SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_FREQUENCY " VARS=" diff --git a/birdhouse/optional-components/scheduler-job-clean_old_files/pre-docker-compose-up.include b/birdhouse/optional-components/scheduler-job-clean_old_files/pre-docker-compose-up.include index 406d39530..5194d5cd3 100644 --- a/birdhouse/optional-components/scheduler-job-clean_old_files/pre-docker-compose-up.include +++ b/birdhouse/optional-components/scheduler-job-clean_old_files/pre-docker-compose-up.include @@ -4,22 +4,22 @@ fi _acceptable_modes='|mtime|ctime|atime|' -if [ "${FINCH_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && ! echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -q 'finch[[:space:]]*$'; then - echo "$FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS variable must be an integer not '${FINCH_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the finch file cleaning job. This job will not run properly!" - [ "${_acceptable_modes#*"${FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}"}" = "${_acceptable_modes}" ] && log WARN "FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${FINCH_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}'. Please set this variable to a valid option or disable the finch file cleaning job. This job will not run properly!" +if [ "${SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_ENABLED}" = "true" ] && ! 
echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -q 'finch[[:space:]]*$'; then + echo "$SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_OLDER_THAN_DAYS variable must be an integer not '${SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the finch file cleaning job. This job will not run properly!" + [ "${_acceptable_modes#*"|${SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_TIME_MODE}|"}" = "${_acceptable_modes}" ] && log WARN "SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${SCHEDULER_JOB_CLEAN_OLD_FILES_FINCH_TIME_MODE}'. Please set this variable to a valid option or disable the finch file cleaning job. This job will not run properly!" fi -if [ "${HUMMINGBIRD_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && ! echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -q 'hummingbird[[:space:]]*$'; then - echo "$HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS variable must be an integer not '${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the hummingbird file cleaning job. This job will not run properly!" - [ "${_acceptable_modes#*"${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}"}" = "${_acceptable_modes}" ] && log WARN "HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${HUMMINGBIRD_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}'. Please set this variable to a valid option or disable the hummingbird file cleaning job. This job will not run properly!" +if [ "${SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_ENABLED}" = "true" ] && !
echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -q 'hummingbird[[:space:]]*$'; then + echo "$SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_OLDER_THAN_DAYS variable must be an integer not '${SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the hummingbird file cleaning job. This job will not run properly!" + [ "${_acceptable_modes#*"|${SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_TIME_MODE}|"}" = "${_acceptable_modes}" ] && log WARN "SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${SCHEDULER_JOB_CLEAN_OLD_FILES_HUMMINGBIRD_TIME_MODE}'. Please set this variable to a valid option or disable the hummingbird file cleaning job. This job will not run properly!" fi -if [ "${RAVEN_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && ! echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -q 'raven[[:space:]]*$'; then - echo "$RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS variable must be an integer not '${RAVEN_WPS_OUTPUTS_DELETE_FILES_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the raven file cleaning job. This job will not run properly!" - [ "${_acceptable_modes#*"${RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}"}" = "${_acceptable_modes}" ] && log WARN "RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${RAVEN_WPS_OUTPUTS_DELETE_FILES_TIME_MODE}'. Please set this variable to a valid option or disable the raven file cleaning job. This job will not run properly!" +if [ "${SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_ENABLED}" = "true" ] && !
echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -q 'raven[[:space:]]*$'; then + echo "$SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_OLDER_THAN_DAYS variable must be an integer not '${SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the raven file cleaning job. This job will not run properly!" + [ "${_acceptable_modes#*"|${SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_TIME_MODE}|"}" = "${_acceptable_modes}" ] && log WARN "SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${SCHEDULER_JOB_CLEAN_OLD_FILES_RAVEN_TIME_MODE}'. Please set this variable to a valid option or disable the raven file cleaning job. This job will not run properly!" fi -if [ "${THREDDS_SCHEDULER_JOB_CLEAN_OLD_FILES_ENABLED}" = "true" ] && ! echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -q 'thredds[[:space:]]*$'; then - echo "$THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS variable must be an integer not '${THREDDS_LOGS_DELETE_FILES_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the thredds file cleaning job. This job will not run properly!" - [ "${_acceptable_modes#*"${THREDDS_LOGS_DELETE_FILES_TIME_MODE}"}" = "${_acceptable_modes}" ] && log WARN "THREDDS_LOGS_DELETE_FILES_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${THREDDS_LOGS_DELETE_FILES_TIME_MODE}'. Please set this variable to a valid option or disable the thredds file cleaning job. This job will not run properly!" +if [ "${SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_ENABLED}" = "true" ] && !
echo "${BIRDHOUSE_EXTRA_CONF_DIRS}" | grep -q 'thredds[[:space:]]*$'; then + echo "$SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_OLDER_THAN_DAYS" | grep -q '^[0-9][0-9]*$' || log WARN "SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_OLDER_THAN_DAYS variable must be an integer not '${SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_OLDER_THAN_DAYS}'. Please set this variable to an integer or disable the thredds file cleaning job. This job will not run properly!" + [ "${_acceptable_modes#*"|${SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_TIME_MODE}|"}" = "${_acceptable_modes}" ] && log WARN "SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_TIME_MODE variable must be one of 'mtime', 'atime', or 'ctime' not '${SCHEDULER_JOB_CLEAN_OLD_FILES_THREDDS_TIME_MODE}'. Please set this variable to a valid option or disable the thredds file cleaning job. This job will not run properly!" fi