@@ -133,8 +133,9 @@ exec > /tmp/splunkconf-backup-debug.log 2>&1
133133# 20240629 replace direct var inclusion with loading function logic
134134# 20230702 relax check for tags to work better when only some tags are set
135135# 20240703 add more tags
136+ # 20241008 add settings to tune kvstore ready wait times, add more messages to report timeouts, add support to configure these by tags
136137
137- VERSION=" 20240703a "
138+ VERSION=" 20241008a "
138139
139140# ##### BEGIN default parameters
140141# dont change here, use the configuration file to override them
@@ -280,6 +281,15 @@ BACKUPSCRIPTS=1
280281
281282
282283# KVSTORE Backup options
284+
285+ # how long we wait, before starting the backup, for kvstore to become ready (because splunkd may not have finished starting kvstore)
286+ # This is the number of 10s loops to wait (ie 100 = up to roughly 1000s)
287+ KVSTOREREADYINIT=100
288+ # how long we wait, after launching the kvdump backup, for kvstore to be back to ready (ie backup finished)
289+ # This is the number of 10s loops to wait (ie 100 = up to roughly 1000s)
290+ KVSTOREREADYBACKUP=100
291+
292+
283293# stop splunk for kvstore backup (that can be a bad idea if you have a cluster and stop all instances at the same time or without maintenance mode)
284294# risk is that data could be corrupted if something is written to kvstore while we do the backup
285295# RESTARTFORKVBACKUP=1
@@ -928,6 +938,20 @@ if [ -z ${splunks3backupbucket+x} ]; then
928938fi
929939
930940
941+ if [ -z ${splunkkvstorereadyinit+x} ]; then
942+ debug_log " tag splunkkvstorereadyinit not set, using value ${KVSTOREREADYINIT} from configuration files"
943+ else
944+ KVSTOREREADYINIT=${splunkkvstorereadyinit}
945+ debug_log " setting KVSTOREREADYINIT=${KVSTOREREADYINIT} via tags"
946+ fi
947+
948+ if [ -z ${splunkkvstorereadybackup+x} ]; then
949+ debug_log " tag splunkkvstorereadybackup not set, using value ${KVSTOREREADYBACKUP} from configuration files"
950+ else
951+ KVSTOREREADYBACKUP=${splunkkvstorereadybackup}
952+ debug_log " setting KVSTOREREADYBACKUP=${KVSTOREREADYBACKUP} via tags"
953+ fi
954+
931955if [ -z ${splunks3endpointurl+x} ]; then
932956 debug_log " tag splunks3endpointurl not set, using value ${REMOTES3ENDPOINTURL} from configuration files"
933957else
@@ -1386,7 +1410,8 @@ if [ "$MODE" == "0" ] || [ "$MODE" == "kvdump" ] || [ "$MODE" == "kvstore" ] ||
13861410 KVARCHIVE=" backupconfsplunk-kvdump-${TODAY} "
13871411 MESS1=" MGMTURL=${MGMTURL} KVARCHIVE=${KVARCHIVE} " ;
13881412 debug_log " pre backup : checking in case kvstore is not ready like initialization at start"
1389- COUNTER=50
1413+ COUNTER=${KVSTOREREADYINIT}
1414+ COUNTERMAX=${KVSTOREREADYINIT}
13901415 RES=" "
13911416 RES2=" "
13921417 # wait a bit (up to KVSTOREREADYINIT*10s) for kvstore to report ready before launching the backup, especially for big kvstore/busy env (io)
@@ -1401,24 +1426,30 @@ if [ "$MODE" == "0" ] || [ "$MODE" == "kvdump" ] || [ "$MODE" == "kvstore" ] ||
14011426 RES2=" "
14021427 fi
14031428 # echo_log "RES=$RES"
1404- debug_log " COUNTER=$COUNTER $MESSVER $MESS1 type=$TYPE object=${kvbackupmode} action=backup result=running info=prebackup RES=$RES RESREADY=$RESREADY RES2=$RES "
1429+ debug_log " COUNTER=$COUNTER (max= ${COUNTERMAX} ) $MESSVER $MESS1 type=$TYPE object=${kvbackupmode} action=backup result=running info=prebackup RES=$RES RESREADY=$RESREADY RES2=$RES "
14051430 let COUNTER-=1
14061431 sleep 10
14071432 done
1433+ if [[ -z " $RES " ]]; then
1434+ warn_log " COUNTER=$COUNTER (max=${COUNTERMAX} ) $MESSVER $MESS1 type=$TYPE object=$kvbackupmode result=failure dest=${LFICKVDUMP} durationms=${DURATION} size=${FILESIZE} ATTENTION : we didnt get ready status ! Please investigate or tune up KVSTOREREADYINIT to wait more"
1435+ else
1436+ debug_log " OK: KVSTORE REady state before launching backup"
1437+ fi
14081438 # here we try to start backup anyway but if the status was not ready , something is probably wrong
14091439 START=$(( $(date +% s% N)) );
14101440 debug_log " launching kvdump backup via REST API"
14111441 RES=` curl --silent -k https://${MGMTURL} /services/kvstore/backup/create -X post --header " Authorization: Splunk ${sessionkey} " -d" archiveName=${KVARCHIVE} " `
14121442
14131443 # echo_log "KVDUMP CREATE RES=$RES"
1414- COUNTER=50
1444+ COUNTER=${KVSTOREREADYBACKUP}
1445+ COUNTERMAX=${KVSTOREREADYBACKUP}
14151446 RES=" "
14161447 # wait a bit (up to KVSTOREREADYBACKUP*10s) for backup to complete, especially for big kvstore/busy env (io)
14171448 # increase KVSTOREREADYBACKUP if needed (ie backup takes more time !)
14181449 until [[ $COUNTER -lt 1 || -n " $RES " ]]; do
14191450 RES=` curl --silent -k https://${MGMTURL} /services/kvstore/status --header " Authorization: Splunk ${sessionkey} " | grep backupRestoreStatus | grep -i Ready`
14201451 # echo_log "RES=$RES"
1421- debug_log " COUNTER=$COUNTER $MESSVER $MESS1 type=$TYPE object=${kvbackupmode} action=backup result=running info=postbackup"
1452+ debug_log " COUNTER=$COUNTER (max= ${COUNTERMAX} ) $MESSVER $MESS1 type=$TYPE object=${kvbackupmode} action=backup result=running info=postbackup"
14221453 let COUNTER-=1
14231454 sleep 10
14241455 done
@@ -1435,11 +1466,11 @@ if [ "$MODE" == "0" ] || [ "$MODE" == "kvdump" ] || [ "$MODE" == "kvstore" ] ||
14351466 FILESIZE=0
14361467 fi
14371468 if [[ -z " $RES " ]]; then
1438- warn_log " COUNTER=$COUNTER $ MESSVER $MESS1 type=$TYPE object=$kvbackupmode result=failure dest=${LFICKVDUMP} durationms=${DURATION} size=${FILESIZE} ATTENTION : we didnt get ready status ! Either backup kvstore (kvdump) has failed or takes too long"
1469+ warn_log " COUNTER=$COUNTER (max= ${COUNTERMAX} ) $ MESSVER $MESS1 type=$TYPE object=$kvbackupmode result=failure dest=${LFICKVDUMP} durationms=${DURATION} size=${FILESIZE} ATTENTION : we didnt get ready status ! Either backup kvstore (kvdump) has failed or takes too long.Please investigate or tune up KVSTOREREADYBACKUP to wait more if you see backup completed but wasn't copied to remote storage "
14391470 kvdump_done=" -1"
14401471 else
14411472 kvdump_done=" 1"
1442- echo_log " COUNTER=$COUNTER $MESSVER $MESS1 action=backup type=$TYPE object=$kvbackupmode result=success dest=${LFICKVDUMP} durationms=${DURATION} size=${FILESIZE} kvstore online (kvdump) backup complete"
1473+ echo_log " COUNTER=$COUNTER (max= ${COUNTERMAX} ) $MESSVER $MESS1 action=backup type=$TYPE object=$kvbackupmode result=success dest=${LFICKVDUMP} durationms=${DURATION} size=${FILESIZE} kvstore online (kvdump) backup complete"
14431474 fi
14441475 elif [[ " $MODE " == " 0" ]] || [[ " $MODE " == " kvstore" ]] || [[ " $MODE " == " kvauto" ]]; then
14451476 if [[ " $MODE " == " 0" ]] || [[ " $MODE " == " kvauto" ]]; then
0 commit comments