Skip to content

Commit ef9861f

Browse files
committed
add settings to tune kvstore ready wait times, add more messages to report timeouts, add support to configure these by tags
1 parent 93b7ee2 commit ef9861f

File tree

2 files changed

+46
-7
lines changed

2 files changed

+46
-7
lines changed

splunkconf-backup/bin/splunkconf-backup.sh

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,9 @@ exec > /tmp/splunkconf-backup-debug.log 2>&1
133133
# 20240629 replace direct var inclusion with loading function logic
134134
# 20230702 relax check for tags to work better when only some tags are set
135135
# 20240703 add more tags
136+
# 20241008 add settings to tune kvstore ready wait times, add more messages to report timeouts, add support to configure these by tags
136137

137-
VERSION="20240703a"
138+
VERSION="20241008a"
138139

139140
###### BEGIN default parameters
140141
# dont change here, use the configuration file to override them
@@ -280,6 +281,15 @@ BACKUPSCRIPTS=1
280281

281282

282283
# KVSTORE Backup options
284+
285+
# how much we wait at start checking if kvstore is ready (because splunkd may not have finished starting kvstore)
286+
# This is the number of 10s loop to wait
287+
KVSTOREREADYINIT=100
288+
# how much we wait at kvdump backup time checking if kvstore is back to ready (ie finished to backup)
289+
# This is the number of 10s loop to wait
290+
KVSTOREREADYBACKUP=100
291+
292+
283293
# stop splunk for kvstore backup (that can be a bad idea if you have cluster and stop all instances at same time or without maintenance mode)
284294
# risk is that data could be corrupted if something is written to kvstore while we do the backup
285295
#RESTARTFORKVBACKUP=1
@@ -928,6 +938,20 @@ if [ -z ${splunks3backupbucket+x} ]; then
928938
fi
929939

930940

941+
# Optional override of KVSTOREREADYINIT (number of 10s loops waited for kvstore
# to be ready before the backup starts) by the splunkkvstorereadyinit tag.
# NOTE(review): the tag variable is presumably populated by the tag loading
# logic earlier in the script - confirm.
# The value is later used as a numeric loop counter, so anything that is not a
# plain non-negative integer is rejected instead of silently disabling the wait.
if [[ -z "${splunkkvstorereadyinit+x}" ]]; then
  debug_log "tag splunkkvstorereadyinit not set, using value ${KVSTOREREADYINIT} from configuration files"
elif [[ "${splunkkvstorereadyinit}" =~ ^[0-9]+$ ]]; then
  # force base 10 so a value with leading zeros (ie 08) is not parsed as octal later
  KVSTOREREADYINIT=$((10#${splunkkvstorereadyinit}))
  debug_log "setting KVSTOREREADYINIT=${KVSTOREREADYINIT} via tags"
else
  warn_log "tag splunkkvstorereadyinit=${splunkkvstorereadyinit} is not a non-negative integer, ignoring it and using value ${KVSTOREREADYINIT} from configuration files"
fi
947+
948+
# Optional override of KVSTOREREADYBACKUP (number of 10s loops waited for
# kvstore to be back to ready after the kvdump backup is launched) by the
# splunkkvstorereadybackup tag.
# NOTE(review): the tag variable is presumably populated by the tag loading
# logic earlier in the script - confirm.
# The value becomes the COUNTER tested with -lt in the post backup wait loop,
# so anything that is not a plain non-negative integer is rejected instead of
# silently disabling the wait.
if [[ -z "${splunkkvstorereadybackup+x}" ]]; then
  debug_log "tag splunkkvstorereadybackup not set, using value ${KVSTOREREADYBACKUP} from configuration files"
elif [[ "${splunkkvstorereadybackup}" =~ ^[0-9]+$ ]]; then
  # force base 10 so a value with leading zeros (ie 08) is not parsed as octal later
  KVSTOREREADYBACKUP=$((10#${splunkkvstorereadybackup}))
  debug_log "setting KVSTOREREADYBACKUP=${KVSTOREREADYBACKUP} via tags"
else
  warn_log "tag splunkkvstorereadybackup=${splunkkvstorereadybackup} is not a non-negative integer, ignoring it and using value ${KVSTOREREADYBACKUP} from configuration files"
fi
954+
931955
if [ -z ${splunks3endpointurl+x} ]; then
932956
debug_log "tag splunks3endpointurl not set, using value ${REMOTES3ENDPOINTURL} from configuration files"
933957
else
@@ -1386,7 +1410,8 @@ if [ "$MODE" == "0" ] || [ "$MODE" == "kvdump" ] || [ "$MODE" == "kvstore" ] ||
13861410
KVARCHIVE="backupconfsplunk-kvdump-${TODAY}"
13871411
MESS1="MGMTURL=${MGMTURL} KVARCHIVE=${KVARCHIVE}";
13881412
debug_log "pre backup : checking in case kvstore is not ready like initialization at start"
1389-
COUNTER=50
1413+
COUNTER=${KVSTOREREADYINIT}
1414+
COUNTERMAX=${KVSTOREREADYINIT}
13901415
RES=""
13911416
RES2=""
13921417
# wait a bit (up to KVSTOREREADYINIT*10s) for kvstore to be ready before launching the backup, especially when splunkd has just started and kvstore is still initializing
@@ -1401,24 +1426,30 @@ if [ "$MODE" == "0" ] || [ "$MODE" == "kvdump" ] || [ "$MODE" == "kvstore" ] ||
14011426
RES2=""
14021427
fi
14031428
#echo_log "RES=$RES"
1404-
debug_log "COUNTER=$COUNTER $MESSVER $MESS1 type=$TYPE object=${kvbackupmode} action=backup result=running info=prebackup RES=$RES RESREADY=$RESREADY RES2=$RES"
1429+
# trace each pre backup polling loop ; RES2 is now reported from its own variable (it was printing RES twice)
debug_log "COUNTER=$COUNTER (max=${COUNTERMAX}) $MESSVER $MESS1 type=$TYPE object=${kvbackupmode} action=backup result=running info=prebackup RES=$RES RESREADY=$RESREADY RES2=$RES2"
14051430
let COUNTER-=1
14061431
sleep 10
14071432
done
1433+
if [[ -z "$RES" ]]; then
1434+
warn_log "COUNTER=$COUNTER (max=${COUNTERMAX}) $MESSVER $MESS1 type=$TYPE object=$kvbackupmode result=failure dest=${LFICKVDUMP} durationms=${DURATION} size=${FILESIZE} ATTENTION : we didnt get ready status ! Please investigate or tune up KVSTOREREADYINIT to wait more"
1435+
else
1436+
debug_log "OK: KVSTORE REady state before launching backup"
1437+
fi
14081438
# here we try to start backup anyway but if the status was not ready , something is probably wrong
14091439
START=$(($(date +%s%N)));
14101440
debug_log "launching kvdump backup via REST API"
14111441
RES=`curl --silent -k https://${MGMTURL}/services/kvstore/backup/create -X post --header "Authorization: Splunk ${sessionkey}" -d"archiveName=${KVARCHIVE}"`
14121442

14131443
#echo_log "KVDUMP CREATE RES=$RES"
1414-
COUNTER=50
1444+
COUNTER=${KVSTOREREADYBACKUP}
1445+
COUNTERMAX=${KVSTOREREADYBACKUP}
14151446
RES=""
14161447
# wait a bit (up to KVSTOREREADYBACKUP*10s) for backup to complete, especially for big kvstore/busy env (io)
14171448
# increase KVSTOREREADYBACKUP (configuration file or tag) if needed (ie backup takes more time !)
14181449
until [[ $COUNTER -lt 1 || -n "$RES" ]]; do
14191450
RES=`curl --silent -k https://${MGMTURL}/services/kvstore/status --header "Authorization: Splunk ${sessionkey}" | grep backupRestoreStatus | grep -i Ready`
14201451
#echo_log "RES=$RES"
1421-
debug_log "COUNTER=$COUNTER $MESSVER $MESS1 type=$TYPE object=${kvbackupmode} action=backup result=running info=postbackup"
1452+
debug_log "COUNTER=$COUNTER (max=${COUNTERMAX}) $MESSVER $MESS1 type=$TYPE object=${kvbackupmode} action=backup result=running info=postbackup"
14221453
let COUNTER-=1
14231454
sleep 10
14241455
done
@@ -1435,11 +1466,11 @@ if [ "$MODE" == "0" ] || [ "$MODE" == "kvdump" ] || [ "$MODE" == "kvstore" ] ||
14351466
FILESIZE=0
14361467
fi
14371468
if [[ -z "$RES" ]]; then
1438-
warn_log "COUNTER=$COUNTER $MESSVER $MESS1 type=$TYPE object=$kvbackupmode result=failure dest=${LFICKVDUMP} durationms=${DURATION} size=${FILESIZE} ATTENTION : we didnt get ready status ! Either backup kvstore (kvdump) has failed or takes too long"
1469+
warn_log "COUNTER=$COUNTER (max=${COUNTERMAX}) $MESSVER $MESS1 type=$TYPE object=$kvbackupmode result=failure dest=${LFICKVDUMP} durationms=${DURATION} size=${FILESIZE} ATTENTION : we didnt get ready status ! Either backup kvstore (kvdump) has failed or takes too long.Please investigate or tune up KVSTOREREADYBACKUP to wait more if you see backup completed but wasn't copied to remote storage"
14391470
kvdump_done="-1"
14401471
else
14411472
kvdump_done="1"
1442-
echo_log "COUNTER=$COUNTER $MESSVER $MESS1 action=backup type=$TYPE object=$kvbackupmode result=success dest=${LFICKVDUMP} durationms=${DURATION} size=${FILESIZE} kvstore online (kvdump) backup complete"
1473+
echo_log "COUNTER=$COUNTER (max=${COUNTERMAX}) $MESSVER $MESS1 action=backup type=$TYPE object=$kvbackupmode result=success dest=${LFICKVDUMP} durationms=${DURATION} size=${FILESIZE} kvstore online (kvdump) backup complete"
14431474
fi
14441475
elif [[ "$MODE" == "0" ]] || [[ "$MODE" == "kvstore" ]] || [[ "$MODE" == "kvauto" ]]; then
14451476
if [[ "$MODE" == "0" ]] || [[ "$MODE" == "kvauto" ]]; then

splunkconf-backup/default/splunkconf-backup.conf

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,14 @@ BACKUPSTATE=1
122122
# set to backup scripts
123123
BACKUPSCRIPTS=1
124124

125+
#KVSTORE options
126+
127+
# how much we wait at start checking if kvstore is ready (because splunkd may not have finished starting kvstore)
128+
# This is the number of 10s loop to wait
129+
KVSTOREREADYINIT=100
130+
# how much we wait at kvdump backup time checking if kvstore is back to ready (ie finished to backup)
131+
# This is the number of 10s loop to wait
132+
KVSTOREREADYBACKUP=100
125133

126134

127135
#minfreespace

0 commit comments

Comments
 (0)