Skip to content

Commit eddd520

Browse files
author
Avinash Singh
committed
deletion is handled according to pattern mentioned in config across directories
1 parent 693b42d commit eddd520

File tree

6 files changed

+146
-52
lines changed

6 files changed

+146
-52
lines changed

config/config.yaml.example

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ vehicle_id: "vehicle-CN-001"
2020
## ============================================
2121
# LOG DIRECTORIES TO MONITOR
2222
# ============================================
23+
# ⚠️ CRITICAL: When monitoring /var/log/syslog, you MUST use pattern "syslog.[1-9]*"
24+
# to avoid uploading the active syslog file. Uploading the active file creates
25+
# an infinite loop where the service continuously re-uploads the file it just wrote to.
26+
#
2327
# Four log sources are monitored with explicit source names:
2428
# 1. Terminal logs: terminal/ in S3
2529
# 2. ROS logs: ros/ in S3
@@ -37,7 +41,7 @@ vehicle_id: "vehicle-CN-001"
3741
# - If pattern is omitted, ALL files in the directory are uploaded
3842
# - Supports wildcards: * (any characters), ? (single character)
3943
# - Examples:
40-
# pattern: "syslog*" → Matches: syslog, syslog.1, syslog.2.gz
44+
# pattern: "syslog.[1-9]*" → Matches: syslog.1, syslog.2.gz (skips active syslog)
4145
# pattern: "*.log" → Matches: error.log, debug.log
4246
# pattern: "test_*.mcap" → Matches: test_2024.mcap, test_run1.mcap
4347
#
@@ -67,7 +71,8 @@ log_directories:
6771

6872
- path: /var/log
6973
source: syslog
70-
pattern: "syslog.[1-9]*" # Only rotated files, NOT active syslog1, etc
74+
pattern: "syslog.[1-9]*" # CRITICAL: Only rotated files (syslog.1, syslog.2.gz)
75+
# Skips /var/log/syslog (active file - would cause infinite loop!)
7176
recursive: false # Do NOT monitor subdirectories (system logs have many folders)
7277

7378
- path: ${HOME}/ros2_ws/log

scripts/deployment/health_check.sh

Lines changed: 44 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,14 @@ fi
188188
print_section "S3 Connectivity"
189189

190190
if command -v aws &> /dev/null; then
191+
# Detect if running with sudo and use actual user's AWS credentials
192+
if [ -n "$SUDO_USER" ] && [ "$SUDO_USER" != "root" ]; then
193+
# Running with sudo - use actual user's home for credentials
194+
REAL_HOME=$(eval echo ~$SUDO_USER)
195+
export AWS_SHARED_CREDENTIALS_FILE="$REAL_HOME/.aws/credentials"
196+
export AWS_CONFIG_FILE="$REAL_HOME/.aws/config"
197+
fi
198+
191199
if [ -n "$AWS_PROFILE" ]; then
192200
AWS_CMD="aws --profile $AWS_PROFILE --region $AWS_REGION"
193201
else
@@ -197,35 +205,43 @@ if command -v aws &> /dev/null; then
197205
# Check recent uploads
198206
info "Checking recent uploads..."
199207

200-
RECENT_UPLOADS=$($AWS_CMD s3 ls "s3://$S3_BUCKET/$VEHICLE_ID/" --recursive 2>/dev/null | tail -10)
201-
202-
if [ -n "$RECENT_UPLOADS" ]; then
203-
check_pass "S3 bucket accessible"
204-
205-
TOTAL_FILES=$($AWS_CMD s3 ls "s3://$S3_BUCKET/$VEHICLE_ID/" --recursive 2>/dev/null | wc -l)
206-
check_pass "Total files in S3: $TOTAL_FILES"
207-
208-
echo ""
209-
echo -e "${BOLD}Latest Uploads:${NC}"
210-
echo "$RECENT_UPLOADS" | tail -5 | while read -r line; do
211-
FILE_DATE=$(echo "$line" | awk '{print $1, $2}')
212-
FILE_SIZE=$(echo "$line" | awk '{print $3}')
213-
FILE_NAME=$(echo "$line" | awk '{print $4}' | rev | cut -d'/' -f1 | rev)
214-
215-
# Convert size to human readable
216-
if [ "$FILE_SIZE" -gt 1048576 ]; then
217-
FILE_SIZE_H="$(echo "scale=1; $FILE_SIZE / 1048576" | bc) MB"
218-
elif [ "$FILE_SIZE" -gt 1024 ]; then
219-
FILE_SIZE_H="$(echo "scale=1; $FILE_SIZE / 1024" | bc) KB"
220-
else
221-
FILE_SIZE_H="${FILE_SIZE} B"
222-
fi
223-
224-
echo -e " ${CYAN}$FILE_DATE${NC} - $FILE_NAME (${FILE_SIZE_H})"
225-
done
208+
# Test AWS credentials first
209+
AWS_TEST_OUTPUT=$($AWS_CMD sts get-caller-identity 2>&1)
210+
if [ $? -ne 0 ]; then
211+
check_warn "Cannot access AWS (credentials issue)"
212+
info "Error: $(echo "$AWS_TEST_OUTPUT" | head -1)"
213+
info "Make sure AWS credentials are configured for profile: $AWS_PROFILE"
226214
else
227-
check_warn "No files found in S3 yet"
228-
info "Files will appear after first upload"
215+
RECENT_UPLOADS=$($AWS_CMD s3 ls "s3://$S3_BUCKET/$VEHICLE_ID/" --recursive 2>/dev/null | tail -10)
216+
217+
if [ -n "$RECENT_UPLOADS" ]; then
218+
check_pass "S3 bucket accessible"
219+
220+
TOTAL_FILES=$($AWS_CMD s3 ls "s3://$S3_BUCKET/$VEHICLE_ID/" --recursive 2>/dev/null | wc -l)
221+
check_pass "Total files in S3: $TOTAL_FILES"
222+
223+
echo ""
224+
echo -e "${BOLD}Latest Uploads:${NC}"
225+
echo "$RECENT_UPLOADS" | tail -5 | while read -r line; do
226+
FILE_DATE=$(echo "$line" | awk '{print $1, $2}')
227+
FILE_SIZE=$(echo "$line" | awk '{print $3}')
228+
FILE_NAME=$(echo "$line" | awk '{print $4}' | rev | cut -d'/' -f1 | rev)
229+
230+
# Convert size to human readable
231+
if [ "$FILE_SIZE" -gt 1048576 ]; then
232+
FILE_SIZE_H="$(echo "scale=1; $FILE_SIZE / 1048576" | bc) MB"
233+
elif [ "$FILE_SIZE" -gt 1024 ]; then
234+
FILE_SIZE_H="$(echo "scale=1; $FILE_SIZE / 1024" | bc) KB"
235+
else
236+
FILE_SIZE_H="${FILE_SIZE} B"
237+
fi
238+
239+
echo -e " ${CYAN}$FILE_DATE${NC} - $FILE_NAME (${FILE_SIZE_H})"
240+
done
241+
else
242+
check_warn "No files found in S3 yet"
243+
info "Files will appear after first upload"
244+
fi
229245
fi
230246
else
231247
check_warn "AWS CLI not available for S3 verification"

scripts/deployment/verify_deployment.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,8 +240,9 @@ if [ -z "$LOG_DIRS" ]; then
240240
check_warn "No log directories configured"
241241
else
242242
for dir in $LOG_DIRS; do
243-
# Replace USER placeholder
243+
# Replace USER placeholder and expand ${HOME}
244244
dir="${dir//USER/$USER}"
245+
dir="${dir//\$\{HOME\}/$HOME}"
245246

246247
if [ -d "$dir" ]; then
247248
check_pass "Log directory exists: $dir"

src/disk_manager.py

Lines changed: 69 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import shutil
88
import logging
99
import time
10+
import fnmatch
1011
from pathlib import Path
1112
from typing import List, Tuple, Dict
1213
import os
@@ -53,41 +54,49 @@ class DiskManager:
5354
uploaded_files (dict): Files safe to delete with deletion time (NEW v2.0)
5455
"""
5556

56-
def __init__(self,
57+
def __init__(self,
5758
log_directories: List[str],
5859
reserved_gb: float = 70.0,
5960
warning_threshold: float = 0.90,
60-
critical_threshold: float = 0.95):
61+
critical_threshold: float = 0.95,
62+
directory_configs: Dict[str, Dict] = None):
6163
"""
6264
Initialize disk manager.
63-
65+
6466
Args:
6567
log_directories: Directories to monitor and clean
6668
reserved_gb: Minimum free space to maintain (GB)
6769
warning_threshold: Disk usage % to warn at (0-1, e.g. 0.90 = 90%)
6870
critical_threshold: Disk usage % to force cleanup (0-1, e.g. 0.95 = 95%)
69-
71+
directory_configs: Optional dict mapping directory paths to their configs
72+
Format: {'/var/log': {'pattern': 'syslog.[1-9]*', 'recursive': False}}
73+
7074
Note:
7175
Thresholds are disk usage percentages, not free space percentages
7276
"""
7377
self.log_directories = [Path(d) for d in log_directories]
7478
self.reserved_bytes = int(reserved_gb * 1024 * 1024 * 1024)
7579
self.warning_threshold = warning_threshold
7680
self.critical_threshold = critical_threshold
77-
81+
82+
# Store directory configurations (pattern, recursive) for deletion filtering
83+
self.directory_configs = directory_configs or {}
84+
7885
# Track uploaded files with deletion time (NEW v2.0)
7986
# Format: {filepath: delete_after_timestamp}
8087
# If keep_days=0, timestamp is 0 (delete immediately)
8188
# If keep_days=14, timestamp is upload_time + 14 days
8289
self.uploaded_files: Dict[str, float] = {}
83-
90+
8491
# Callback for registry cleanup (set by main.py)
8592
self._on_file_deleted_callback = None
86-
93+
8794
logger.info("Initialized")
8895
logger.info(f"Reserved space: {reserved_gb} GB")
8996
logger.info(f"Warning threshold: {warning_threshold * 100}%")
9097
logger.info(f"Critical threshold: {critical_threshold * 100}%")
98+
if self.directory_configs:
99+
logger.info(f"Directory configs loaded for {len(self.directory_configs)} directories")
91100

92101
def mark_uploaded(self, filepath: str, keep_until_days: int = 0):
93102
"""
@@ -115,7 +124,46 @@ def mark_uploaded(self, filepath: str, keep_until_days: int = 0):
115124
delete_after = time.time() + (keep_until_days * SECONDS_PER_DAY)
116125

117126
self.uploaded_files[abs_path] = delete_after
118-
127+
128+
def _matches_pattern(self, file_path: Path) -> bool:
129+
"""
130+
Check if file matches the configured pattern for its parent directory.
131+
132+
Args:
133+
file_path: Path to the file to check
134+
135+
Returns:
136+
bool: True if file matches pattern (or no pattern configured), False otherwise
137+
"""
138+
# Find which monitored directory this file belongs to
139+
for log_dir in self.log_directories:
140+
try:
141+
# Check if file is under this log directory
142+
file_path.relative_to(log_dir)
143+
144+
# Found the parent directory, check pattern
145+
dir_str = str(log_dir.resolve())
146+
config = self.directory_configs.get(dir_str, {})
147+
pattern = config.get('pattern')
148+
149+
if pattern is None:
150+
# No pattern configured - accept all files
151+
logger.debug(f"Deletion pattern check: {file_path.name} - no pattern, accepting")
152+
return True
153+
else:
154+
# Check if filename matches pattern
155+
match_result = fnmatch.fnmatch(file_path.name, pattern)
156+
logger.debug(f"Deletion pattern check: {file_path.name} vs '{pattern}' => {match_result}")
157+
return match_result
158+
159+
except ValueError:
160+
# file_path is not relative to this log_dir, continue
161+
continue
162+
163+
# File is not in any monitored directory - should not happen, but be safe
164+
logger.warning(f"File {file_path} not in any monitored directory, skipping deletion")
165+
return False
166+
119167
def get_disk_usage(self, path: str = "/") -> Tuple[float, int, int]:
120168
"""Get disk usage statistics (returns: usage_percent, used_bytes, free_bytes)."""
121169
stat = shutil.disk_usage(path)
@@ -227,16 +275,21 @@ def cleanup_by_age(self, max_age_days: int) -> int:
227275

228276
for file_path in directory.rglob('*'):
229277
if file_path.is_file() and not file_path.name.startswith('.'):
278+
# NEW: Check if file matches the upload pattern before deletion
279+
if not self._matches_pattern(file_path):
280+
logger.debug(f"Skipping {file_path.name} - doesn't match upload pattern")
281+
continue
282+
230283
try:
231284
mtime = file_path.stat().st_mtime
232-
285+
233286
if mtime < cutoff_time:
234287
size = file_path.stat().st_size
235288
age_days = (time.time() - mtime) / 86400
236-
289+
237290
logger.info(f"Deleting old file: {file_path.name} "
238291
f"({age_days:.1f} days old, {size / (1024**2):.1f} MB)")
239-
292+
240293
file_path.unlink()
241294
deleted_count += 1
242295
freed_bytes += size
@@ -338,6 +391,11 @@ def emergency_cleanup_all_files(self, target_free_gb: float = None) -> int:
338391

339392
for file_path in directory.rglob('*'):
340393
if file_path.is_file() and not file_path.name.startswith('.'):
394+
# NEW: Check if file matches the upload pattern before deletion
395+
if not self._matches_pattern(file_path):
396+
logger.debug(f"EMERGENCY: Skipping {file_path.name} - doesn't match upload pattern")
397+
continue
398+
341399
try:
342400
mtime = file_path.stat().st_mtime
343401
size = file_path.stat().st_size

src/main.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,22 @@ def __init__(self, config_path: str):
9393
log_directories=log_dir_configs
9494
)
9595

96+
# Build directory_configs for disk manager (pattern-aware deletion)
97+
directory_configs = {}
98+
if is_new_format:
99+
for item in log_dir_configs:
100+
resolved_path = str(Path(item['path']).expanduser().resolve())
101+
directory_configs[resolved_path] = {
102+
'pattern': item.get('pattern'),
103+
'recursive': item.get('recursive', True)
104+
}
105+
96106
self.disk_manager = DiskManager(
97107
log_directories=monitor_paths,
98108
reserved_gb=self.config.get('disk.reserved_gb'),
99109
warning_threshold=self.config.get('disk.warning_threshold', 0.90),
100-
critical_threshold=self.config.get('disk.critical_threshold', 0.95)
110+
critical_threshold=self.config.get('disk.critical_threshold', 0.95),
111+
directory_configs=directory_configs if is_new_format else None
101112
)
102113

103114
def on_file_deleted(filepath):

tests/unit/test_config.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -644,7 +644,7 @@ def test_recursive_boolean_false():
644644

645645

646646
def test_recursive_string_value_rejected():
647-
"""Test recursive: "true" (string) is accepted (YAML converts to bool)"""
647+
"""Test recursive: "true" (string) is rejected by type validation"""
648648
config = {
649649
'vehicle_id': 'vehicle-001',
650650
'log_directories': [
@@ -660,14 +660,15 @@ def test_recursive_string_value_rejected():
660660
temp_path = f.name
661661

662662
try:
663-
# YAML will keep the string "true", but validation doesn't check type
664-
cm = ConfigManager(temp_path)
663+
# Validation now checks type - string should be rejected
664+
with pytest.raises(ConfigValidationError, match="recursive.*boolean"):
665+
cm = ConfigManager(temp_path)
665666
finally:
666667
Path(temp_path).unlink()
667668

668669

669670
def test_recursive_integer_value_rejected():
670-
"""Test recursive: 1 (integer) is accepted (validation doesn't check type)"""
671+
"""Test recursive: 1 (integer) is rejected by type validation"""
671672
config = {
672673
'vehicle_id': 'vehicle-001',
673674
'log_directories': [
@@ -683,8 +684,9 @@ def test_recursive_integer_value_rejected():
683684
temp_path = f.name
684685

685686
try:
686-
# Validation doesn't check type for recursive field
687-
cm = ConfigManager(temp_path)
687+
# Validation now checks type - integer should be rejected
688+
with pytest.raises(ConfigValidationError, match="recursive.*boolean"):
689+
cm = ConfigManager(temp_path)
688690
finally:
689691
Path(temp_path).unlink()
690692

@@ -743,7 +745,7 @@ def test_pattern_empty_string_rejected():
743745

744746

745747
def test_pattern_non_string_rejected():
746-
"""Test non-string pattern is accepted (validation doesn't check type)"""
748+
"""Test non-string pattern is rejected by type validation"""
747749
config = {
748750
'vehicle_id': 'vehicle-001',
749751
'log_directories': [
@@ -759,8 +761,9 @@ def test_pattern_non_string_rejected():
759761
temp_path = f.name
760762

761763
try:
762-
# Validation doesn't check type for pattern field
763-
cm = ConfigManager(temp_path)
764+
# Validation now checks type - integer should be rejected
765+
with pytest.raises(ConfigValidationError, match="pattern.*string"):
766+
cm = ConfigManager(temp_path)
764767
finally:
765768
Path(temp_path).unlink()
766769

0 commit comments

Comments
 (0)