+pub fn load_config<P: AsRef<std::path::Path>>(path: Option<P>) -> anyhow::Result<Config> {
+ let mut builder = config::Config::builder();
+
+ // Add default values
+ builder = builder.set_default("scan_patterns", vec!["*.rs", "*.toml"])?;
+ builder = builder.set_default("output_formats", vec!["json"])?;
+ builder = builder.set_default("database_path", "code_guardian.db")?;
+ builder = builder.set_default("max_threads", num_cpus::get() as i64)?;
+ builder = builder.set_default("cache_size", 50000i64)?;
+ builder = builder.set_default("batch_size", 100i64)?;
+ builder = builder.set_default("max_file_size", (10 * 1024 * 1024) as i64)?;
+
+ // Add file source if provided
+ if let Some(path) = path {
+ let path = path.as_ref();
+ if path.exists() {
+ let extension = path.extension().and_then(|s| s.to_str()).unwrap_or("");
+ match extension {
+ "toml" => {
+ builder = builder.add_source(config::File::with_name(path.to_str().unwrap()));
+ }
+ "json" => {
+ builder = builder.add_source(config::File::with_name(path.to_str().unwrap()));
+ }
+ _ => return Err(anyhow::anyhow!("Unsupported config file format: {}", extension)),
+ }
+ }
+ }
+
+ let config = builder.build()?;
+ let parsed: Config = config.try_deserialize()?;
+ Ok(parsed)
+}
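+
+// Usage sketch (hypothetical caller; with `None` the defaults above apply):
+//
+// let config = load_config(Some("code_guardian.toml"))?;
+// let defaults = load_config::<&str>(None)?;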
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use std::fs;
+ use tempfile::TempDir;
+
+ #[test]
+ fn test_default_config() {
+ let config = Config::default();
+ assert!(!config.scan_patterns.is_empty());
+ assert!(!config.output_formats.is_empty());
+ assert!(!config.database_path.is_empty());
+ assert!(config.max_threads > 0);
+ assert_eq!(config.cache_size, 50000);
+ assert_eq!(config.batch_size, 100);
+ assert_eq!(config.max_file_size, 10 * 1024 * 1024);
+ }
+
+ #[test]
+ fn test_load_config_toml() {
+ let temp_dir = TempDir::new().unwrap();
+ let config_path = temp_dir.path().join("config.toml");
+ let toml_content = r#"
+scan_patterns = ["*.rs", "*.py"]
+output_formats = ["json", "csv"]
+database_path = "test.db"
+max_threads = 4
+cache_size = 100000
+batch_size = 200
+max_file_size = 20971520
+"#;
+ fs::write(&config_path, toml_content).unwrap();
+
+ let config = load_config(Some(&config_path)).unwrap();
+ assert_eq!(config.scan_patterns, vec!["*.rs", "*.py"]);
+ assert_eq!(config.output_formats, vec!["json", "csv"]);
+ assert_eq!(config.database_path, "test.db");
+ assert_eq!(config.max_threads, 4);
+ assert_eq!(config.cache_size, 100000);
+ assert_eq!(config.batch_size, 200);
+ assert_eq!(config.max_file_size, 20971520);
+ }
+
+ #[test]
+ fn test_load_config_json() {
+ let temp_dir = TempDir::new().unwrap();
+ let config_path = temp_dir.path().join("config.json");
+ let json_content = r#"{
+"scan_patterns": ["*.js", "*.ts"],
+"output_formats": ["html"],
+"database_path": "data.db",
+"max_threads": 8,
+"cache_size": 75000,
+"batch_size": 150,
+"max_file_size": 15728640
+}"#;
+ fs::write(&config_path, json_content).unwrap();
+
+ let config = load_config(Some(&config_path)).unwrap();
+ assert_eq!(config.scan_patterns, vec!["*.js", "*.ts"]);
+ assert_eq!(config.output_formats, vec!["html"]);
+ assert_eq!(config.database_path, "data.db");
+ assert_eq!(config.max_threads, 8);
+ assert_eq!(config.cache_size, 75000);
+ assert_eq!(config.batch_size, 150);
+ assert_eq!(config.max_file_size, 15728640);
+ }
+
+ #[test]
+ fn test_load_config_no_file() {
+ let config = load_config::<&str>(None).unwrap();
+ let default = Config::default();
+ assert_eq!(config.scan_patterns, default.scan_patterns);
+ assert_eq!(config.output_formats, default.output_formats);
+ assert_eq!(config.database_path, default.database_path);
+ assert_eq!(config.max_threads, default.max_threads);
+ assert_eq!(config.cache_size, default.cache_size);
+ assert_eq!(config.batch_size, default.batch_size);
+ assert_eq!(config.max_file_size, default.max_file_size);
+ }
+
+ #[test]
+ fn test_load_config_unsupported_format() {
+ let temp_dir = TempDir::new().unwrap();
+ let config_path = temp_dir.path().join("config.txt");
+ fs::write(&config_path, "invalid").unwrap();
+
+ let result = load_config(Some(&config_path));
+ assert!(result.is_err());
+ }
+}
\ No newline at end of file
diff --git a/crates/core/src/custom_detectors.rs b/crates/core/src/custom_detectors.rs
new file mode 100644
index 0000000..f456e4b
--- /dev/null
+++ b/crates/core/src/custom_detectors.rs
@@ -0,0 +1,598 @@
+use crate::{Match, PatternDetector, Severity};
+use anyhow::Result;
+use regex::Regex;
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::path::{Path, PathBuf};
+
+/// Configuration for a custom detector
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CustomDetectorConfig {
+ pub name: String,
+ pub description: String,
+ pub pattern: String,
+ pub file_extensions: Vec<String>, // Empty = all files
+ pub case_sensitive: bool,
+ pub multiline: bool,
+ pub capture_groups: Vec<String>, // Named capture groups
+ pub severity: Severity,
+ pub category: DetectorCategory,
+ pub examples: Vec<String>,
+ pub enabled: bool,
+}
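+
+// Illustrative JSON entry for a detector file consumed by
+// `CustomDetectorManager::load_from_file` below (a sketch; the string forms of
+// `severity` and `category` assume default serde naming for those enums):
+//
+// [{
+// "name": "NO_DBG", "description": "Flag leftover dbg! calls",
+// "pattern": "dbg!\\(", "file_extensions": ["rs"],
+// "case_sensitive": true, "multiline": false, "capture_groups": [],
+// "severity": "Low", "category": "CodeQuality",
+// "examples": [], "enabled": true
+// }]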
+
+
+
+/// Categories for organizing custom detectors
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub enum DetectorCategory {
+ CodeQuality,
+ Security,
+ Performance,
+ Documentation,
+ Testing,
+ Deprecated,
+ Custom(String),
+}
+
+/// A custom pattern detector built from configuration
+pub struct CustomDetector {
+ config: CustomDetectorConfig,
+ regex: Regex,
+}
+
+impl Clone for CustomDetector {
+ fn clone(&self) -> Self {
+ Self::new(self.config.clone()).unwrap()
+ }
+}
+
+impl CustomDetector {
+ /// Create a new custom detector from configuration
+ pub fn new(config: CustomDetectorConfig) -> Result<Self> {
+ let pattern = config.pattern.clone();
+
+ // Build regex flags
+ let mut regex_flags = regex::RegexBuilder::new(&pattern);
+ regex_flags.case_insensitive(!config.case_sensitive);
+ regex_flags.multi_line(config.multiline);
+
+ let regex = regex_flags.build()
+ .map_err(|e| anyhow::anyhow!("Invalid regex pattern '{}': {}", pattern, e))?;
+
+ Ok(Self {
+ config,
+ regex,
+ })
+ }
+
+ /// Get detector configuration
+ pub fn config(&self) -> &CustomDetectorConfig {
+ &self.config
+ }
+
+ /// Check if this detector should process the given file
+ fn should_process_file(&self, file_path: &Path) -> bool {
+ if self.config.file_extensions.is_empty() {
+ return true; // Process all files
+ }
+
+ if let Some(ext) = file_path.extension().and_then(|s| s.to_str()) {
+ self.config.file_extensions.iter()
+ .any(|allowed_ext| allowed_ext.eq_ignore_ascii_case(ext))
+ } else {
+ false
+ }
+ }
+}
+
+impl PatternDetector for CustomDetector {
+ fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> {
+ if !self.config.enabled || !self.should_process_file(file_path) {
+ return Vec::new();
+ }
+
+ let mut matches = Vec::new();
+
+ for cap in self.regex.captures_iter(content) {
+ if let Some(full_match) = cap.get(0) {
+ // Find line and column
+ let (line_number, column) = find_line_column(content, full_match.start());
+
+ // Extract message from capture groups or use full match
+ let message = if !self.config.capture_groups.is_empty() {
+ self.extract_message_from_groups(&cap)
+ } else {
+ full_match.as_str().trim().to_string()
+ };
+
+ matches.push(Match {
+ file_path: file_path.to_string_lossy().to_string(),
+ line_number,
+ column,
+ pattern: self.config.name.clone(),
+ message: format!("{}: {}", self.config.name, message),
+ });
+ }
+ }
+
+ matches
+ }
+}
+
+impl CustomDetector {
+ fn extract_message_from_groups(&self, cap: &regex::Captures) -> String {
+ let mut parts = Vec::new();
+
+ for group_name in &self.config.capture_groups {
+ if let Some(group_match) = cap.name(group_name) {
+ parts.push(format!("{}={}", group_name, group_match.as_str()));
+ }
+ }
+
+ if parts.is_empty() {
+ cap.get(0).map_or("".to_string(), |m| m.as_str().to_string())
+ } else {
+ parts.join(", ")
+ }
+ }
+}
+
+/// Manager for custom detectors
+pub struct CustomDetectorManager {
+ detectors: HashMap<String, CustomDetector>,
+ config_file: Option<PathBuf>,
+}
+
+impl CustomDetectorManager {
+ pub fn new() -> Self {
+ Self {
+ detectors: HashMap::new(),
+ config_file: None,
+ }
+ }
+
+ /// Load detectors from configuration file
+ pub fn load_from_file<P: AsRef<Path>>(&mut self, config_file: P) -> Result<()> {
+ let config_file = config_file.as_ref();
+ let content = std::fs::read_to_string(config_file)?;
+
+ let configs: Vec<CustomDetectorConfig> = match config_file.extension().and_then(|s| s.to_str()) {
+ Some("json") => serde_json::from_str(&content)?,
+ Some("yaml") | Some("yml") => serde_yaml::from_str(&content)?,
+ Some("toml") => toml::from_str(&content)?,
+ _ => return Err(anyhow::anyhow!("Unsupported config file format")),
+ };
+
+ for config in configs {
+ let detector = CustomDetector::new(config.clone())?;
+ self.detectors.insert(config.name.clone(), detector);
+ }
+
+ self.config_file = Some(config_file.to_path_buf());
+ println!("📁 Loaded {} custom detectors from {}",
+ self.detectors.len(), config_file.display());
+
+ Ok(())
+ }
+
+ /// Save detectors to configuration file
+ pub fn save_to_file<P: AsRef<Path>>(&self, config_file: P) -> Result<()> {
+ let configs: Vec<CustomDetectorConfig> = self.detectors.values()
+ .map(|d| d.config().clone())
+ .collect();
+
+ let config_file = config_file.as_ref();
+ let content = match config_file.extension().and_then(|s| s.to_str()) {
+ Some("json") => serde_json::to_string_pretty(&configs)?,
+ Some("yaml") | Some("yml") => serde_yaml::to_string(&configs)?,
+ Some("toml") => toml::to_string_pretty(&configs)?,
+ _ => return Err(anyhow::anyhow!("Unsupported config file format")),
+ };
+
+ std::fs::write(config_file, content)?;
+ println!("💾 Saved {} custom detectors to {}",
+ configs.len(), config_file.display());
+
+ Ok(())
+ }
+
+ /// Add a new custom detector
+ pub fn add_detector(&mut self, config: CustomDetectorConfig) -> Result<()> {
+ let name = config.name.clone();
+ let detector = CustomDetector::new(config)?;
+ self.detectors.insert(name.clone(), detector);
+ println!("➕ Added custom detector: {}", name);
+ Ok(())
+ }
+
+ /// Remove a custom detector
+ pub fn remove_detector(&mut self, name: &str) -> bool {
+ if self.detectors.remove(name).is_some() {
+ println!("➖ Removed custom detector: {}", name);
+ true
+ } else {
+ false
+ }
+ }
+
+ /// Get all custom detectors as PatternDetector trait objects
+ pub fn get_detectors(&self) -> Vec<Box<dyn PatternDetector>> {
+ self.detectors.values()
+ .filter(|d| d.config().enabled)
+ .map(|d| Box::new(d.clone()) as Box<dyn PatternDetector>)
+ .collect()
+ }
+
+ /// List all detector configurations
+ pub fn list_detectors(&self) -> Vec<&CustomDetectorConfig> {
+ self.detectors.values()
+ .map(|d| d.config())
+ .collect()
+ }
+
+ /// Enable/disable a detector
+ pub fn set_detector_enabled(&mut self, name: &str, enabled: bool) -> Result<()> {
+ if let Some(detector) = self.detectors.get_mut(name) {
+ // Note: We'd need to modify CustomDetector to allow config mutation
+ // For now, we'll recreate the detector with updated config
+ let mut config = detector.config().clone();
+ config.enabled = enabled;
+ let new_detector = CustomDetector::new(config)?;
+ self.detectors.insert(name.to_string(), new_detector);
+ println!("🔄 {} detector: {}", if enabled { "Enabled" } else { "Disabled" }, name);
+ Ok(())
+ } else {
+ Err(anyhow::anyhow!("Detector '{}' not found", name))
+ }
+ }
+
+ /// Create some example detectors
+ pub fn create_examples(&mut self) -> Result<()> {
+ let examples = vec![
+ CustomDetectorConfig {
+ name: "SQL_INJECTION".to_string(),
+ description: "Detect potential SQL injection vulnerabilities".to_string(),
+ pattern: r#"(?i)(query|execute)\s*\(\s*["']\s*SELECT.*\+.*["']\s*\)"#.to_string(),
+ file_extensions: vec!["py".to_string(), "js".to_string(), "php".to_string()],
+ case_sensitive: false,
+ multiline: false,
+ capture_groups: vec![],
+ severity: Severity::Critical,
+ category: DetectorCategory::Security,
+ examples: vec![
+ r#"query("SELECT * FROM users WHERE id = " + user_id)"#.to_string(),
+ ],
+ enabled: true,
+ },
+ CustomDetectorConfig {
+ name: "HARDCODED_PASSWORD".to_string(),
+ description: "Detect hardcoded passwords and secrets".to_string(),
+ pattern: r#"(?i)(password|secret|key|token)\s*[=:]\s*["'][^"']{8,}["']"#.to_string(),
+ file_extensions: vec![],
+ case_sensitive: false,
+ multiline: false,
+ capture_groups: vec![],
+ severity: Severity::High,
+ category: DetectorCategory::Security,
+ examples: vec![
+ r#"password = "secretpassword123""#.to_string(),
+ ],
+ enabled: true,
+ },
+ CustomDetectorConfig {
+ name: "LARGE_FUNCTION".to_string(),
+ description: "Detect functions that might be too large".to_string(),
+ pattern: r#"fn\s+\w+[^{]*\{(?:[^{}]*\{[^{}]*\})*[^{}]{500,}\}"#.to_string(),
+ file_extensions: vec!["rs".to_string()],
+ case_sensitive: true,
+ multiline: true,
+ capture_groups: vec![],
+ severity: Severity::Medium,
+ category: DetectorCategory::CodeQuality,
+ examples: vec![
+ "Functions with more than 500 characters in body".to_string(),
+ ],
+ enabled: true,
+ },
+ ];
+
+ for config in examples {
+ self.add_detector(config)?;
+ }
+
+ Ok(())
+ }
+}
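+
+// Usage sketch: seed a manager with the example detectors above and persist
+// them (file name is illustrative; the format is inferred from the extension):
+//
+// let mut manager = CustomDetectorManager::new();
+// manager.create_examples()?;
+// manager.save_to_file("detectors.yaml")?;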
+
+impl Default for CustomDetectorManager {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+/// Helper function to find line and column from byte offset
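+///
+/// Both values are 1-based; e.g. in "ab\ncd" the byte offset of 'd' (4) maps
+/// to line 2, column 2.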
+fn find_line_column(content: &str, offset: usize) -> (usize, usize) {
+ let mut line = 1;
+ let mut column = 1;
+
+ for (i, ch) in content.char_indices() {
+ if i >= offset {
+ break;
+ }
+
+ if ch == '\n' {
+ line += 1;
+ column = 1;
+ } else {
+ column += 1;
+ }
+ }
+
+ (line, column)
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_custom_detector_creation() {
+ let config = CustomDetectorConfig {
+ name: "TEST".to_string(),
+ description: "Test detector".to_string(),
+ pattern: r"test".to_string(),
+ file_extensions: vec!["rs".to_string()],
+ case_sensitive: true,
+ multiline: false,
+ capture_groups: vec![],
+ severity: Severity::Low,
+ category: DetectorCategory::Testing,
+ examples: vec![],
+ enabled: true,
+ };
+
+ let detector = CustomDetector::new(config);
+ assert!(detector.is_ok());
+ }
+
+ #[test]
+ fn test_custom_detector_matching() {
+ let config = CustomDetectorConfig {
+ name: "TODO_CUSTOM".to_string(),
+ description: "Custom TODO detector".to_string(),
+ pattern: r"TODO:.*".to_string(),
+ file_extensions: vec![],
+ case_sensitive: false,
+ multiline: false,
+ capture_groups: vec![],
+ severity: Severity::Low,
+ category: DetectorCategory::Documentation,
+ examples: vec![],
+ enabled: true,
+ };
+
+ let detector = CustomDetector::new(config).unwrap();
+ let content = "// TODO: implement this\nsome code";
+ let matches = detector.detect(content, Path::new("test.rs"));
+
+ assert_eq!(matches.len(), 1);
+ assert_eq!(matches[0].line_number, 1);
+ }
+
+ #[test]
+ fn test_detector_manager() {
+ let mut manager = CustomDetectorManager::new();
+ assert_eq!(manager.list_detectors().len(), 0);
+
+ manager.create_examples().unwrap();
+ assert!(manager.list_detectors().len() > 0);
+
+ let detectors = manager.get_detectors();
+ assert!(detectors.len() > 0);
+ }
+
+ #[test]
+ fn test_empty_pattern() {
+ let config = CustomDetectorConfig {
+ name: "EMPTY".to_string(),
+ description: "Empty pattern test".to_string(),
+ pattern: "".to_string(),
+ file_extensions: vec![],
+ case_sensitive: true,
+ multiline: false,
+ capture_groups: vec![],
+ severity: Severity::Low,
+ category: DetectorCategory::Testing,
+ examples: vec![],
+ enabled: true,
+ };
+
+ let detector = CustomDetector::new(config);
+ // Empty pattern is actually valid in regex (matches empty string)
+ assert!(detector.is_ok());
+ }
+
+ #[test]
+ fn test_complex_regex() {
+ let config = CustomDetectorConfig {
+ name: "COMPLEX".to_string(),
+ description: "Complex regex with word boundaries".to_string(),
+ pattern: r"\bclass\s+\w+\s+extends\s+\w+\s*\{".to_string(),
+ file_extensions: vec!["js".to_string()],
+ case_sensitive: true,
+ multiline: false,
+ capture_groups: vec![],
+ severity: Severity::Medium,
+ category: DetectorCategory::CodeQuality,
+ examples: vec![],
+ enabled: true,
+ };
+
+ let detector = CustomDetector::new(config).unwrap();
+ let content = "class MyClass extends Base {\n constructor() {}\n}";
+ let matches = detector.detect(content, Path::new("test.js"));
+ assert_eq!(matches.len(), 1);
+ }
+
+ #[test]
+ fn test_large_content() {
+ let config = CustomDetectorConfig {
+ name: "LARGE_TEST".to_string(),
+ description: "Test with large content".to_string(),
+ pattern: r"TODO".to_string(),
+ file_extensions: vec![],
+ case_sensitive: false,
+ multiline: false,
+ capture_groups: vec![],
+ severity: Severity::Low,
+ category: DetectorCategory::Testing,
+ examples: vec![],
+ enabled: true,
+ };
+
+ let detector = CustomDetector::new(config).unwrap();
+ let large_content = "some code\n".repeat(10000) + "// TODO: large file test\n" + &"more code\n".repeat(10000);
+ let matches = detector.detect(&large_content, Path::new("large.rs"));
+ assert_eq!(matches.len(), 1);
+ assert_eq!(matches[0].line_number, 10001);
+ }
+
+ #[test]
+ fn test_multiline_pattern() {
+ let config = CustomDetectorConfig {
+ name: "MULTILINE".to_string(),
+ description: "Multiline pattern test".to_string(),
+ pattern: r"function\s+\w+\([^)]*\)\s*\{[^}]*\}".to_string(),
+ file_extensions: vec!["js".to_string()],
+ case_sensitive: true,
+ multiline: true,
+ capture_groups: vec![],
+ severity: Severity::Low,
+ category: DetectorCategory::Testing,
+ examples: vec![],
+ enabled: true,
+ };
+
+ let detector = CustomDetector::new(config).unwrap();
+ let content = "function test() {\n return true;\n}\nother code";
+ let matches = detector.detect(content, Path::new("test.js"));
+ assert_eq!(matches.len(), 1);
+ }
+
+ #[test]
+ fn test_case_insensitive() {
+ let config = CustomDetectorConfig {
+ name: "CASE_TEST".to_string(),
+ description: "Case insensitive test".to_string(),
+ pattern: r"todo".to_string(),
+ file_extensions: vec![],
+ case_sensitive: false,
+ multiline: false,
+ capture_groups: vec![],
+ severity: Severity::Low,
+ category: DetectorCategory::Testing,
+ examples: vec![],
+ enabled: true,
+ };
+
+ let detector = CustomDetector::new(config).unwrap();
+ let content = "// TODO: case test\n// todo: another";
+ let matches = detector.detect(content, Path::new("test.rs"));
+ assert_eq!(matches.len(), 2);
+ }
+
+ #[test]
+ fn test_file_extension_filtering() {
+ let config = CustomDetectorConfig {
+ name: "EXT_TEST".to_string(),
+ description: "File extension test".to_string(),
+ pattern: r"test".to_string(),
+ file_extensions: vec!["rs".to_string()],
+ case_sensitive: true,
+ multiline: false,
+ capture_groups: vec![],
+ severity: Severity::Low,
+ category: DetectorCategory::Testing,
+ examples: vec![],
+ enabled: true,
+ };
+
+ let detector = CustomDetector::new(config).unwrap();
+ let content = "test content";
+
+ // Should match .rs file
+ let matches_rs = detector.detect(content, Path::new("test.rs"));
+ assert_eq!(matches_rs.len(), 1);
+
+ // Should not match .js file
+ let matches_js = detector.detect(content, Path::new("test.js"));
+ assert_eq!(matches_js.len(), 0);
+ }
+
+ #[test]
+ fn test_capture_groups() {
+ let config = CustomDetectorConfig {
+ name: "CAPTURE".to_string(),
+ description: "Capture groups test".to_string(),
+ pattern: r"let\s+(?P\w+)\s*=\s*(?P\w+);".to_string(),
+ file_extensions: vec![],
+ case_sensitive: true,
+ multiline: false,
+ capture_groups: vec!["var".to_string(), "value".to_string()],
+ severity: Severity::Low,
+ category: DetectorCategory::Testing,
+ examples: vec![],
+ enabled: true,
+ };
+
+ let detector = CustomDetector::new(config).unwrap();
+ let content = "let x = 42;";
+ let matches = detector.detect(content, Path::new("test.rs"));
+ assert_eq!(matches.len(), 1);
+ assert!(matches[0].message.contains("var=x"));
+ assert!(matches[0].message.contains("value=42"));
+ }
+
+ #[test]
+ fn test_disabled_detector() {
+ let config = CustomDetectorConfig {
+ name: "DISABLED".to_string(),
+ description: "Disabled detector test".to_string(),
+ pattern: r"test".to_string(),
+ file_extensions: vec![],
+ case_sensitive: true,
+ multiline: false,
+ capture_groups: vec![],
+ severity: Severity::Low,
+ category: DetectorCategory::Testing,
+ examples: vec![],
+ enabled: false,
+ };
+
+ let detector = CustomDetector::new(config).unwrap();
+ let content = "test content";
+ let matches = detector.detect(content, Path::new("test.rs"));
+ assert_eq!(matches.len(), 0);
+ }
+
+ #[test]
+ fn test_invalid_regex() {
+ let config = CustomDetectorConfig {
+ name: "INVALID".to_string(),
+ description: "Invalid regex test".to_string(),
+ pattern: r"[unclosed".to_string(),
+ file_extensions: vec![],
+ case_sensitive: true,
+ multiline: false,
+ capture_groups: vec![],
+ severity: Severity::Low,
+ category: DetectorCategory::Testing,
+ examples: vec![],
+ enabled: true,
+ };
+
+ let detector = CustomDetector::new(config);
+ assert!(detector.is_err());
+ }
+}
\ No newline at end of file
diff --git a/crates/core/src/detector_factory.rs b/crates/core/src/detector_factory.rs
new file mode 100644
index 0000000..97f3f51
--- /dev/null
+++ b/crates/core/src/detector_factory.rs
@@ -0,0 +1,229 @@
+use crate::detectors::*;
+use crate::enhanced_config::{DetectorType, EnhancedScanConfig};
+use crate::PatternDetector;
+use anyhow::Result;
+
+/// Factory for creating pattern detectors based on configuration
+pub struct DetectorFactory;
+
+impl DetectorFactory {
+ /// Create all enabled detectors from configuration
+ pub fn create_detectors(config: &EnhancedScanConfig) -> Vec<Box<dyn PatternDetector>> {
+ let mut detectors = Vec::new();
+ for detector_type in &config.enabled_detectors {
+ match Self::create_detector(detector_type, Some(config)) {
+ Ok(Some(detector)) => detectors.push(detector),
+ Ok(None) => {} // Detector type not supported or disabled
+ Err(e) => eprintln!("Warning: Failed to create detector for {:?}: {}", detector_type, e),
+ }
+ }
+ detectors
+ }
+
+ /// Create a default set of detectors (backwards compatibility)
+ pub fn create_default_detectors() -> Vec<Box<dyn PatternDetector>> {
+ vec![
+ Box::new(TodoDetector),
+ Box::new(FixmeDetector),
+ ]
+ }
+
+ /// Create an extended set of detectors for comprehensive scanning
+ pub fn create_comprehensive_detectors() -> Vec<Box<dyn PatternDetector>> {
+ vec![
+ // Comment patterns
+ Box::new(TodoDetector),
+ Box::new(FixmeDetector),
+ Box::new(HackDetector),
+ Box::new(BugDetector),
+ Box::new(XxxDetector),
+ Box::new(NoteDetector),
+ Box::new(WarningDetector),
+
+ // Rust-specific patterns
+ Box::new(PanicDetector),
+ Box::new(UnwrapDetector),
+ Box::new(ExpectDetector),
+ Box::new(UnimplementedDetector),
+ Box::new(UnreachableDetector),
+
+ // Performance patterns
+ Box::new(CloneDetector),
+ Box::new(ToStringDetector),
+
+ // Security patterns
+ Box::new(UnsafeDetector),
+ ]
+ }
+
+ /// Create security-focused detectors
+ pub fn create_security_detectors() -> Vec<Box<dyn PatternDetector>> {
+ vec![
+ Box::new(UnsafeDetector),
+ Box::new(PanicDetector),
+ Box::new(UnwrapDetector),
+ Box::new(ExpectDetector),
+ ]
+ }
+
+ /// Create performance-focused detectors
+ pub fn create_performance_detectors() -> Vec<Box<dyn PatternDetector>> {
+ vec![
+ Box::new(CloneDetector),
+ Box::new(ToStringDetector),
+ Box::new(UnwrapDetector), // Can cause performance issues
+ ]
+ }
+
+ /// Create a single detector by type
+ fn create_detector(detector_type: &DetectorType, config: Option<&EnhancedScanConfig>) -> Result<Option<Box<dyn PatternDetector>>> {
+ match detector_type {
+ DetectorType::Todo => Ok(Some(Box::new(TodoDetector))),
+ DetectorType::Fixme => Ok(Some(Box::new(FixmeDetector))),
+ DetectorType::Hack => Ok(Some(Box::new(HackDetector))),
+ DetectorType::Bug => Ok(Some(Box::new(BugDetector))),
+ DetectorType::Xxx => Ok(Some(Box::new(XxxDetector))),
+ DetectorType::Note => Ok(Some(Box::new(NoteDetector))),
+ DetectorType::Warning => Ok(Some(Box::new(WarningDetector))),
+ DetectorType::Panic => Ok(Some(Box::new(PanicDetector))),
+ DetectorType::Unwrap => Ok(Some(Box::new(UnwrapDetector))),
+ DetectorType::Expect => Ok(Some(Box::new(ExpectDetector))),
+ DetectorType::Unimplemented => Ok(Some(Box::new(UnimplementedDetector))),
+ DetectorType::Unreachable => Ok(Some(Box::new(UnreachableDetector))),
+ DetectorType::Clone => Ok(Some(Box::new(CloneDetector))),
+ DetectorType::ToString => Ok(Some(Box::new(ToStringDetector))),
+ DetectorType::Unsafe => Ok(Some(Box::new(UnsafeDetector))),
+ DetectorType::Custom(name) => {
+ if let Some(config) = config {
+ if let Some(pattern) = config.custom_patterns.get(name) {
+ let detector = CustomPatternDetector::new(name, pattern)?;
+ Ok(Some(Box::new(detector)))
+ } else {
+ Ok(None) // Pattern not found in config
+ }
+ } else {
+ Ok(None) // No config provided
+ }
+ }
+ }
+ }
+}
+
+/// Predefined detector profiles for common use cases
+pub enum DetectorProfile {
+ /// Basic TODO/FIXME detection
+ Basic,
+ /// All available detectors
+ Comprehensive,
+ /// Security-focused scanning
+ Security,
+ /// Performance-focused scanning
+ Performance,
+ /// Rust-specific patterns only
+ Rust,
+ /// Custom configuration
+ Custom(Box<EnhancedScanConfig>),
+}
+
+impl DetectorProfile {
+ /// Get detectors for the specified profile
+ pub fn get_detectors(&self) -> Vec<Box<dyn PatternDetector>> {
+ match self {
+ DetectorProfile::Basic => DetectorFactory::create_default_detectors(),
+ DetectorProfile::Comprehensive => DetectorFactory::create_comprehensive_detectors(),
+ DetectorProfile::Security => DetectorFactory::create_security_detectors(),
+ DetectorProfile::Performance => DetectorFactory::create_performance_detectors(),
+ DetectorProfile::Rust => vec![
+ Box::new(PanicDetector),
+ Box::new(UnwrapDetector),
+ Box::new(ExpectDetector),
+ Box::new(UnimplementedDetector),
+ Box::new(UnreachableDetector),
+ Box::new(CloneDetector),
+ Box::new(ToStringDetector),
+ Box::new(UnsafeDetector),
+ ],
+ DetectorProfile::Custom(config) => DetectorFactory::create_detectors(config),
+ }
+ }
+}
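+
+// Usage sketch:
+//
+// let detectors = DetectorProfile::Security.get_detectors();
+// assert!(!detectors.is_empty());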
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_default_detectors() {
+ let detectors = DetectorFactory::create_default_detectors();
+ assert_eq!(detectors.len(), 2);
+ }
+
+ #[test]
+ fn test_comprehensive_detectors() {
+ let detectors = DetectorFactory::create_comprehensive_detectors();
+ assert!(detectors.len() > 10);
+ }
+
+ #[test]
+ fn test_security_detectors() {
+ let detectors = DetectorFactory::create_security_detectors();
+ assert!(detectors.len() >= 4);
+ }
+
+ #[test]
+ fn test_detector_profiles() {
+ let basic = DetectorProfile::Basic.get_detectors();
+ let comprehensive = DetectorProfile::Comprehensive.get_detectors();
+
+ assert!(comprehensive.len() > basic.len());
+ }
+
+ #[test]
+ fn test_factory_with_custom_detectors() {
+ let mut config = EnhancedScanConfig::default();
+ config.custom_patterns.insert("MY_PATTERN".to_string(), r"custom".to_string());
+ config.enabled_detectors.push(DetectorType::Custom("MY_PATTERN".to_string()));
+
+ let detectors = DetectorFactory::create_detectors(&config);
+ // The default config enables 2 detectors, plus our custom one
+ assert!(detectors.len() >= 3);
+ }
+
+ #[test]
+ fn test_custom_detector_creation_success() {
+ let mut config = EnhancedScanConfig::default();
+ config.custom_patterns.insert("TEST".to_string(), r"test".to_string());
+
+ let result = DetectorFactory::create_detector(&DetectorType::Custom("TEST".to_string()), Some(&config));
+ assert!(result.is_ok());
+ assert!(result.unwrap().is_some());
+ }
+
+ #[test]
+ fn test_custom_detector_creation_missing_pattern() {
+ let config = EnhancedScanConfig::default();
+
+ let result = DetectorFactory::create_detector(&DetectorType::Custom("MISSING".to_string()), Some(&config));
+ assert!(result.is_ok());
+ assert!(result.unwrap().is_none());
+ }
+
+ #[test]
+ fn test_custom_detector_creation_no_config() {
+ let result = DetectorFactory::create_detector(&DetectorType::Custom("TEST".to_string()), None);
+ assert!(result.is_ok());
+ assert!(result.unwrap().is_none());
+ }
+
+ #[test]
+ fn test_custom_detector_invalid_regex() {
+ let mut config = EnhancedScanConfig::default();
+ config.custom_patterns.insert("INVALID".to_string(), r"[invalid".to_string());
+ config.enabled_detectors.push(DetectorType::Custom("INVALID".to_string()));
+
+ let detectors = DetectorFactory::create_detectors(&config);
+ // Should have default detectors but not the invalid custom one
+ assert_eq!(detectors.len(), 2); // default has 2
+ }
+}
\ No newline at end of file
diff --git a/crates/core/src/detectors.rs b/crates/core/src/detectors.rs
new file mode 100644
index 0000000..af11448
--- /dev/null
+++ b/crates/core/src/detectors.rs
@@ -0,0 +1,327 @@
+use crate::{Match, PatternDetector};
+use anyhow::Result;
+use lazy_static::lazy_static;
+use regex::Regex;
+use std::path::Path;
+
+lazy_static! {
+ pub static ref TODO_REGEX: Regex = Regex::new(r"\b(?i)todo\b").unwrap();
+ pub static ref FIXME_REGEX: Regex = Regex::new(r"\b(?i)fixme\b").unwrap();
+ pub static ref HACK_REGEX: Regex = Regex::new(r"\b(?i)hack\b").unwrap();
+ pub static ref BUG_REGEX: Regex = Regex::new(r"\b(?i)bug\b").unwrap();
+ pub static ref XXX_REGEX: Regex = Regex::new(r"\bXXX\b").unwrap();
+ pub static ref NOTE_REGEX: Regex = Regex::new(r"\b(?i)note\b").unwrap();
+ pub static ref WARNING_REGEX: Regex = Regex::new(r"\b(?i)warning\b").unwrap();
+ // Rust-specific patterns
+ pub static ref PANIC_REGEX: Regex = Regex::new(r"\bpanic!\s*\(").unwrap();
+ pub static ref UNWRAP_REGEX: Regex = Regex::new(r"\.unwrap\s*\(\s*\)").unwrap();
+ pub static ref EXPECT_REGEX: Regex = Regex::new(r"\.expect\s*\(").unwrap();
+ pub static ref UNIMPLEMENTED_REGEX: Regex = Regex::new(r"\bunimplemented!\s*\(").unwrap();
+ pub static ref UNREACHABLE_REGEX: Regex = Regex::new(r"\bunreachable!\s*\(").unwrap();
+ // Performance patterns
+ pub static ref CLONE_REGEX: Regex = Regex::new(r"\.clone\s*\(\s*\)").unwrap();
+ pub static ref TO_STRING_REGEX: Regex = Regex::new(r"\.to_string\s*\(\s*\)").unwrap();
+ // Security patterns
+ pub static ref UNSAFE_REGEX: Regex = Regex::new(r"\bunsafe\s+\{").unwrap();
+}
+
+fn detect_pattern_with_context(
+ content: &str,
+ file_path: &Path,
+ pattern_name: &str,
+ re: &Regex
+) -> Vec<Match> {
+ let mut matches = Vec::new();
+ for (line_idx, line) in content.lines().enumerate() {
+ for mat in re.find_iter(line) {
+ // Extract more context around the match
+ let context_start = mat.start().saturating_sub(10);
+ let context_end = (mat.end() + 20).min(line.len());
+ let context = &line[context_start..context_end];
+
+ matches.push(Match {
+ file_path: file_path.to_string_lossy().to_string(),
+ line_number: line_idx + 1,
+ column: mat.start() + 1,
+ pattern: pattern_name.to_string(),
+ message: format!("{}: {}", pattern_name, context.trim()),
+ });
+ }
+ }
+ matches
+}
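+
+// Worked example: for the line "// TODO: fix" the TODO regex matches at byte 3,
+// so the reported column is 4 and the message becomes "TODO: // TODO: fix"
+// (pattern name plus the trimmed context window around the match).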
+
+/// Default detector for TODO comments (case-insensitive)
+pub struct TodoDetector;
+
+impl PatternDetector for TodoDetector {
+ fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> {
+ detect_pattern_with_context(content, file_path, "TODO", &TODO_REGEX)
+ }
+}
+
+/// Default detector for FIXME comments (case-insensitive)
+pub struct FixmeDetector;
+
+impl PatternDetector for FixmeDetector {
+ fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> {
+ detect_pattern_with_context(content, file_path, "FIXME", &FIXME_REGEX)
+ }
+}
+
+/// Detector for HACK comments indicating temporary workarounds
+pub struct HackDetector;
+
+impl PatternDetector for HackDetector {
+ fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> {
+ detect_pattern_with_context(content, file_path, "HACK", &HACK_REGEX)
+ }
+}
+
+/// Detector for BUG comments indicating known issues
+pub struct BugDetector;
+
+impl PatternDetector for BugDetector {
+ fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> {
+ detect_pattern_with_context(content, file_path, "BUG", &BUG_REGEX)
+ }
+}
+
+/// Detector for XXX comments indicating urgent attention needed
+pub struct XxxDetector;
+
+impl PatternDetector for XxxDetector {
+ fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> {
+ detect_pattern_with_context(content, file_path, "XXX", &XXX_REGEX)
+ }
+}
+
+/// Detector for NOTE comments
+pub struct NoteDetector;
+
+impl PatternDetector for NoteDetector {
+ fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> {
+ detect_pattern_with_context(content, file_path, "NOTE", &NOTE_REGEX)
+ }
+}
+
+/// Detector for WARNING comments
+pub struct WarningDetector;
+
+impl PatternDetector for WarningDetector {
+ fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> {
+ detect_pattern_with_context(content, file_path, "WARNING", &WARNING_REGEX)
+ }
+}
+
+/// Detector for panic! macros in Rust code
+pub struct PanicDetector;
+
+impl PatternDetector for PanicDetector {
+ fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> {
+ // Only detect in Rust files
+ if let Some(ext) = file_path.extension() {
+ if ext == "rs" {
+ return detect_pattern_with_context(content, file_path, "PANIC", &PANIC_REGEX);
+ }
+ }
+ Vec::new()
+ }
+}
+
+/// Detector for .unwrap() calls in Rust code (potential panic points)
+pub struct UnwrapDetector;
+
+impl PatternDetector for UnwrapDetector {
+ fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> {
+ // Only detect in Rust files
+ if let Some(ext) = file_path.extension() {
+ if ext == "rs" {
+ return detect_pattern_with_context(content, file_path, "UNWRAP", &UNWRAP_REGEX);
+ }
+ }
+ Vec::new()
+ }
+}
+
+/// Detector for .expect() calls in Rust code
+pub struct ExpectDetector;
+
+impl PatternDetector for ExpectDetector {
+ fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> {
+ // Only detect in Rust files
+ if let Some(ext) = file_path.extension() {
+ if ext == "rs" {
+ return detect_pattern_with_context(content, file_path, "EXPECT", &EXPECT_REGEX);
+ }
+ }
+ Vec::new()
+ }
+}
+
+/// Detector for unimplemented! macros in Rust code
+pub struct UnimplementedDetector;
+
+impl PatternDetector for UnimplementedDetector {
+ fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> {
+ // Only detect in Rust files
+ if let Some(ext) = file_path.extension() {
+ if ext == "rs" {
+ return detect_pattern_with_context(content, file_path, "UNIMPLEMENTED", &UNIMPLEMENTED_REGEX);
+ }
+ }
+ Vec::new()
+ }
+}
+
+/// Detector for unreachable! macros in Rust code
+pub struct UnreachableDetector;
+
+impl PatternDetector for UnreachableDetector {
+ fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> {
+ // Only detect in Rust files
+ if let Some(ext) = file_path.extension() {
+ if ext == "rs" {
+ return detect_pattern_with_context(content, file_path, "UNREACHABLE", &UNREACHABLE_REGEX);
+ }
+ }
+ Vec::new()
+ }
+}
+
+/// Detector for excessive .clone() calls (potential performance issue)
+pub struct CloneDetector;
+
+impl PatternDetector for CloneDetector {
+ fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> {
+ // Only detect in Rust files
+ if let Some(ext) = file_path.extension() {
+ if ext == "rs" {
+ return detect_pattern_with_context(content, file_path, "CLONE", &CLONE_REGEX);
+ }
+ }
+ Vec::new()
+ }
+}
+
+/// Detector for .to_string() calls (potential performance issue)
+pub struct ToStringDetector;
+
+impl PatternDetector for ToStringDetector {
+ fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> {
+ // Only detect in Rust files
+ if let Some(ext) = file_path.extension() {
+ if ext == "rs" {
+ return detect_pattern_with_context(content, file_path, "TO_STRING", &TO_STRING_REGEX);
+ }
+ }
+ Vec::new()
+ }
+}
+
+/// Detector for unsafe blocks in Rust code (security concern)
+pub struct UnsafeDetector;
+
+impl PatternDetector for UnsafeDetector {
+ fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> {
+ // Only detect in Rust files
+ if let Some(ext) = file_path.extension() {
+ if ext == "rs" {
+ return detect_pattern_with_context(content, file_path, "UNSAFE", &UNSAFE_REGEX);
+ }
+ }
+ Vec::new()
+ }
+}
+
+/// Custom pattern detector that uses user-defined regex patterns
+pub struct CustomPatternDetector {
+ name: String,
+ regex: Regex,
+}
+
+impl CustomPatternDetector {
+ /// Creates a new custom pattern detector with the given name and regex pattern
+ pub fn new(name: &str, pattern: &str) -> Result<Self> {
+ let regex = Regex::new(pattern)?;
+ Ok(Self {
+ name: name.to_string(),
+ regex,
+ })
+ }
+}
+
+impl PatternDetector for CustomPatternDetector {
+ fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> {
+ detect_pattern_with_context(content, file_path, &self.name, &self.regex)
+ }
+}
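+
+// Usage sketch (name and pattern are illustrative):
+//
+// let detector = CustomPatternDetector::new("DEBUG_PRINT", r"dbg!")?;
+// let matches = detector.detect(content, path);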
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use std::path::PathBuf;
+
+ #[test]
+ fn test_hack_detector() {
+ let detector = HackDetector;
+ let content = "// HACK: temporary fix\nlet x = 1;";
+ let path = PathBuf::from("test.rs");
+ let matches = detector.detect(content, &path);
+ assert_eq!(matches.len(), 1);
+ assert_eq!(matches[0].pattern, "HACK");
+ }
+
+ #[test]
+ fn test_panic_detector_rust_only() {
+ let detector = PanicDetector;
+ let rust_content = "panic!(\"error\");";
+ let js_content = "panic!(\"error\");";
+
+ let rust_path = PathBuf::from("test.rs");
+ let js_path = PathBuf::from("test.js");
+
+ let rust_matches = detector.detect(rust_content, &rust_path);
+ let js_matches = detector.detect(js_content, &js_path);
+
+ assert_eq!(rust_matches.len(), 1);
+ assert_eq!(js_matches.len(), 0);
+ }
+
+ #[test]
+ fn test_unwrap_detector() {
+ let detector = UnwrapDetector;
+ let content = "let value = some_option.unwrap();";
+ let path = PathBuf::from("test.rs");
+ let matches = detector.detect(content, &path);
+ assert_eq!(matches.len(), 1);
+ assert_eq!(matches[0].pattern, "UNWRAP");
+ }
+
+ #[test]
+ fn test_case_insensitive_todo() {
+ let detector = TodoDetector;
+ let content = "todo: fix this\nTODO: another\nTodo: yet another";
+ let path = PathBuf::from("test.rs");
+ let matches = detector.detect(content, &path);
+ assert_eq!(matches.len(), 3);
+ }
+
+ #[test]
+ fn test_custom_pattern_detector() {
+ let detector = CustomPatternDetector::new("TEST", r"test").unwrap();
+ let content = "this is a test";
+ let path = PathBuf::from("test.txt");
+ let matches = detector.detect(content, &path);
+ assert_eq!(matches.len(), 1);
+ assert_eq!(matches[0].pattern, "TEST");
+ assert_eq!(matches[0].line_number, 1);
+ assert!(matches[0].message.contains("TEST"));
+ }
+
+ #[test]
+ fn test_custom_pattern_detector_invalid_regex() {
+ let result = CustomPatternDetector::new("TEST", r"[invalid");
+ assert!(result.is_err());
+ }
+}
\ No newline at end of file
diff --git a/crates/core/src/distributed.rs b/crates/core/src/distributed.rs
new file mode 100644
index 0000000..04ab5a3
--- /dev/null
+++ b/crates/core/src/distributed.rs
@@ -0,0 +1,354 @@
+use crate::{Match, PatternDetector};
+use anyhow::Result;
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::path::PathBuf;
+
+use std::time::Instant;
+
+/// Work unit for distributed processing
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct WorkUnit {
+ pub id: String,
+ pub files: Vec<PathBuf>,
+ pub detector_types: Vec<String>,
+ pub priority: u8, // 0-255, higher = more priority
+ pub estimated_duration_ms: u64,
+}
+
+/// Result from processing a work unit
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct WorkResult {
+ pub unit_id: String,
+ pub worker_id: String,
+ pub matches: Vec<Match>,
+ pub files_processed: usize,
+ pub processing_time_ms: u64,
+ pub timestamp: u64,
+ pub errors: Vec<String>,
+}
+
+/// Worker node configuration
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct WorkerConfig {
+ pub worker_id: String,
+ pub max_concurrent_units: usize,
+ pub supported_detectors: Vec<String>,
+ pub cpu_cores: usize,
+ pub memory_limit_mb: usize,
+ pub endpoint: Option<String>, // For remote workers
+}
+
+/// Distributed scan coordinator
+pub struct DistributedCoordinator {
+ workers: Vec<WorkerConfig>,
+ work_queue: Vec<WorkUnit>,
+ completed_work: HashMap<String, WorkResult>,
+ detectors: HashMap<String, Box<dyn PatternDetector>>,
+}
+
+impl DistributedCoordinator {
+ pub fn new() -> Self {
+ Self {
+ workers: Vec::new(),
+ work_queue: Vec::new(),
+ completed_work: HashMap::new(),
+ detectors: HashMap::new(),
+ }
+ }
+
+ /// Register a worker node
+ pub fn register_worker(&mut self, config: WorkerConfig) {
+ println!("🤖 Registered worker: {} (cores: {}, memory: {}MB)",
+ config.worker_id, config.cpu_cores, config.memory_limit_mb);
+ self.workers.push(config);
+ }
+
+ /// Register pattern detectors
+ pub fn register_detector(&mut self, name: String, detector: Box<dyn PatternDetector>) {
+ self.detectors.insert(name, detector);
+ }
+
+ /// Create work units from file list
+ pub fn create_work_units(&mut self, files: Vec<PathBuf>, batch_size: usize) -> Result<()> {
+ for (unit_id, chunk) in files.chunks(batch_size).enumerate() {
+ let estimated_duration = self.estimate_processing_time(chunk);
+
+ let work_unit = WorkUnit {
+ id: format!("unit_{}", unit_id),
+ files: chunk.to_vec(),
+ detector_types: self.detectors.keys().cloned().collect(),
+ priority: self.calculate_priority(chunk),
+ estimated_duration_ms: estimated_duration,
+ };
+
+ self.work_queue.push(work_unit);
+ }
+
+ // Sort by priority (higher priority first)
+ self.work_queue.sort_by(|a, b| b.priority.cmp(&a.priority));
+
+ println!("📦 Created {} work units from {} files",
+ self.work_queue.len(), files.len());
+ Ok(())
+ }
+
+ /// Distribute and execute work units
+ pub fn execute_distributed_scan(&mut self) -> Result<Vec<Match>> {
+ let start_time = Instant::now();
+ let total_units = self.work_queue.len();
+
+ println!("🚀 Starting distributed scan with {} workers and {} work units",
+ self.workers.len(), total_units);
+
+ if self.workers.is_empty() {
+ // Fallback to local processing
+ return self.execute_local_fallback();
+ }
+
+ // Simulate distributed processing (in real implementation, this would use
+ // actual network communication, message queues, etc.)
+ self.simulate_distributed_execution()?;
+
+ let total_matches: Vec<Match> = self.completed_work
+ .values()
+ .flat_map(|result| result.matches.clone())
+ .collect();
+
+ let duration = start_time.elapsed();
+ self.print_execution_summary(duration, total_matches.len());
+
+ Ok(total_matches)
+ }
+
+ /// Get distributed scan statistics
+ pub fn get_statistics(&self) -> DistributedStats {
+ let total_files: usize = self.completed_work.values()
+ .map(|r| r.files_processed)
+ .sum();
+
+ let total_processing_time: u64 = self.completed_work.values()
+ .map(|r| r.processing_time_ms)
+ .sum();
+
+ let worker_utilization: HashMap<String, f64> = self.workers.iter()
+ .map(|w| {
+ let worker_results: Vec<&WorkResult> = self.completed_work.values()
+ .filter(|r| r.worker_id == w.worker_id)
+ .collect();
+
+ let utilization = if !worker_results.is_empty() {
+ worker_results.len() as f64 / self.work_queue.len() as f64
+ } else {
+ 0.0
+ };
+
+ (w.worker_id.clone(), utilization)
+ })
+ .collect();
+
+ DistributedStats {
+ total_workers: self.workers.len(),
+ total_work_units: self.work_queue.len(),
+ completed_units: self.completed_work.len(),
+ total_files_processed: total_files,
+ total_processing_time_ms: total_processing_time,
+ worker_utilization,
+ average_unit_size: if !self.work_queue.is_empty() {
+ total_files as f64 / self.work_queue.len() as f64
+ } else {
+ 0.0
+ },
+ }
+ }
+
+ fn simulate_distributed_execution(&mut self) -> Result<()> {
+ use rayon::prelude::*;
+
+ // Process work units in parallel (simulating distributed workers)
+ let results: Vec<WorkResult> = self.work_queue
+ .par_iter()
+ .enumerate()
+ .map(|(i, unit)| {
+ let worker_id = format!("worker_{}", i % self.workers.len());
+ self.process_work_unit(unit, &worker_id)
+ })
+ .collect::<Result<Vec<_>>>()?;
+
+ // Store results
+ for result in results {
+ self.completed_work.insert(result.unit_id.clone(), result);
+ }
+
+ Ok(())
+ }
+
+ fn process_work_unit(&self, unit: &WorkUnit, worker_id: &str) -> Result<WorkResult> {
+ let start_time = Instant::now();
+ let mut all_matches = Vec::new();
+ let mut errors = Vec::new();
+ let mut files_processed = 0;
+
+ for file_path in &unit.files {
+ match std::fs::read_to_string(file_path) {
+ Ok(content) => {
+ for detector_name in &unit.detector_types {
+ if let Some(detector) = self.detectors.get(detector_name) {
+ let matches = detector.detect(&content, file_path);
+ all_matches.extend(matches);
+ }
+ }
+ files_processed += 1;
+ }
+ Err(e) => {
+ errors.push(format!("Failed to read {}: {}", file_path.display(), e));
+ }
+ }
+ }
+
+ let processing_time = start_time.elapsed();
+
+ Ok(WorkResult {
+ unit_id: unit.id.clone(),
+ worker_id: worker_id.to_string(),
+ matches: all_matches,
+ files_processed,
+ processing_time_ms: processing_time.as_millis() as u64,
+ timestamp: std::time::SystemTime::now()
+ .duration_since(std::time::UNIX_EPOCH)?
+ .as_secs(),
+ errors,
+ })
+ }
+
+ fn execute_local_fallback(&mut self) -> Result<Vec<Match>> {
+ println!("⚠️ No workers available, falling back to local processing");
+
+ let mut all_matches = Vec::new();
+ for unit in &self.work_queue {
+ let mut result = self.process_work_unit(unit, "local_worker")?;
+ let matches = std::mem::take(&mut result.matches);
+ self.completed_work.insert(unit.id.clone(), result);
+ all_matches.extend(matches);
+ }
+
+ Ok(all_matches)
+ }
+
+ fn estimate_processing_time(&self, files: &[PathBuf]) -> u64 {
+ // Simple estimation: 1ms per file + size factor
+ let base_time = files.len() as u64;
+ let size_factor: u64 = files.iter()
+ .filter_map(|f| std::fs::metadata(f).ok())
+ .map(|m| (m.len() / 1024).min(100)) // Cap at 100ms per file
+ .sum();
+
+ base_time + size_factor
+ }
+
+ fn calculate_priority(&self, files: &[PathBuf]) -> u8 {
+ // Higher priority for smaller batches (process quickly)
+ // and files that are likely to have issues
+ let size_priority = match files.len() {
+ 1..=10 => 200,
+ 11..=50 => 150,
+ 51..=100 => 100,
+ _ => 50,
+ };
+
+ // Boost priority for certain file types
+ let type_priority = files.iter()
+ .filter_map(|f| f.extension())
+ .filter_map(|ext| ext.to_str())
+ .map(|ext| match ext {
+ "rs" => 50, // Rust files get higher priority
+ "py" | "js" | "ts" => 30,
+ _ => 10,
+ })
+ .max()
+ .unwrap_or(0);
+
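+ // Worked example: a batch of 5 Rust files scores 200 (small batch)
+ // + 50 (.rs bonus) = 250.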
+ (size_priority + type_priority).min(255) as u8
+ }
+
+ fn print_execution_summary(&self, duration: std::time::Duration, total_matches: usize) {
+ println!("✅ Distributed scan completed!");
+ println!(" Duration: {:?}", duration);
+ println!(" Total matches: {}", total_matches);
+ println!(" Work units processed: {}", self.completed_work.len());
+
+ let stats = self.get_statistics();
+ println!(" Files processed: {}", stats.total_files_processed);
+ println!(" Average unit size: {:.1} files", stats.average_unit_size);
+
+ // Show worker utilization
+ for (worker_id, utilization) in &stats.worker_utilization {
+ println!(" {}: {:.1}% utilization", worker_id, utilization * 100.0);
+ }
+ }
+}
+
+/// Statistics for distributed scanning
+#[derive(Debug, Clone)]
+pub struct DistributedStats {
+ pub total_workers: usize,
+ pub total_work_units: usize,
+ pub completed_units: usize,
+ pub total_files_processed: usize,
+ pub total_processing_time_ms: u64,
+ pub worker_utilization: HashMap<String, f64>,
+ pub average_unit_size: f64,
+}
+
+impl Default for DistributedCoordinator {
+ fn default() -> Self {
+ Self::new()
+ }
+}
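+
+// Usage sketch (runs the local fallback when no workers are registered; the
+// file list is assumed to come from an earlier directory walk):
+//
+// let mut coordinator = DistributedCoordinator::new();
+// coordinator.register_detector("TODO".to_string(), Box::new(TodoDetector));
+// coordinator.create_work_units(files, 100)?;
+// let matches = coordinator.execute_distributed_scan()?;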
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::detectors::TodoDetector;
+ use tempfile::TempDir;
+
+ #[test]
+ fn test_distributed_coordinator_creation() {
+ let coordinator = DistributedCoordinator::new();
+ assert_eq!(coordinator.workers.len(), 0);
+ assert_eq!(coordinator.work_queue.len(), 0);
+ }
+
+ #[test]
+ fn test_worker_registration() {
+ let mut coordinator = DistributedCoordinator::new();
+
+ let worker_config = WorkerConfig {
+ worker_id: "test_worker".to_string(),
+ max_concurrent_units: 4,
+ supported_detectors: vec!["TODO".to_string()],
+ cpu_cores: 8,
+ memory_limit_mb: 4096,
+ endpoint: None,
+ };
+
+ coordinator.register_worker(worker_config);
+ assert_eq!(coordinator.workers.len(), 1);
+ }
+
+ #[test]
+ fn test_work_unit_creation() {
+ let temp_dir = TempDir::new().unwrap();
+ let test_file = temp_dir.path().join("test.rs");
+ std::fs::write(&test_file, "// TODO: test").unwrap();
+
+ let mut coordinator = DistributedCoordinator::new();
+ coordinator.register_detector("TODO".to_string(), Box::new(TodoDetector));
+
+ let files = vec![test_file];
+ coordinator.create_work_units(files, 10).unwrap();
+
+ assert_eq!(coordinator.work_queue.len(), 1);
+ assert_eq!(coordinator.work_queue[0].files.len(), 1);
+ }
+}
\ No newline at end of file
diff --git a/crates/core/src/enhanced_config.rs b/crates/core/src/enhanced_config.rs
new file mode 100644
index 0000000..6154563
--- /dev/null
+++ b/crates/core/src/enhanced_config.rs
@@ -0,0 +1,112 @@
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+
+use crate::Severity;
+
+/// Enhanced configuration for more flexible pattern detection
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct EnhancedScanConfig {
+ /// Enabled pattern detectors
+ pub enabled_detectors: Vec<DetectorType>,
+ /// File extensions to include in scanning
+ pub include_extensions: Vec<String>,
+ /// File extensions to exclude from scanning
+ pub exclude_extensions: Vec<String>,
+ /// Paths to exclude from scanning (glob patterns)
+ pub exclude_paths: Vec<String>,
+ /// Maximum file size to scan (in bytes)
+ pub max_file_size: Option<u64>,
+ /// Custom regex patterns
+ pub custom_patterns: HashMap<String, String>,
+ /// Severity levels for different pattern types
+ pub severity_levels: HashMap<String, Severity>,
+}
+
+/// Types of available pattern detectors
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub enum DetectorType {
+ // Comment-based patterns
+ Todo,
+ Fixme,
+ Hack,
+ Bug,
+ Xxx,
+ Note,
+ Warning,
+
+ // Rust-specific patterns
+ Panic,
+ Unwrap,
+ Expect,
+ Unimplemented,
+ Unreachable,
+
+ // Performance patterns
+ Clone,
+ ToString,
+
+ // Security patterns
+ Unsafe,
+
+ // Custom pattern with name
+ Custom(String),
+}
+
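+// `DetectorType::Custom(name)` is resolved against `custom_patterns[name]` by
+// the detector factory. Illustrative wiring (the pattern name is hypothetical):
+//
+// let mut config = EnhancedScanConfig::default();
+// config.custom_patterns.insert("NO_PRINTLN".into(), r"println!".into());
+// config.enabled_detectors.push(DetectorType::Custom("NO_PRINTLN".into()));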
+
+
+impl Default for EnhancedScanConfig {
+ fn default() -> Self {
+ let mut severity_levels = HashMap::new();
+ severity_levels.insert("TODO".to_string(), Severity::Low);
+ severity_levels.insert("FIXME".to_string(), Severity::Medium);
+ severity_levels.insert("HACK".to_string(), Severity::High);
+ severity_levels.insert("BUG".to_string(), Severity::High);
+ severity_levels.insert("XXX".to_string(), Severity::Critical);
+ severity_levels.insert("PANIC".to_string(), Severity::High);
+ severity_levels.insert("UNWRAP".to_string(), Severity::Medium);
+ severity_levels.insert("UNSAFE".to_string(), Severity::High);
+
+ Self {
+ enabled_detectors: vec![
+ DetectorType::Todo,
+ DetectorType::Fixme,
+ ],
+ include_extensions: vec![
+ "rs".to_string(),
+ "py".to_string(),
+ "js".to_string(),
+ "ts".to_string(),
+ "java".to_string(),
+ "cpp".to_string(),
+ "c".to_string(),
+ "h".to_string(),
+ "go".to_string(),
+ "md".to_string(),
+ "txt".to_string(),
+ ],
+ exclude_extensions: vec![
+ "exe".to_string(),
+ "dll".to_string(),
+ "so".to_string(),
+ "bin".to_string(),
+ "png".to_string(),
+ "jpg".to_string(),
+ "jpeg".to_string(),
+ "gif".to_string(),
+ "pdf".to_string(),
+ "zip".to_string(),
+ ],
+ exclude_paths: vec![
+ "target/*".to_string(),
+ "node_modules/*".to_string(),
+ ".git/*".to_string(),
+ "*.lock".to_string(),
+ "vendor/*".to_string(),
+ "build/*".to_string(),
+ ],
+ max_file_size: Some(1024 * 1024), // 1MB default
+ custom_patterns: HashMap::new(),
+ severity_levels,
+ }
+ }
+}
\ No newline at end of file
diff --git a/crates/core/src/incremental.rs b/crates/core/src/incremental.rs
new file mode 100644
index 0000000..ebbc4f8
--- /dev/null
+++ b/crates/core/src/incremental.rs
@@ -0,0 +1,343 @@
+use crate::{Match, PatternDetector};
+use anyhow::Result;
+
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::path::{Path, PathBuf};
+use std::time::{SystemTime, UNIX_EPOCH};
+
+/// File metadata for incremental scanning
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct FileMetadata {
+ pub path: PathBuf,
+ pub modified_time: u64,
+ pub size: u64,
+ pub hash: Option<String>,
+ pub last_scan_time: u64,
+ pub match_count: usize,
+}
+
+/// Incremental scan state persistence
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct IncrementalState {
+ pub last_full_scan: u64,
+ pub file_metadata: HashMap<PathBuf, FileMetadata>,
+ pub scan_history: Vec<IncrementalScanResult>,
+}
+
+/// Result of an incremental scan
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct IncrementalScanResult {
+ pub timestamp: u64,
+ pub files_scanned: usize,
+ pub files_skipped: usize,
+ pub files_modified: usize,
+ pub files_added: usize,
+ pub files_removed: usize,
+ pub total_matches: usize,
+ pub scan_duration_ms: u64,
+}
+
+/// Incremental scanner that only scans changed files
+pub struct IncrementalScanner {
+ detectors: Vec<Box<dyn PatternDetector>>,
+ state: IncrementalState,
+ state_file: PathBuf,
+ force_rescan_threshold: u64, // Days after which to force full rescan
+}
+
+impl IncrementalScanner {
+ /// Create a new incremental scanner
+ pub fn new(
+ detectors: Vec<Box<dyn PatternDetector>>,
+ state_file: PathBuf,
+ ) -> Result<Self> {
+ let state = if state_file.exists() {
+ let content = std::fs::read_to_string(&state_file)?;
+ serde_json::from_str(&content).unwrap_or_default()
+ } else {
+ IncrementalState::default()
+ };
+
+ Ok(Self {
+ detectors,
+ state,
+ state_file,
+ force_rescan_threshold: 7, // 7 days
+ })
+ }
+
+ /// Perform incremental scan
+ pub fn scan_incremental(&mut self, root: &Path) -> Result<(Vec<Match>, IncrementalScanResult)> {
+ let start_time = std::time::Instant::now();
+ let scan_timestamp = SystemTime::now()
+ .duration_since(UNIX_EPOCH)?
+ .as_secs();
+
+ let mut all_matches = Vec::new();
+ let mut files_scanned = 0;
+ let mut files_skipped = 0;
+ let mut files_modified = 0;
+ let mut files_added = 0;
+ let mut files_removed = 0;
+
+ // Check if we need a full rescan
+ let days_since_full_scan = (scan_timestamp - self.state.last_full_scan) / (24 * 60 * 60);
+ let force_full_scan = days_since_full_scan > self.force_rescan_threshold;
+
+ if force_full_scan {
+ println!("🔄 Performing full rescan (last full scan: {} days ago)", days_since_full_scan);
+ self.state.last_full_scan = scan_timestamp;
+ self.state.file_metadata.clear();
+ }
+
+ // Collect current files
+ let current_files = self.collect_files(root)?;
+ let mut current_file_set = std::collections::HashSet::new();
+
+ for file_path in current_files {
+ current_file_set.insert(file_path.clone());
+
+ if let Some(metadata) = self.get_file_metadata(&file_path)? {
+ let existing_metadata = self.state.file_metadata.get(&file_path);
+
+ let needs_scan = match existing_metadata {
+ Some(existing) => {
+ // Check if file has been modified
+ existing.modified_time != metadata.modified_time ||
+ existing.size != metadata.size ||
+ force_full_scan
+ }
+ None => {
+ // New file
+ files_added += 1;
+ true
+ }
+ };
+
+ if needs_scan {
+ if existing_metadata.is_some() {
+ files_modified += 1;
+ }
+
+ // Scan the file
+ let content = std::fs::read_to_string(&file_path)?;
+ let file_matches: Vec<Match> = self
+ .detectors
+ .iter()
+ .flat_map(|detector| detector.detect(&content, &file_path))
+ .collect();
+
+ let updated_metadata = FileMetadata {
+ path: file_path.clone(),
+ modified_time: metadata.modified_time,
+ size: metadata.size,
+ hash: metadata.hash,
+ last_scan_time: scan_timestamp,
+ match_count: file_matches.len(),
+ };
+
+ self.state.file_metadata.insert(file_path, updated_metadata);
+ all_matches.extend(file_matches);
+ files_scanned += 1;
+ } else {
+ // File unchanged, use cached results
+ files_skipped += 1;
+
+ // For complete results, we'd need to store and retrieve cached matches
+ // For now, we'll just note that the file was skipped
+ }
+ }
+ }
+
+ // Find removed files
+ let existing_files: Vec<PathBuf> = self.state.file_metadata.keys().cloned().collect();
+ for existing_file in existing_files {
+ if !current_file_set.contains(&existing_file) {
+ self.state.file_metadata.remove(&existing_file);
+ files_removed += 1;
+ }
+ }
+
+ let scan_duration = start_time.elapsed();
+ let result = IncrementalScanResult {
+ timestamp: scan_timestamp,
+ files_scanned,
+ files_skipped,
+ files_modified,
+ files_added,
+ files_removed,
+ total_matches: all_matches.len(),
+ scan_duration_ms: scan_duration.as_millis() as u64,
+ };
+
+ // Update scan history, then persist state so the new entry is
+ // included in the saved snapshot
+ self.state.scan_history.push(result.clone());
+ if self.state.scan_history.len() > 100 {
+ self.state.scan_history.remove(0); // Keep last 100 scans
+ }
+ self.save_state()?;
+
+ println!("📊 Incremental scan completed:");
+ println!(" Files scanned: {} | Skipped: {} | Modified: {} | Added: {} | Removed: {}",
+ files_scanned, files_skipped, files_modified, files_added, files_removed);
+ println!(" Speed improvement: {:.1}x faster than full scan",
+ self.calculate_speedup(files_scanned, files_skipped));
+
+ Ok((all_matches, result))
+ }
+
+ /// Force a full rescan on next scan
+ pub fn force_full_rescan(&mut self) {
+ self.state.last_full_scan = 0;
+ self.state.file_metadata.clear();
+ }
+
+ /// Get incremental scan statistics
+ pub fn get_statistics(&self) -> IncrementalStats {
+ let recent_scans = self.state.scan_history.iter().rev().take(10).collect::<Vec<_>>();
+
+ let avg_speedup = if !recent_scans.is_empty() {
+ recent_scans.iter()
+ .map(|scan| self.calculate_speedup(scan.files_scanned, scan.files_skipped))
+ .sum::<f64>() / recent_scans.len() as f64
+ } else {
+ 1.0
+ };
+
+ IncrementalStats {
+ total_files_tracked: self.state.file_metadata.len(),
+ last_scan_time: recent_scans.first().map(|s| s.timestamp),
+ average_speedup: avg_speedup,
+ cache_hit_rate: if !recent_scans.is_empty() {
+ let total_files = recent_scans.iter().map(|s| s.files_scanned + s.files_skipped).sum::<usize>();
+ let total_skipped = recent_scans.iter().map(|s| s.files_skipped).sum::<usize>();
+ if total_files > 0 {
+ total_skipped as f64 / total_files as f64
+ } else {
+ 0.0
+ }
+ } else {
+ 0.0
+ },
+ scan_history_count: self.state.scan_history.len(),
+ }
+ }
+
+ fn collect_files(&self, root: &Path) -> Result<Vec<std::path::PathBuf>> {
+ use ignore::WalkBuilder;
+
+ let mut files = Vec::new();
+ for entry in WalkBuilder::new(root).build() {
+ let entry = entry?;
+ if entry.file_type().is_some_and(|ft| ft.is_file()) {
+ files.push(entry.path().to_path_buf());
+ }
+ }
+ Ok(files)
+ }
+
+ fn get_file_metadata(&self, path: &Path) -> Result<Option<FileMetadata>> {
+ if let Ok(metadata) = std::fs::metadata(path) {
+ let modified_time = metadata
+ .modified()?
+ .duration_since(UNIX_EPOCH)?
+ .as_secs();
+
+ // Optional: Calculate file hash for more accurate change detection
+ let hash = if metadata.len() < 1024 * 1024 { // Only hash files < 1MB
+ self.calculate_file_hash(path).ok()
+ } else {
+ None
+ };
+
+ Ok(Some(FileMetadata {
+ path: path.to_path_buf(),
+ modified_time,
+ size: metadata.len(),
+ hash,
+ last_scan_time: 0,
+ match_count: 0,
+ }))
+ } else {
+ Ok(None)
+ }
+ }
+
+ fn calculate_file_hash(&self, path: &Path) -> Result<String> {
+ use std::collections::hash_map::DefaultHasher;
+ use std::hash::{Hash, Hasher};
+
+ let content = std::fs::read(path)?;
+ let mut hasher = DefaultHasher::new();
+ content.hash(&mut hasher);
+ Ok(format!("{:x}", hasher.finish()))
+ }
+
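+ /// Estimated speedup vs. a full scan: total files considered / files actually
+ /// scanned (e.g. 10 scanned + 90 skipped => 10.0x); 1.0 when nothing was scanned.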
+ fn calculate_speedup(&self, files_scanned: usize, files_skipped: usize) -> f64 {
+ let total_files = files_scanned + files_skipped;
+ if total_files > 0 && files_scanned > 0 {
+ total_files as f64 / files_scanned as f64
+ } else {
+ 1.0
+ }
+ }
+
+ fn save_state(&self) -> Result<()> {
+ let content = serde_json::to_string_pretty(&self.state)?;
+ std::fs::write(&self.state_file, content)?;
+ Ok(())
+ }
+}
+
+
+/// Statistics for incremental scanning
+#[derive(Debug, Clone)]
+pub struct IncrementalStats {
+ pub total_files_tracked: usize,
+ pub last_scan_time: Option<u64>,
+ pub average_speedup: f64,
+ pub cache_hit_rate: f64,
+ pub scan_history_count: usize,
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::detectors::TodoDetector;
+ use tempfile::{TempDir, NamedTempFile};
+
+ #[test]
+ fn test_incremental_scanner_creation() {
+ let temp_file = NamedTempFile::new().unwrap();
+ let detectors: Vec<Box<dyn PatternDetector>> = vec![Box::new(TodoDetector)];
+
+ let scanner = IncrementalScanner::new(detectors, temp_file.path().to_path_buf());
+ assert!(scanner.is_ok());
+ }
+
+ #[test]
+ fn test_file_metadata_tracking() {
+ let temp_dir = TempDir::new().unwrap();
+ let test_file = temp_dir.path().join("test.rs");
+ std::fs::write(&test_file, "// TODO: test").unwrap();
+
+ let temp_state = NamedTempFile::new().unwrap();
+ let detectors: Vec<Box<dyn PatternDetector>> = vec![Box::new(TodoDetector)];
+ let mut scanner = IncrementalScanner::new(detectors, temp_state.path().to_path_buf()).unwrap();
+
+ // First scan
+ let (matches1, result1) = scanner.scan_incremental(temp_dir.path()).unwrap();
+ assert_eq!(result1.files_added, 1);
+ assert_eq!(result1.files_scanned, 1);
+ assert_eq!(matches1.len(), 1);
+
+ // Second scan without changes - should skip file
+ let (_matches2, result2) = scanner.scan_incremental(temp_dir.path()).unwrap();
+ assert_eq!(result2.files_skipped, 1);
+ assert_eq!(result2.files_scanned, 0);
+ }
+}
\ No newline at end of file
diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs
new file mode 100644
index 0000000..a8079ce
--- /dev/null
+++ b/crates/core/src/lib.rs
@@ -0,0 +1,186 @@
+use anyhow::Result;
+use dashmap::DashMap;
+use ignore::WalkBuilder;
+use rayon::prelude::*;
+use std::path::Path;
+
+pub mod config;
+pub mod detectors;
+pub mod detector_factory;
+pub mod enhanced_config;
+pub mod optimized_scanner;
+pub mod performance;
+pub mod incremental;
+pub mod distributed;
+pub mod custom_detectors;
+
+/// Represents a detected pattern match in a file.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
+pub struct Match {
+ /// The path to the file where the match was found.
+ pub file_path: String,
+ /// The line number (1-based) where the match starts.
+ pub line_number: usize,
+ /// The column number (1-based) where the match starts.
+ pub column: usize,
+ /// The type of pattern detected (e.g., "TODO", "FIXME").
+ pub pattern: String,
+ /// The matched text or a descriptive message.
+ pub message: String,
+}
+
+/// Severity levels for detected patterns.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
+pub enum Severity {
+ Info,
+ Low,
+ Medium,
+ High,
+ Critical,
+}
+
+/// Trait for detecting patterns in code content.
+/// Implementors should define how to find specific patterns like TODO or FIXME.
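+/// # Example (illustrative sketch; `PanicDetector` is hypothetical)
+/// ```ignore
+/// struct PanicDetector;
+///
+/// impl PatternDetector for PanicDetector {
+///     fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> {
+///         content
+///             .lines()
+///             .enumerate()
+///             .filter(|(_, line)| line.contains("panic!"))
+///             .map(|(i, line)| Match {
+///                 file_path: file_path.display().to_string(),
+///                 line_number: i + 1,
+///                 column: line.find("panic!").unwrap_or(0) + 1,
+///                 pattern: "PANIC".to_string(),
+///                 message: line.trim().to_string(),
+///             })
+///             .collect()
+///     }
+/// }
+/// ```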
+pub trait PatternDetector: Send + Sync {
+ /// Detects patterns in the given content and returns a list of matches.
+ /// The file_path is provided for context, such as filtering by file type.
+ fn detect(&self, content: &str, file_path: &Path) -> Vec<Match>;
+}
+
+/// A scanner that uses parallel processing to scan codebases for patterns.
+pub struct Scanner {
+ detectors: Vec<Box<dyn PatternDetector>>,
+ cache: DashMap<String, Vec<Match>>,
+}
+
+impl Scanner {
+ /// Creates a new scanner with the given pattern detectors.
+ pub fn new(detectors: Vec<Box<dyn PatternDetector>>) -> Self {
+ Self {
+ detectors,
+ cache: DashMap::new(),
+ }
+ }
+
+ /// Scans the directory tree starting from the given root path.
+ /// Returns all matches found by the detectors.
+ /// Files are walked once and processed in parallel (rayon's `par_bridge`); per-file results are cached by path.
+ pub fn scan(&self, root: &Path) -> Result<Vec<Match>> {
+ let matches: Vec<Match> = WalkBuilder::new(root)
+ .build()
+ .par_bridge()
+ .filter_map(|entry| {
+ let entry = entry.ok()?;
+ let file_type = entry.file_type()?;
+ if file_type.is_file() {
+ let path = entry.path();
+ let path_str = path.to_string_lossy().to_string();
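+ // Note: the cache is keyed by path only and never invalidated, so a
+ // file modified during this scanner's lifetime returns stale results.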
+ if let Some(cached) = self.cache.get(&path_str) {
+ Some(cached.clone())
+ } else {
+ let content = std::fs::read_to_string(path).ok()?;
+ let file_matches: Vec<Match> = self
+ .detectors
+ .par_iter()
+ .flat_map(|detector| detector.detect(&content, path))
+ .collect();
+ self.cache.insert(path_str, file_matches.clone());
+ Some(file_matches)
+ }
+ } else {
+ None
+ }
+ })
+ .flatten()
+ .collect();
+
+ Ok(matches)
+ }
+}
+
+// Re-export detectors and factory for convenience
+pub use detectors::*;
+pub use detector_factory::*;
+pub use enhanced_config::*;
+pub use optimized_scanner::*;
+pub use performance::*;
+pub use incremental::*;
+pub use distributed::*;
+pub use custom_detectors::*;
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use std::path::PathBuf;
+
+ #[test]
+ fn test_todo_detector() {
+ let detector = TodoDetector;
+ let content = "Some code\n// TODO: fix this\nMore code";
+ let path = PathBuf::from("test.rs");
+ let matches = detector.detect(content, &path);
+ assert_eq!(matches.len(), 1);
+ assert_eq!(matches[0].pattern, "TODO");
+ assert_eq!(matches[0].line_number, 2);
+ assert_eq!(matches[0].column, 4); // "// " is 3 chars, then TODO
+ assert!(matches[0].message.contains("TODO"));
+ }
+
+ #[test]
+ fn test_fixme_detector() {
+ let detector = FixmeDetector;
+ let content = "Code\nFIXME: issue here\nEnd";
+ let path = PathBuf::from("test.js");
+ let matches = detector.detect(content, &path);
+ assert_eq!(matches.len(), 1);
+ assert_eq!(matches[0].pattern, "FIXME");
+ assert_eq!(matches[0].line_number, 2);
+ assert_eq!(matches[0].column, 1);
+ assert!(matches[0].message.contains("FIXME"));
+ }
+
+ #[test]
+ fn test_no_matches() {
+ let detector = TodoDetector;
+ let content = "No todos here";
+ let path = PathBuf::from("test.txt");
+ let matches = detector.detect(content, &path);
+ assert_eq!(matches.len(), 0);
+ }
+
+ #[test]
+ fn test_multiple_matches() {
+ let detector = TodoDetector;
+ let content = "TODO\n// TODO again";
+ let path = PathBuf::from("test.rs");
+ let matches = detector.detect(content, &path);
+ assert_eq!(matches.len(), 2);
+ }
+
+ #[test]
+ fn test_scanner_with_detectors() {
+ let detectors: Vec<Box<dyn PatternDetector>> =
+ vec![Box::new(TodoDetector), Box::new(FixmeDetector)];
+ let scanner = Scanner::new(detectors);
+ // Create a temp directory with a known file so the scan has deterministic input.
+ use tempfile::TempDir;
+ let temp_dir = TempDir::new().unwrap();
+ let file_path = temp_dir.path().join("test.rs");
+ std::fs::write(&file_path, "TODO: test\nFIXME: another").unwrap();
+ let matches = scanner.scan(temp_dir.path()).unwrap();
+ assert_eq!(matches.len(), 2);
+ // Sort by pattern for deterministic test
+ let mut sorted = matches;
+ sorted.sort_by(|a, b| a.pattern.cmp(&b.pattern));
+ assert_eq!(sorted[0].pattern, "FIXME");
+ assert_eq!(sorted[1].pattern, "TODO");
+ }
+}
diff --git a/crates/core/src/optimized_scanner.rs b/crates/core/src/optimized_scanner.rs
new file mode 100644
index 0000000..f35d5b6
--- /dev/null
+++ b/crates/core/src/optimized_scanner.rs
@@ -0,0 +1,354 @@
+use crate::{Match, PatternDetector};
+use anyhow::Result;
+use dashmap::DashMap;
+use ignore::WalkBuilder;
+use rayon::prelude::*;
+use std::path::Path;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::time::Instant;
+
+/// Performance metrics for scanning operations
+#[derive(Debug, Clone)]
+pub struct ScanMetrics {
+ pub total_files_scanned: usize,
+ pub total_lines_processed: usize,
+ pub total_matches_found: usize,
+ pub scan_duration_ms: u64,
+ pub cache_hits: usize,
+ pub cache_misses: usize,
+}
+
+/// Optimized scanner with performance enhancements
+pub struct OptimizedScanner {
+ detectors: Vec<Box<dyn PatternDetector>>,
+ cache: DashMap<String, Vec<Match>>,
+ file_cache: DashMap<String, (u64, Vec<Match>)>, // (modified_time, matches)
+ max_cache_size: usize,
+}
+
+impl OptimizedScanner {
+ /// Creates a new optimized scanner with the given pattern detectors
+ pub fn new(detectors: Vec<Box<dyn PatternDetector>>) -> Self {
+ Self {
+ detectors,
+ cache: DashMap::new(),
+ file_cache: DashMap::new(),
+ max_cache_size: 10000, // Maximum number of cached file results
+ }
+ }
+
+ /// Set maximum cache size
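+ /// e.g. `OptimizedScanner::new(detectors).with_cache_size(50_000)` (illustrative).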
+ pub fn with_cache_size(mut self, size: usize) -> Self {
+ self.max_cache_size = size;
+ self
+ }
+
+ /// Optimized scan with performance improvements
+ pub fn scan_optimized(&self, root: &Path) -> Result<(Vec<Match>, ScanMetrics)> {
+ let start_time = Instant::now();
+ let files_processed = AtomicUsize::new(0);
+ let lines_processed = AtomicUsize::new(0);
+ let cache_hits = AtomicUsize::new(0);
+ let cache_misses = AtomicUsize::new(0);
+
+ // Pre-compile regex patterns and optimize file filtering
+ let matches: Vec<Match> = WalkBuilder::new(root)
+ .standard_filters(true) // Use gitignore, etc.
+ .build()
+ .par_bridge()
+ .filter_map(|entry| {
+ let entry = entry.ok()?;
+ let file_type = entry.file_type()?;
+
+ if !file_type.is_file() {
+ return None;
+ }
+
+ let path = entry.path();
+
+ // Skip binary files and large files early
+ if !self.should_scan_file(path) {
+ return None;
+ }
+
+ files_processed.fetch_add(1, Ordering::Relaxed);
+
+ let path_str = path.to_string_lossy().to_string();
+
+ // Check file-based cache with modification time
+ if let Some(cached_result) = self.get_cached_result(path, &path_str) {
+ cache_hits.fetch_add(1, Ordering::Relaxed);
+ return Some(cached_result);
+ }
+
+ cache_misses.fetch_add(1, Ordering::Relaxed);
+
+ // Read and process file
+ let content = std::fs::read_to_string(path).ok()?;
+ lines_processed.fetch_add(content.lines().count(), Ordering::Relaxed);
+
+ // Use optimized parallel processing for detectors
+ let file_matches: Vec<Match> = if self.detectors.len() > 3 {
+ // For many detectors, use parallel processing
+ self.detectors
+ .par_iter()
+ .flat_map(|detector| detector.detect(&content, path))
+ .collect()
+ } else {
+ // For few detectors, sequential is faster (less overhead)
+ self.detectors
+ .iter()
+ .flat_map(|detector| detector.detect(&content, path))
+ .collect()
+ };
+
+ // Cache the result with file modification time
+ self.cache_result(path, &path_str, &file_matches);
+
+ Some(file_matches)
+ })
+ .flatten()
+ .collect();
+
+ let duration = start_time.elapsed();
+
+ let metrics = ScanMetrics {
+ total_files_scanned: files_processed.load(Ordering::Relaxed),
+ total_lines_processed: lines_processed.load(Ordering::Relaxed),
+ total_matches_found: matches.len(),
+ scan_duration_ms: duration.as_millis() as u64,
+ cache_hits: cache_hits.load(Ordering::Relaxed),
+ cache_misses: cache_misses.load(Ordering::Relaxed),
+ };
+
+ Ok((matches, metrics))
+ }
+
+ /// Check if a file should be scanned based on size and type
+ fn should_scan_file(&self, path: &Path) -> bool {
+ // Check file extension
+ if let Some(ext) = path.extension().and_then(|s| s.to_str()) {
+ match ext.to_lowercase().as_str() {
+ // Skip binary files
+ "exe" | "dll" | "so" | "dylib" | "bin" | "obj" | "o" | "a" | "lib" => return false,
+ // Skip image files
+ "png" | "jpg" | "jpeg" | "gif" | "svg" | "ico" | "bmp" | "tiff" => return false,
+ // Skip compressed files
+ "zip" | "tar" | "gz" | "rar" | "7z" | "bz2" | "xz" => return false,
+ // Skip media files
+ "mp3" | "mp4" | "avi" | "mov" | "wav" | "flac" => return false,
+ _ => {}
+ }
+ }
+
+ // Check file size (skip files larger than 5MB)
+ if let Ok(metadata) = std::fs::metadata(path) {
+ if metadata.len() > 5 * 1024 * 1024 {
+ return false;
+ }
+ }
+
+ true
+ }
+
+ /// Get cached result if file hasn't been modified
+ fn get_cached_result(&self, path: &Path, path_str: &str) -> Option<Vec<Match>> {
+ if let Ok(metadata) = std::fs::metadata(path) {
+ if let Ok(modified) = metadata.modified() {
+ if let Some(cached_entry) = self.file_cache.get(path_str) {
+ let (cached_time, cached_matches) = cached_entry.value();
+ let modified_timestamp = modified
+ .duration_since(std::time::UNIX_EPOCH)
+ .ok()?
+ .as_secs();
+
+ if modified_timestamp == *cached_time {
+ return Some(cached_matches.clone());
+ }
+ }
+ }
+ }
+ None
+ }
+
+ /// Cache result with file modification time
+ fn cache_result(&self, path: &Path, path_str: &str, matches: &[Match]) {
+ // Manage cache size
+ if self.file_cache.len() >= self.max_cache_size {
+ // Evict a quarter of the entries; DashMap iteration order is
+ // unspecified, so this is arbitrary eviction rather than true LRU.
+ let keys_to_remove: Vec<String> = self
+ .file_cache
+ .iter()
+ .take(self.max_cache_size / 4)
+ .map(|entry| entry.key().clone())
+ .collect();
+
+ for key in keys_to_remove {
+ self.file_cache.remove(&key);
+ }
+ }
+
+ if let Ok(metadata) = std::fs::metadata(path) {
+ if let Ok(modified) = metadata.modified() {
+ let modified_timestamp = modified
+ .duration_since(std::time::UNIX_EPOCH)
+ .map(|d| d.as_secs())
+ .unwrap_or(0);
+
+ self.file_cache.insert(
+ path_str.to_string(),
+ (modified_timestamp, matches.to_vec()),
+ );
+ }
+ }
+ }
+
+ /// Clear all caches
+ pub fn clear_cache(&self) {
+ self.cache.clear();
+ self.file_cache.clear();
+ }
+
+ /// Get cache statistics
+ pub fn cache_stats(&self) -> (usize, usize) {
+ (self.cache.len(), self.file_cache.len())
+ }
+}
+
+/// Memory-efficient streaming scanner for very large codebases
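+/// Only one batch of file matches is held in memory at a time, trading a
+/// little throughput for a bounded footprint.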
+pub struct StreamingScanner {
+ detectors: Vec<Box<dyn PatternDetector>>,
+ batch_size: usize,
+}
+
+impl StreamingScanner {
+ pub fn new(detectors: Vec<Box<dyn PatternDetector>>) -> Self {
+ Self {
+ detectors,
+ batch_size: 100, // Process files in batches
+ }
+ }
+
+ /// Scan with memory-efficient streaming
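+ /// Matches are delivered in batches via `callback`, so callers can stream
+ /// them to disk or a database instead of buffering everything. Illustrative
+ /// use (hedged sketch):
+ /// ```ignore
+ /// let metrics = scanner.scan_streaming(Path::new("."), |batch| {
+ ///     for m in batch {
+ ///         println!("{}:{} {}", m.file_path, m.line_number, m.pattern);
+ ///     }
+ ///     Ok(())
+ /// })?;
+ /// ```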
+ pub fn scan_streaming<F>(&self, root: &Path, mut callback: F) -> Result<ScanMetrics>
+ where
+ F: FnMut(Vec<Match>) -> Result<()>,
+ {
+ let start_time = Instant::now();
+ let mut total_files = 0;
+ let mut total_lines = 0;
+ let mut total_matches = 0;
+
+ let walker = WalkBuilder::new(root)
+ .standard_filters(true)
+ .build();
+
+ let mut file_batch = Vec::new();
+
+ for entry in walker {
+ let entry = entry?;
+ if entry.file_type().is_some_and(|ft| ft.is_file()) {
+ file_batch.push(entry.path().to_path_buf());
+
+ if file_batch.len() >= self.batch_size {
+ let (batch_matches, batch_lines) = self.process_batch(&file_batch)?;
+ total_files += file_batch.len();
+ total_lines += batch_lines;
+ total_matches += batch_matches.len();
+
+ callback(batch_matches)?;
+ file_batch.clear();
+ }
+ }
+ }
+
+ // Process remaining files
+ if !file_batch.is_empty() {
+ let (batch_matches, batch_lines) = self.process_batch(&file_batch)?;
+ total_files += file_batch.len();
+ total_lines += batch_lines;
+ total_matches += batch_matches.len();
+
+ callback(batch_matches)?;
+ }
+
+ let duration = start_time.elapsed();
+
+ Ok(ScanMetrics {
+ total_files_scanned: total_files,
+ total_lines_processed: total_lines,
+ total_matches_found: total_matches,
+ scan_duration_ms: duration.as_millis() as u64,
+ cache_hits: 0,
+ cache_misses: 0,
+ })
+ }
+
+ fn process_batch(&self, files: &[std::path::PathBuf]) -> Result<(Vec<Match>, usize)> {
+ let results: Vec<(Vec<Match>, usize)> = files
+ .par_iter()
+ .filter_map(|path| {
+ let content = std::fs::read_to_string(path).ok()?;
+ let line_count = content.lines().count();
+
+ let matches: Vec<Match> = self
+ .detectors
+ .iter()
+ .flat_map(|detector| detector.detect(&content, path))
+ .collect();
+
+ Some((matches, line_count))
+ })
+ .collect();
+
+ let all_matches: Vec<Match> = results.iter().flat_map(|(m, _)| m.clone()).collect();
+ let total_lines: usize = results.iter().map(|(_, l)| *l).sum();
+
+ Ok((all_matches, total_lines))
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::detectors::*;
+ use tempfile::TempDir;
+
+ #[test]
+ fn test_optimized_scanner() {
+ let temp_dir = TempDir::new().unwrap();
+ let file_path = temp_dir.path().join("test.rs");
+ std::fs::write(&file_path, "// TODO: test\n// FIXME: another").unwrap();
+
+ let detectors: Vec<Box<dyn PatternDetector>> = vec![
+ Box::new(TodoDetector),
+ Box::new(FixmeDetector),
+ ];
+
+ let scanner = OptimizedScanner::new(detectors);
+ let (matches, metrics) = scanner.scan_optimized(temp_dir.path()).unwrap();
+
+ assert_eq!(matches.len(), 2);
+ assert_eq!(metrics.total_files_scanned, 1);
+ assert_eq!(metrics.total_matches_found, 2); // duration can round to 0 ms on fast scans
+ }
+
+ #[test]
+ fn test_caching() {
+ let temp_dir = TempDir::new().unwrap();
+ let file_path = temp_dir.path().join("test.rs");
+ std::fs::write(&file_path, "// TODO: test").unwrap();
+
+ let detectors: Vec<Box<dyn PatternDetector>> = vec![Box::new(TodoDetector)];
+ let scanner = OptimizedScanner::new(detectors);
+
+ // First scan
+ let (matches1, _metrics1) = scanner.scan_optimized(temp_dir.path()).unwrap();
+
+ // Second scan should use cache
+ let (matches2, metrics2) = scanner.scan_optimized(temp_dir.path()).unwrap();
+
+ assert_eq!(matches1.len(), matches2.len());
+ assert!(metrics2.cache_hits > 0);
+ }
+}
\ No newline at end of file
diff --git a/crates/core/src/performance.rs b/crates/core/src/performance.rs
new file mode 100644
index 0000000..a1c99a1
--- /dev/null
+++ b/crates/core/src/performance.rs
@@ -0,0 +1,300 @@
+use std::collections::HashMap;
+use std::time::{Duration, Instant};
+
+/// Performance profiler for tracking operation timings
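+/// Illustrative use (hedged sketch):
+/// ```ignore
+/// let mut profiler = PerformanceProfiler::new();
+/// profiler.start("scan");
+/// // ... timed work ...
+/// profiler.end("scan");
+/// println!("{}", profiler.report());
+/// ```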
+#[derive(Debug, Clone)]
+pub struct PerformanceProfiler {
+ timings: HashMap<String, Vec<Duration>>,
+ start_times: HashMap<String, Instant>,
+}
+
+impl PerformanceProfiler {
+ pub fn new() -> Self {
+ Self {
+ timings: HashMap::new(),
+ start_times: HashMap::new(),
+ }
+ }
+
+ /// Start timing an operation
+ pub fn start(&mut self, operation: &str) {
+ self.start_times.insert(operation.to_string(), Instant::now());
+ }
+
+ /// End timing an operation
+ pub fn end(&mut self, operation: &str) {
+ if let Some(start_time) = self.start_times.remove(operation) {
+ let duration = start_time.elapsed();
+ self.timings
+ .entry(operation.to_string())
+ .or_default()
+ .push(duration);
+ }
+ }
+
+ /// Get average duration for an operation
+ pub fn average_duration(&self, operation: &str) -> Option<Duration> {
+ let durations = self.timings.get(operation)?;
+ if durations.is_empty() {
+ return None;
+ }
+
+ let total: Duration = durations.iter().sum();
+ Some(total / durations.len() as u32)
+ }
+
+ /// Get total duration for an operation
+ pub fn total_duration(&self, operation: &str) -> Option<Duration> {
+ let durations = self.timings.get(operation)?;
+ Some(durations.iter().sum())
+ }
+
+ /// Get operation count
+ pub fn operation_count(&self, operation: &str) -> usize {
+ self.timings.get(operation).map_or(0, |d| d.len())
+ }
+
+ /// Generate performance report
+ pub fn report(&self) -> String {
+ let mut report = String::from("Performance Report:\n");
+ report.push_str("==================\n\n");
+
+ for (operation, durations) in &self.timings {
+ if durations.is_empty() {
+ continue;
+ }
+
+ let total: Duration = durations.iter().sum();
+ let average = total / durations.len() as u32;
+ let min = *durations.iter().min().unwrap();
+ let max = *durations.iter().max().unwrap();
+
+ report.push_str(&format!(
+ "{}: {} calls\n Total: {:?}\n Average: {:?}\n Min: {:?}\n Max: {:?}\n\n",
+ operation,
+ durations.len(),
+ total,
+ average,
+ min,
+ max
+ ));
+ }
+
+ report
+ }
+
+ /// Clear all timings
+ pub fn clear(&mut self) {
+ self.timings.clear();
+ self.start_times.clear();
+ }
+}
+
+impl Default for PerformanceProfiler {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+/// Memory usage tracker
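+/// Note: this only counts sizes explicitly reported via `allocate` /
+/// `deallocate`; it does not hook the global allocator.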
+#[derive(Debug, Clone)]
+pub struct MemoryTracker {
+ peak_memory: usize,
+ current_memory: usize,
+}
+
+impl MemoryTracker {
+ pub fn new() -> Self {
+ Self {
+ peak_memory: 0,
+ current_memory: 0,
+ }
+ }
+
+ /// Track memory allocation
+ pub fn allocate(&mut self, size: usize) {
+ self.current_memory += size;
+ if self.current_memory > self.peak_memory {
+ self.peak_memory = self.current_memory;
+ }
+ }
+
+ /// Track memory deallocation
+ pub fn deallocate(&mut self, size: usize) {
+ self.current_memory = self.current_memory.saturating_sub(size);
+ }
+
+ /// Get current memory usage
+ pub fn current_usage(&self) -> usize {
+ self.current_memory
+ }
+
+ /// Get peak memory usage
+ pub fn peak_usage(&self) -> usize {
+ self.peak_memory
+ }
+
+ /// Reset tracking
+ pub fn reset(&mut self) {
+ self.current_memory = 0;
+ self.peak_memory = 0;
+ }
+}
+
+impl Default for MemoryTracker {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+/// Input stats for performance calculation
+#[derive(Debug, Clone)]
+pub struct ScanStats {
+ pub scan_duration: Duration,
+ pub total_files: usize,
+ pub total_lines: usize,
+ pub total_matches: usize,
+ pub cache_hits: usize,
+ pub cache_total: usize,
+ pub memory_usage_bytes: usize,
+ pub thread_count: usize,
+}
+
+/// Comprehensive performance metrics
+#[derive(Debug, Clone)]
+pub struct PerformanceMetrics {
+ pub scan_duration: Duration,
+ pub files_per_second: f64,
+ pub lines_per_second: f64,
+ pub matches_per_second: f64,
+ pub cache_hit_rate: f64,
+ pub memory_usage_mb: f64,
+ pub parallelism_efficiency: f64,
+}
+
+impl PerformanceMetrics {
+ pub fn calculate(stats: ScanStats) -> Self {
+ let duration_secs = stats.scan_duration.as_secs_f64();
+
+ let files_per_second = if duration_secs > 0.0 {
+ stats.total_files as f64 / duration_secs
+ } else {
+ 0.0
+ };
+
+ let lines_per_second = if duration_secs > 0.0 {
+ stats.total_lines as f64 / duration_secs
+ } else {
+ 0.0
+ };
+
+ let matches_per_second = if duration_secs > 0.0 {
+ stats.total_matches as f64 / duration_secs
+ } else {
+ 0.0
+ };
+
+ let cache_hit_rate = if stats.cache_total > 0 {
+ stats.cache_hits as f64 / stats.cache_total as f64
+ } else {
+ 0.0
+ };
+
+ let memory_usage_mb = stats.memory_usage_bytes as f64 / (1024.0 * 1024.0);
+
+ // Crude parallelism proxy: with no measured serial baseline available,
+ // this reduces to 1 / thread_count, so treat it as a trend indicator only.
+ let ideal_duration = duration_secs * stats.thread_count as f64;
+ let parallelism_efficiency = if ideal_duration > 0.0 {
+ (duration_secs / ideal_duration).min(1.0)
+ } else {
+ 0.0
+ };
+
+ Self {
+ scan_duration: stats.scan_duration,
+ files_per_second,
+ lines_per_second,
+ matches_per_second,
+ cache_hit_rate,
+ memory_usage_mb,
+ parallelism_efficiency,
+ }
+ }
+
+ pub fn report(&self) -> String {
+ format!(
+ "Performance Metrics:\n\
+ ===================\n\
+ Scan Duration: {:?}\n\
+ Files/sec: {:.2}\n\
+ Lines/sec: {:.2}\n\
+ Matches/sec: {:.2}\n\
+ Cache Hit Rate: {:.2}%\n\
+ Memory Usage: {:.2} MB\n\
+ Parallelism Efficiency: {:.2}%\n",
+ self.scan_duration,
+ self.files_per_second,
+ self.lines_per_second,
+ self.matches_per_second,
+ self.cache_hit_rate * 100.0,
+ self.memory_usage_mb,
+ self.parallelism_efficiency * 100.0
+ )
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use std::thread;
+
+ #[test]
+ fn test_performance_profiler() {
+ let mut profiler = PerformanceProfiler::new();
+
+ profiler.start("test_operation");
+ thread::sleep(Duration::from_millis(10));
+ profiler.end("test_operation");
+
+ assert!(profiler.average_duration("test_operation").is_some());
+ assert_eq!(profiler.operation_count("test_operation"), 1);
+ }
+
+ #[test]
+ fn test_memory_tracker() {
+ let mut tracker = MemoryTracker::new();
+
+ tracker.allocate(1024);
+ assert_eq!(tracker.current_usage(), 1024);
+ assert_eq!(tracker.peak_usage(), 1024);
+
+ tracker.allocate(512);
+ assert_eq!(tracker.current_usage(), 1536);
+ assert_eq!(tracker.peak_usage(), 1536);
+
+ tracker.deallocate(1024);
+ assert_eq!(tracker.current_usage(), 512);
+ assert_eq!(tracker.peak_usage(), 1536); // Peak should remain
+ }
+
+ #[test]
+ fn test_performance_metrics() {
+ let stats = ScanStats {
+ scan_duration: Duration::from_secs(2),
+ total_files: 100,
+ total_lines: 10000,
+ total_matches: 50,
+ cache_hits: 80,
+ cache_total: 100,
+ memory_usage_bytes: 1024 * 1024,
+ thread_count: 4,
+ };
+ let metrics = PerformanceMetrics::calculate(stats);
+
+ assert_eq!(metrics.files_per_second, 50.0);
+ assert_eq!(metrics.lines_per_second, 5000.0);
+ assert_eq!(metrics.matches_per_second, 25.0);
+ assert_eq!(metrics.cache_hit_rate, 0.8);
+ assert_eq!(metrics.memory_usage_mb, 1.0);
+ }
+}
\ No newline at end of file
diff --git a/crates/output/Cargo.toml b/crates/output/Cargo.toml
new file mode 100644
index 0000000..34349ba
--- /dev/null
+++ b/crates/output/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+name = "code-guardian-output"
+version = "0.1.0-alpha"
+edition = "2021"
+
+[dependencies]
+code-guardian-core = { path = "../core" }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+serde_yaml = { workspace = true }
+thiserror = { workspace = true }
+anyhow = { workspace = true }
+chrono = { workspace = true }
+colored = { workspace = true }
+comfy-table = { workspace = true }
+csv = "1.1"
+
+[dev-dependencies]
+proptest = { workspace = true }
diff --git a/crates/output/src/formatters/csv.rs b/crates/output/src/formatters/csv.rs
new file mode 100644
index 0000000..70256e4
--- /dev/null
+++ b/crates/output/src/formatters/csv.rs
@@ -0,0 +1,138 @@
+use super::Formatter;
+use code_guardian_core::Match;
+
+/// Formatter that outputs matches in CSV format.
+/// Includes headers for spreadsheet compatibility.
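+/// Illustrative output (file names are examples):
+/// ```text
+/// file_path,line_number,column,pattern,message
+/// src/main.rs,3,1,TODO,TODO: fix this
+/// ```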
+pub struct CsvFormatter;
+
+impl Formatter for CsvFormatter {
+ fn format(&self, matches: &[Match]) -> String {
+ let mut wtr = csv::Writer::from_writer(vec![]);
+ wtr.write_record(["file_path", "line_number", "column", "pattern", "message"])
+ .unwrap();
+
+ for m in matches {
+ wtr.write_record([
+ &m.file_path,
+ &m.line_number.to_string(),
+ &m.column.to_string(),
+ &m.pattern,
+ &m.message,
+ ])
+ .unwrap();
+ }
+
+ wtr.flush().unwrap();
+ String::from_utf8(wtr.into_inner().unwrap()).unwrap()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_empty_matches() {
+ let formatter = CsvFormatter;
+ let matches = vec![];
+ let output = formatter.format(&matches);
+ let lines: Vec<&str> = output.lines().collect();
+ assert_eq!(lines.len(), 1); // Only header
+ assert!(lines[0].contains("file_path,line_number,column,pattern,message"));
+ }
+
+ #[test]
+ fn test_single_match() {
+ let formatter = CsvFormatter;
+ let matches = vec![Match {
+ file_path: "test.rs".to_string(),
+ line_number: 1,
+ column: 1,
+ pattern: "TODO".to_string(),
+ message: "TODO: fix this".to_string(),
+ }];
+ let output = formatter.format(&matches);
+ let lines: Vec<&str> = output.lines().collect();
+ assert_eq!(lines.len(), 2);
+ assert!(lines[1].contains("test.rs,1,1,TODO,TODO: fix this"));
+ }
+
+ #[test]
+ fn test_multiple_matches() {
+ let formatter = CsvFormatter;
+ let matches = vec![
+ Match {
+ file_path: "test.rs".to_string(),
+ line_number: 1,
+ column: 1,
+ pattern: "TODO".to_string(),
+ message: "TODO".to_string(),
+ },
+ Match {
+ file_path: "test.js".to_string(),
+ line_number: 2,
+ column: 3,
+ pattern: "FIXME".to_string(),
+ message: "FIXME".to_string(),
+ },
+ ];
+ let output = formatter.format(&matches);
+ let lines: Vec<&str> = output.lines().collect();
+ assert_eq!(lines.len(), 3);
+ assert!(lines[1].contains("test.rs"));
+ assert!(lines[2].contains("test.js"));
+ }
+
+ #[test]
+ fn test_csv_escaping() {
+ let formatter = CsvFormatter;
+ let matches = vec![Match {
+ file_path: "test,file.rs".to_string(),
+ line_number: 1,
+ column: 1,
+ pattern: "TODO".to_string(),
+ message: "TODO, with comma".to_string(),
+ }];
+ let output = formatter.format(&matches);
+ let lines: Vec<&str> = output.lines().collect();
+ assert!(lines[1].contains("\"test,file.rs\""));
+ assert!(lines[1].contains("\"TODO, with comma\""));
+ }
+}
+
+#[cfg(test)]
+mod proptest_tests {
+ use super::*;
+ use proptest::prelude::*;
+
+ fn arb_match() -> impl Strategy<Value = Match> {
+ ("[a-zA-Z0-9_.]+", 1..10000usize, 1..10000usize, "[A-Z]+", ".*").prop_map(|(fp, ln, col, pat, msg)| Match {
+ file_path: fp.to_string(),
+ line_number: ln,
+ column: col,
+ pattern: pat.to_string(),
+ message: msg.to_string(),
+ })
+ }
+
+ proptest! {
+ #[test]
+ fn test_csv_formatter_arbitrary_matches(matches in proptest::collection::vec(arb_match(), 0..10)) {
+ let formatter = CsvFormatter;
+ let output = formatter.format(&matches);
+ // Check that it's valid CSV
+ let mut rdr = csv::Reader::from_reader(output.as_bytes());
+ let records: Vec<_> = rdr.records().collect();
+ prop_assert_eq!(records.len(), matches.len());
+ for (i, record) in records.into_iter().enumerate() {
+ let record = record.unwrap();
+ prop_assert_eq!(record.len(), 5);
+ prop_assert_eq!(record[0].to_string(), matches[i].file_path.clone());
+ prop_assert_eq!(record[1].to_string(), matches[i].line_number.to_string());
+ prop_assert_eq!(record[2].to_string(), matches[i].column.to_string());
+ prop_assert_eq!(record[3].to_string(), matches[i].pattern.clone());
+ prop_assert_eq!(record[4].to_string(), matches[i].message.clone());
+ }
+ }
+ }
+}
diff --git a/crates/output/src/formatters/html.rs b/crates/output/src/formatters/html.rs
new file mode 100644
index 0000000..89fad8c
--- /dev/null
+++ b/crates/output/src/formatters/html.rs
@@ -0,0 +1,178 @@
+use super::Formatter;
+use code_guardian_core::Match;
+
+/// Formatter that outputs matches in HTML table format.
+/// Includes basic HTML structure for standalone display.
+pub struct HtmlFormatter;
+
+impl Formatter for HtmlFormatter {
+ fn format(&self, matches: &[Match]) -> String {
+ let mut output = String::from(
+ r#"<!DOCTYPE html>
+<html>
+<head>
+<title>Code Guardian Matches</title>
+</head>
+<body>
+<h1>Code Guardian Scan Results</h1>
+<table border="1">
+<thead>
+<tr>
+<th>File</th>
+<th>Line</th>
+<th>Column</th>
+<th>Pattern</th>
+<th>Message</th>
+</tr>
+</thead>
+<tbody>
+"#,
+ );
+
+ if matches.is_empty() {
+ output.push_str(" No matches found. \n");
+ } else {
+ for m in matches {
+ output.push_str(&format!(
+ " \n {} \n {} \n {} \n {} \n {} \n \n",
+ html_escape(&m.file_path),
+ m.line_number,
+ m.column,
+ html_escape(&m.pattern),
+ html_escape(&m.message)
+ ));
+ }
+ }
+
+ output.push_str(
+ r#"
+
+
+
+"#,
+ );
+
+ output
+ }
+}
+
+/// Escapes HTML special characters.
+fn html_escape(text: &str) -> String {
+ text.replace('&', "&")
+ .replace('<', "<")
+ .replace('>', ">")
+ .replace('"', """)
+ .replace('\'', "'")
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_empty_matches() {
+ let formatter = HtmlFormatter;
+ let matches = vec![];
+ let output = formatter.format(&matches);
+ assert!(output.contains(""));
+ assert!(output.contains("No matches found."));
+ assert!(output.contains(""));
+ }
+
+ #[test]
+ fn test_single_match() {
+ let formatter = HtmlFormatter;
+ let matches = vec![Match {
+ file_path: "test.rs".to_string(),
+ line_number: 1,
+ column: 1,
+ pattern: "TODO".to_string(),
+ message: "TODO: fix this".to_string(),
+ }];
+ let output = formatter.format(&matches);
+ assert!(output.contains(""));
+ assert!(output.contains("test.rs "));
+ assert!(output.contains("1 "));
+ assert!(output.contains("TODO "));
+ assert!(output.contains("TODO: fix this "));
+ assert!(output.contains("