1+ #!/usr/bin/env python3
2+ """
3+ cross_version_link_linter.py
4+
5+ Detects and prevents cross-version links in CockroachDB documentation.
6+ This linter runs on PRs to ensure documentation links stay within the same version.
7+
8+ Usage:
9+ python cross_version_link_linter.py <file1> <file2> ...
10+ """
11+
12+ import re
13+ import sys
14+ import json
15+ import os
16+ from pathlib import Path
17+ from typing import List , Dict , Optional , Tuple
18+
19+
class CrossVersionLinkLinter:
    """Linter to detect cross-version links in CockroachDB documentation.

    A cross-version link is a link, include, or image reference whose
    hard-coded version directory (e.g. ``v24.1``) differs from the version
    directory found in the path of the file that contains it.  Violations
    found through :meth:`check_file` accumulate on ``self.violations``.
    """

    # Regex patterns to detect different types of cross-version links.
    # Note: the version capture group must always be the first group, because
    # find_violations() reads the target version from group(1).
    PATTERNS = {
        'liquid_link': r'{%\s*link\s+(v\d+\.\d+)/[^%]*%}',
        'include_cached': r'{%\s*include_cached\s+(v\d+\.\d+)/[^%]*%}',
        'include': r'{%\s*include\s+(v\d+\.\d+)/[^%]*%}',
        'image_ref': r"{{\s*'images/(v\d+\.\d+)/[^']*'\s*\|\s*relative_url\s*}}",
        'markdown_relative': r'\[[^\]]+\]\((?:\.\./)+(v\d+\.\d+)/[^\)]+\)',
        'markdown_absolute': r'\[[^\]]+\]\(/docs/(v\d+\.\d+)/[^\)]+\)',
        'html_link': r'<a[^>]*href=["\']/?(?:docs/)?(v\d+\.\d+)/[^"\']+["\'][^>]*>',
    }

    # Patterns that are allowed (using dynamic version variables)
    ALLOWED_PATTERNS = [
        r'page\.version\.version',
        r'site\.versions(?:\.|\[)',
        r'include\.version',
        r'page\.release_info',
    ]

    def __init__(self, verbose: bool = False):
        """Initialize the linter.

        Args:
            verbose: When True, print progress/skip messages to stdout.
        """
        self.verbose = verbose
        # Every violation found by check_file() across all files checked.
        self.violations = []

    def extract_file_version(self, filepath: Path) -> Optional[str]:
        """
        Extract the version from a file path.

        Args:
            filepath: Path to the file

        Returns:
            The first path component that looks like a version directory
            (e.g., 'v25.4'), or None if no component does.
        """
        for part in filepath.parts:
            if re.match(r'^v\d+\.\d+$', part):
                return part
        return None

    def is_allowed_pattern(self, text: str) -> bool:
        """
        Check if the text contains an allowed dynamic version pattern.

        Args:
            text: The text to check

        Returns:
            True if the pattern is allowed (uses dynamic variables)
        """
        return any(re.search(pattern, text) for pattern in self.ALLOWED_PATTERNS)

    def generate_fix(self, pattern_type: str, original: str, source_version: str) -> str:
        """
        Generate a fix suggestion for the violation.

        Args:
            pattern_type: Type of the pattern that was matched (a PATTERNS key)
            original: The original problematic text
            source_version: The version the file belongs to (currently unused;
                suggestions use the dynamic version variable instead)

        Returns:
            Suggested fix for the violation
        """
        if pattern_type in ('liquid_link', 'include_cached', 'include'):
            # count=1: only the leading version *directory* is replaced.  The
            # rest of the path may legitimately contain a version-like token
            # (e.g. 'upgrade-to-v23.2.md') that must be left intact.
            return re.sub(r'v\d+\.\d+', '{{ page.version.version }}', original, count=1)

        if pattern_type == 'image_ref':
            # Fix image references to use dynamic version
            return re.sub(
                r'(images/)(v\d+\.\d+)(/)',
                r'\1{{ page.version.version }}\3',
                original,
            )

        if pattern_type in ('markdown_relative', 'markdown_absolute'):
            # For markdown links, suggest using Jekyll link syntax
            link_match = re.search(r'\[([^\]]+)\]\(([^\)]+)\)', original)
            if link_match:
                link_text, target = link_match.groups()
                # Look for the version directory in the URL only, so a
                # version-like token in the link text is never mistaken for it.
                path_match = re.search(r'v\d+\.\d+/(.+)', target)
                if path_match:
                    # A '#anchor' is not part of the file the link tag
                    # resolves, so it is placed after the tag, not inside it.
                    page_path, hash_sep, anchor = path_match.group(1).partition('#')
                    if page_path:
                        return (
                            f'[{link_text}]'
                            f'({{% link {{{{ page.version.version }}}}/{page_path} %}}'
                            f'{hash_sep}{anchor})'
                        )
            return "Use {% link {{ page.version.version }}/page.md %} syntax"

        return "Use appropriate version variable or relative path within same version"

    def find_violations(self, filepath: Path, content: str) -> List[Dict]:
        """
        Find all cross-version link violations in a file.

        Args:
            filepath: Path to the file
            content: File content

        Returns:
            List of violation dictionaries with the keys 'file', 'line',
            'source_version', 'target_version', 'link', 'line_content',
            'fix' and 'type'.  Results are grouped by pattern, in PATTERNS
            order.  Empty when the path has no version component.
        """
        source_version = self.extract_file_version(filepath)
        if not source_version:
            # File doesn't have a version in its path, skip checking
            if self.verbose:
                print(f"Skipping {filepath}: No version in path")
            return []

        violations = []
        lines = content.split('\n')

        for pattern_name, pattern in self.PATTERNS.items():
            for match in re.finditer(pattern, content, re.MULTILINE | re.IGNORECASE):
                link = match.group(0)

                # Matches that use dynamic version variables are fine.
                if self.is_allowed_pattern(link):
                    continue

                target_version = match.group(1)
                if target_version == source_version:
                    continue

                # 1-based line number: newlines before the match start, plus
                # one.  Counting with bounds avoids copying a content slice.
                line_num = content.count('\n', 0, match.start()) + 1

                violations.append({
                    'file': str(filepath),
                    'line': line_num,
                    'source_version': source_version,
                    'target_version': target_version,
                    'link': link,
                    # line_num never exceeds len(lines): a string with k
                    # newlines always splits into k + 1 lines.
                    'line_content': lines[line_num - 1].strip(),
                    'fix': self.generate_fix(pattern_name, link, source_version),
                    'type': pattern_name,
                })

        return violations

    def check_file(self, filepath: str) -> List[Dict]:
        """
        Check a single file for cross-version link violations.

        Non-markdown, missing, and unreadable files are skipped and yield an
        empty list.  Violations found are also appended to ``self.violations``.

        Args:
            filepath: Path to the file to check

        Returns:
            List of violations found
        """
        path = Path(filepath)

        # Skip non-markdown files
        if path.suffix not in ('.md', '.markdown'):
            if self.verbose:
                print(f"Skipping non-markdown file: {filepath}")
            return []

        # Skip if file doesn't exist
        if not path.exists():
            if self.verbose:
                print(f"File not found: {filepath}")
            return []

        try:
            content = path.read_text(encoding='utf-8')
        except (OSError, ValueError) as e:
            # OSError: I/O and permission problems.  ValueError covers
            # UnicodeDecodeError for files that are not valid UTF-8.
            print(f"Error reading {filepath}: {e}", file=sys.stderr)
            return []

        violations = self.find_violations(path, content)
        self.violations.extend(violations)
        return violations

    def format_violations_for_github(self) -> str:
        """
        Format violations as a GitHub comment.

        Returns:
            Formatted markdown string for GitHub comment: a short success
            message when there are no violations, otherwise a per-file
            report with each offending link and its suggested fix.
        """
        if not self.violations:
            return "✅ **Cross-Version Link Check Passed**\n\nNo cross-version links detected."

        # Group violations by file, keeping first-seen file order.
        violations_by_file = {}
        for v in self.violations:
            violations_by_file.setdefault(v['file'], []).append(v)

        total = len(self.violations)
        lines = [
            "❌ **Cross-Version Link Check Failed**",
            "",
            f"Found {total} cross-version link violation{'s' if total > 1 else ''} that must be fixed:",
            "",
        ]

        for file_path, file_violations in violations_by_file.items():
            lines.extend(["---", "", f"### File: `{file_path}`", ""])

            for v in file_violations:
                lines.extend([
                    f"**Line {v['line']}**: Link from {v['source_version']} to {v['target_version']} detected",
                    "```",
                    f"{v['link']}",
                    "```",
                    "**Suggested fix:**",
                    "```",
                    f"{v['fix']}",
                    "```",
                    "",
                ])

        lines.extend([
            "---",
            "",
            "**Action Required**: Please update all cross-version links to use version variables or ensure links stay within the same version.",
            "",
            "For more information about proper link formatting, see the [CockroachDB Docs Style Guide](https://github.com/cockroachdb/docs/blob/main/StyleGuide.md#links).",
        ])

        return "\n".join(lines)

    def print_violations(self):
        """Print violations to stderr in a human-readable format.

        Prints nothing when no violations have been recorded.
        """
        if not self.violations:
            return

        print("\n❌ Cross-version link violations found:\n", file=sys.stderr)

        for v in self.violations:
            print(f"File: {v['file']}", file=sys.stderr)
            print(f" Line {v['line']}: Link from {v['source_version']} to {v['target_version']}", file=sys.stderr)
            print(f" Problematic link: {v['link']}", file=sys.stderr)
            print(f" Fix: {v['fix']}", file=sys.stderr)
            print(file=sys.stderr)
268+
269+
def main():
    """Main entry point for the linter.

    Reads the files to check from the command line, prints any violations
    to stderr and, when the GITHUB_ACTIONS environment variable is set,
    also writes the markdown report to the GITHUB_STEP_SUMMARY file (if
    that variable is set) and to 'pr-comment.md'.

    Setting the VERBOSE environment variable to 'true', '1' or 'yes'
    enables progress output.

    Exits with status 1 when no files are given or any violation is found,
    and with status 0 otherwise.
    """
    if len(sys.argv) < 2:
        print("Usage: python cross_version_link_linter.py <file1> [file2] ...", file=sys.stderr)
        sys.exit(1)

    # Verbosity is controlled through the environment, not a CLI flag.
    verbose = os.environ.get('VERBOSE', '').lower() in ['true', '1', 'yes']

    # Get list of files to check.
    # Files can be passed as separate arguments or as a single
    # space-separated string, so an argument containing a space is split.
    files = []
    for arg in sys.argv[1:]:
        if ' ' in arg:
            files.extend(arg.split())
        else:
            files.append(arg)

    linter = CrossVersionLinkLinter(verbose=verbose)

    for filepath in files:
        if verbose:
            print(f"Checking {filepath}...")
        linter.check_file(filepath)

    # Print violations to stderr
    linter.print_violations()

    # If running in GitHub Actions, write comment to file
    if os.environ.get('GITHUB_ACTIONS'):
        comment = linter.format_violations_for_github()

        # The report contains emoji, so the encoding is set explicitly:
        # a non-UTF-8 locale default would raise UnicodeEncodeError.
        comment_file = os.environ.get('GITHUB_STEP_SUMMARY')
        if comment_file:
            with open(comment_file, 'w', encoding='utf-8') as f:
                f.write(comment)

        # Also write to a file for the PR comment action
        with open('pr-comment.md', 'w', encoding='utf-8') as f:
            f.write(comment)

    # Exit with appropriate code
    if linter.violations:
        print(f"\n❌ Found {len(linter.violations)} cross-version link violation(s)", file=sys.stderr)
        sys.exit(1)
    else:
        print("✅ No cross-version link violations found")
        sys.exit(0)
320+
321+
# Run the linter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments