Skip to content

Commit 2574b4f

Browse files
authored
Merge pull request #21923 from cockroachdb/add-cross-version-link-linter
Add cross-version link linter to prevent documentation version mixing
2 parents 03c398c + df8a8d5 commit 2574b4f

File tree

3 files changed

+852
-0
lines changed

3 files changed

+852
-0
lines changed
Lines changed: 323 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,323 @@
1+
#!/usr/bin/env python3
2+
"""
3+
cross_version_link_linter.py
4+
5+
Detects and prevents cross-version links in CockroachDB documentation.
6+
This linter runs on PRs to ensure documentation links stay within the same version.
7+
8+
Usage:
9+
python cross_version_link_linter.py <file1> <file2> ...
10+
"""
11+
12+
import re
13+
import sys
14+
import json
15+
import os
16+
from pathlib import Path
17+
from typing import List, Dict, Optional, Tuple
18+
19+
20+
class CrossVersionLinkLinter:
    """Linter to detect cross-version links in CockroachDB documentation.

    A file's own version is taken from the first ``vNN.N`` directory in its
    path. Any link, include, or image reference in the file that hard-codes a
    different ``vNN.N`` version is recorded as a violation, together with a
    suggested replacement.

    Attributes are documented inline below; ``violations`` accumulates the
    results of every ``check_file`` call made on the instance.
    """

    # Regex patterns to detect different types of cross-version links.
    # Note: the version capture group must always be group 1, because
    # find_violations() reads the target version from match.group(1).
    PATTERNS = {
        'liquid_link': r'{%\s*link\s+(v\d+\.\d+)/[^%]*%}',
        'include_cached': r'{%\s*include_cached\s+(v\d+\.\d+)/[^%]*%}',
        'include': r'{%\s*include\s+(v\d+\.\d+)/[^%]*%}',
        'image_ref': r"{{\s*'images/(v\d+\.\d+)/[^']*'\s*\|\s*relative_url\s*}}",
        'markdown_relative': r'\[[^\]]+\]\((?:\.\./)+(v\d+\.\d+)/[^\)]+\)',
        'markdown_absolute': r'\[[^\]]+\]\(/docs/(v\d+\.\d+)/[^\)]+\)',
        'html_link': r'<a[^>]*href=["\']/?(?:docs/)?(v\d+\.\d+)/[^"\']+["\'][^>]*>',
    }

    # Patterns that are allowed (using dynamic version variables).
    ALLOWED_PATTERNS = [
        r'page\.version\.version',
        r'site\.versions(?:\.|\[)',
        r'include\.version',
        r'page\.release_info',
    ]

    def __init__(self, verbose: bool = False):
        """Initialize the linter.

        Args:
            verbose: When True, print a message for every skipped file.
        """
        self.verbose = verbose
        # Every violation found so far, across all check_file() calls.
        self.violations = []

    def extract_file_version(self, filepath: Path) -> Optional[str]:
        """Extract the version from a file path.

        Args:
            filepath: Path to the file.

        Returns:
            The first path component that is exactly a version directory
            (e.g. 'v25.4'), or None if no component looks like one.
        """
        for part in filepath.parts:
            if re.match(r'^v\d+\.\d+$', part):
                return part
        return None

    def is_allowed_pattern(self, text: str) -> bool:
        """Check if the text contains an allowed dynamic version pattern.

        Args:
            text: The text to check.

        Returns:
            True if any entry of ALLOWED_PATTERNS is found in the text.
        """
        return any(re.search(pattern, text) for pattern in self.ALLOWED_PATTERNS)

    def generate_fix(self, pattern_type: str, original: str, source_version: str) -> str:
        """Generate a fix suggestion for the violation.

        Args:
            pattern_type: Key of PATTERNS that produced the match.
            original: The original problematic text (the full regex match).
            source_version: The version the file belongs to. Currently not
                used: every suggestion uses the dynamic version variable
                rather than a literal version.

        Returns:
            Suggested replacement text, or a generic hint when no concrete
            rewrite can be produced.
        """
        dynamic_version = '{{ page.version.version }}'

        if pattern_type in ('liquid_link', 'include_cached', 'include'):
            # Replace only the leading version directory (count=1). The rest
            # of the path may legitimately contain a version-like token, for
            # example a file named 'note-v24.3.md', and must not be rewritten.
            # IGNORECASE mirrors the flags used when the match was detected.
            return re.sub(
                r'v\d+\.\d+', dynamic_version, original,
                count=1, flags=re.IGNORECASE,
            )

        if pattern_type == 'image_ref':
            # Anchor on the 'images/' prefix so only the version directory of
            # the image path is replaced.
            return re.sub(
                r'(images/)(v\d+\.\d+)(/)', r'\1' + dynamic_version + r'\3',
                original, count=1, flags=re.IGNORECASE,
            )

        if pattern_type in ('markdown_relative', 'markdown_absolute'):
            # For markdown links, suggest using Jekyll link syntax.
            link_match = re.search(r'\[([^\]]+)\]\(([^\)]+)\)', original)
            if link_match:
                link_text, url = link_match.groups()
                # Search the URL only: the link text may itself contain
                # something that looks like a versioned path.
                path_match = re.search(r'v\d+\.\d+/([^\)]+)', url, flags=re.IGNORECASE)
                if path_match:
                    # The link tag takes a file path, so a '#fragment' is
                    # moved outside the tag instead of being embedded in it.
                    page_path, hash_sep, fragment = path_match.group(1).partition('#')
                    anchor = hash_sep + fragment
                    return (
                        f'[{link_text}]'
                        f'({{% link {{{{ page.version.version }}}}/{page_path} %}}{anchor})'
                    )
            return "Use {% link {{ page.version.version }}/page.md %} syntax"

        return "Use appropriate version variable or relative path within same version"

    def find_violations(self, filepath: Path, content: str) -> List[Dict]:
        """Find all cross-version link violations in a file.

        Args:
            filepath: Path to the file; its version directory determines the
                version all links are compared against.
            content: File content.

        Returns:
            List of violation dictionaries ordered by position in the file.
            Each has the keys 'file', 'line', 'source_version',
            'target_version', 'link', 'line_content', 'fix' and 'type'.
            Empty when the path contains no version directory.
        """
        source_version = self.extract_file_version(filepath)
        if not source_version:
            # File doesn't have a version in its path, skip checking.
            if self.verbose:
                print(f"Skipping {filepath}: No version in path")
            return []

        lines = content.split('\n')
        located = []  # (match offset, violation dict)

        for pattern_name, pattern in self.PATTERNS.items():
            for match in re.finditer(pattern, content, re.MULTILINE | re.IGNORECASE):
                link = match.group(0)

                # Skip matches that use dynamic version variables.
                if self.is_allowed_pattern(link):
                    continue

                target_version = match.group(1)
                if target_version == source_version:
                    continue

                # 1-based line number of the start of the match.
                line_num = content.count('\n', 0, match.start()) + 1

                located.append((match.start(), {
                    'file': str(filepath),
                    'line': line_num,
                    'source_version': source_version,
                    'target_version': target_version,
                    'link': link,
                    'line_content': lines[line_num - 1].strip(),
                    'fix': self.generate_fix(pattern_name, link, source_version),
                    'type': pattern_name,
                }))

        # Patterns are scanned one after another; sort by offset so the
        # report reads top to bottom instead of grouped by pattern type.
        located.sort(key=lambda item: item[0])
        return [violation for _, violation in located]

    def check_file(self, filepath: str) -> List[Dict]:
        """Check a single file for cross-version link violations.

        Non-markdown files, missing files and unreadable files are skipped
        and yield an empty list. Violations found are also appended to
        ``self.violations``.

        Args:
            filepath: Path to the file to check.

        Returns:
            List of violations found in this file.
        """
        path = Path(filepath)

        # Skip non-markdown files.
        if path.suffix not in ['.md', '.markdown']:
            if self.verbose:
                print(f"Skipping non-markdown file: {filepath}")
            return []

        # Skip if file doesn't exist.
        if not path.exists():
            if self.verbose:
                print(f"File not found: {filepath}")
            return []

        try:
            with open(path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            # Best effort: report the unreadable file and keep linting others.
            print(f"Error reading {filepath}: {e}", file=sys.stderr)
            return []

        violations = self.find_violations(path, content)
        self.violations.extend(violations)
        return violations

    def format_violations_for_github(self) -> str:
        """Format violations as a GitHub comment.

        Returns:
            Markdown text: a short success message when ``self.violations``
            is empty, otherwise a report grouped by file with the offending
            link and suggested fix for each violation.
        """
        if not self.violations:
            return "✅ **Cross-Version Link Check Passed**\n\nNo cross-version links detected."

        # Group violations by file, keeping first-seen file order.
        violations_by_file = {}
        for v in self.violations:
            violations_by_file.setdefault(v['file'], []).append(v)

        total = len(self.violations)
        lines = [
            "❌ **Cross-Version Link Check Failed**",
            "",
            f"Found {total} cross-version link violation{'s' if total > 1 else ''} that must be fixed:",
            "",
        ]

        for file_path, file_violations in violations_by_file.items():
            lines.extend(["---", "", f"### File: `{file_path}`", ""])

            for v in file_violations:
                lines.extend([
                    f"**Line {v['line']}**: Link from {v['source_version']} to {v['target_version']} detected",
                    "```",
                    f"{v['link']}",
                    "```",
                    "**Suggested fix:**",
                    "```",
                    f"{v['fix']}",
                    "```",
                    "",
                ])

        lines.extend([
            "---",
            "",
            "**Action Required**: Please update all cross-version links to use version variables or ensure links stay within the same version.",
            "",
            "For more information about proper link formatting, see the [CockroachDB Docs Style Guide](https://github.com/cockroachdb/docs/blob/main/StyleGuide.md#links).",
        ])

        return "\n".join(lines)

    def print_violations(self):
        """Print violations to stderr in a human-readable format.

        Prints nothing when no violations have been recorded.
        """
        if not self.violations:
            return

        print("\n❌ Cross-version link violations found:\n", file=sys.stderr)

        for v in self.violations:
            print(f"File: {v['file']}", file=sys.stderr)
            print(f"  Line {v['line']}: Link from {v['source_version']} to {v['target_version']}", file=sys.stderr)
            print(f"  Problematic link: {v['link']}", file=sys.stderr)
            print(f"  Fix: {v['fix']}", file=sys.stderr)
            print(file=sys.stderr)
268+
269+
270+
def main():
    """Main entry point for the linter.

    Reads the files to check from ``sys.argv``, lints each one, prints any
    violations to stderr and exits with status 1 when violations were found
    (or when no file arguments were given) and 0 otherwise.

    Environment variables read:
        VERBOSE: 'true', '1' or 'yes' (case-insensitive) enables progress
            messages.
        GITHUB_ACTIONS: when set, the markdown report is written to
            'pr-comment.md' and, if GITHUB_STEP_SUMMARY is set, to that
            file as well.
    """
    if len(sys.argv) < 2:
        print("Usage: python cross_version_link_linter.py <file1> [file2] ...", file=sys.stderr)
        sys.exit(1)

    verbose = os.environ.get('VERBOSE', '').lower() in ['true', '1', 'yes']

    # Files can be passed as separate arguments or as a single
    # space-separated string; an argument containing a space is split.
    files = []
    for arg in sys.argv[1:]:
        files.extend(arg.split() if ' ' in arg else [arg])

    linter = CrossVersionLinkLinter(verbose=verbose)

    for filepath in files:
        if verbose:
            print(f"Checking {filepath}...")
        linter.check_file(filepath)

    # Print violations to stderr.
    linter.print_violations()

    # If running in GitHub Actions, write the comment to file(s).
    if os.environ.get('GITHUB_ACTIONS'):
        comment = linter.format_violations_for_github()

        # The report contains non-ASCII characters (emoji), so the encoding
        # is set explicitly rather than relying on the platform default.
        comment_file = os.environ.get('GITHUB_STEP_SUMMARY')
        if comment_file:
            with open(comment_file, 'w', encoding='utf-8') as f:
                f.write(comment)

        # Also write to a file for the PR comment action.
        with open('pr-comment.md', 'w', encoding='utf-8') as f:
            f.write(comment)

    # Exit with appropriate code.
    if linter.violations:
        print(f"\n❌ Found {len(linter.violations)} cross-version link violation(s)", file=sys.stderr)
        sys.exit(1)

    print("✅ No cross-version link violations found")
    sys.exit(0)
320+
321+
322+
# Run the linter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)