1- # scrapers/cert_manager.py
2-
31from bs4 import BeautifulSoup
42from collections import OrderedDict
3+ from packaging .version import Version
54from utils import (
65 print_error ,
76 fetch_page ,
87 update_compatibility_info ,
98 get_chart_versions ,
109 validate_semver ,
11- expand_kube_versions ,
1210)
1311
# Application key: passed to get_chart_versions() and used to name the
# generated compatibility YAML file.
app_name = "karpenter"
# Upstream documentation page that publishes the compatibility matrix.
compatibility_url = "https://karpenter.sh/preview/upgrading/compatibility/"
1614
1715
def find_compatibility_table(content):
    """Locate the compatibility-matrix table in the fetched HTML.

    Candidate headings are tried in order: first the element whose ``id`` is
    ``compatibility-matrix``, then every ``<h2>`` whose text contains
    ``Compatibility Matrix``. The first ``<table>`` that follows a candidate
    heading wins.

    Args:
        content: HTML markup of the compatibility page.

    Returns:
        The matching table element, or ``None`` when no candidate heading is
        followed by a table.
    """
    soup = BeautifulSoup(content, "html.parser")

    def candidate_headings():
        # The anchor id is the most specific hook, so it is tried first.
        anchor = soup.find(id="compatibility-matrix")
        if anchor:
            yield anchor
        # Fall back to the heading text in case the id is absent or unusable.
        for h2 in soup.find_all("h2"):
            if "Compatibility Matrix" in h2.get_text(strip=True):
                yield h2

    for heading in candidate_headings():
        table = heading.find_next("table")
        if table:
            return table

    return None
32+
33+
def parse_table(table):
    """Map each Kubernetes version column to its minimum required release.

    The first table row is read as the Kubernetes version headers and the
    second row as the matching requirements (for example ``>= 0.37``). The
    leading label cell of both rows is ignored.

    Args:
        table: Parsed ``<table>`` element exposing ``find_all``.

    Returns:
        dict mapping a Kubernetes version string (leading ``v`` removed) to
        the stringified result of ``validate_semver`` for the normalized
        requirement. Columns with a blank header, or whose requirement
        ``validate_semver`` rejects, are omitted. Empty when the table has
        fewer than two rows.
    """
    rows = table.find_all("tr")
    if len(rows) < 2:
        return {}

    # Drop the first cell of each row: it is the row label, not a data column.
    header_cells = rows[0].find_all(["th", "td"])[1:]
    requirement_cells = rows[1].find_all(["th", "td"])[1:]

    min_map = {}
    for header_cell, requirement_cell in zip(header_cells, requirement_cells):
        kube = header_cell.get_text(strip=True).lstrip("v")
        if not kube:
            # A blank header would become a "" key that cannot be ordered as
            # a version later on, so the column is skipped.
            continue

        required = requirement_cell.get_text(strip=True)
        # Remove the comparison operator and only a *leading* "v"; stripping
        # every "v" would corrupt labels such as "-dev".
        normalized = required.replace(">=", "").strip().lstrip("v").strip()
        semver = validate_semver(normalized)
        if semver:
            min_map[kube] = str(semver)

    return min_map
50+
51+
def build_rows_from_table(table, chart_versions):
    """Build one compatibility row per distinct minimum release in the table.

    For every distinct release that appears as a minimum requirement, the row
    lists each Kubernetes version whose own minimum is less than or equal to
    that release.

    Args:
        table: Compatibility table element, forwarded to ``parse_table``.
        chart_versions: Mapping of release version string to chart version.
            ``None`` is treated as an empty mapping.

    Returns:
        dict keyed by release version string, in ascending release order.
        Each value is an ``OrderedDict`` with ``version``, ``kube`` (sorted
        newest first), ``chart_version`` (only when a chart is known),
        ``requirements`` and ``incompatibilities``. Empty when the table
        yields no usable data.
    """
    min_map = parse_table(table)
    if not min_map:
        return {}

    # A failed chart lookup should not abort row generation; the rows are
    # simply emitted without a chart_version entry.
    chart_versions = chart_versions or {}

    # Parse every requirement once up front instead of re-parsing it for
    # each candidate release inside the nested loop.
    minimums = []
    for kube, required in min_map.items():
        parsed = validate_semver(required)
        if parsed:
            minimums.append((kube, required, parsed))

    # Distinct releases, keyed by their string form.
    releases = {required: parsed for _, required, parsed in minimums}

    rows = {}
    for ver, ver_sem in sorted(releases.items(), key=lambda item: item[1]):
        # A Kubernetes version is supported once the release reaches the
        # minimum published for that column.
        supported = [kube for kube, _, req_sem in minimums if req_sem <= ver_sem]

        ordered_items = [
            ("version", ver),
            ("kube", sorted(supported, key=Version, reverse=True)),
        ]
        chart_version = chart_versions.get(ver)
        if chart_version:
            ordered_items.append(("chart_version", chart_version))
        ordered_items.extend(
            [
                ("requirements", []),
                ("incompatibilities", []),
            ]
        )
        rows[ver] = OrderedDict(ordered_items)

    return rows
8593
@@ -90,14 +98,18 @@ def scrape():
9098 if not page_content :
9199 return
92100
93- sections = parse_page (page_content )
94- target_tables = find_target_tables (sections )
95- if target_tables .__len__ () >= 1 :
96- chart_versions = get_chart_versions (app_name )
97- rows = extract_table_data (target_tables , chart_versions )
98- update_compatibility_info (
99- f"../../static/compatibilities/{ app_name } .yaml" , rows
100- )
101- else :
101+ chart_versions = get_chart_versions (app_name )
102+
103+ table = find_compatibility_table (page_content )
104+ if table is None :
102105 print_error ("No compatibility information found." )
106+ return
107+
108+ page_rows = build_rows_from_table (table , chart_versions )
109+ if not page_rows :
110+ print_error ("No compatibility information found." )
111+ return
103112
113+ update_compatibility_info (
114+ f"../../static/compatibilities/{ app_name } .yaml" , list (page_rows .values ())
115+ )
0 commit comments