55import glob
66import subprocess
77from pathlib import Path
8- from typing import Any , Dict , List , Optional , Tuple
8+ from typing import Any , Dict , List , Optional , Tuple , Union
99
1010from sphinx .application import Sphinx
1111from sphinx .environment import BuildEnvironment
@@ -129,6 +129,7 @@ def __init__(self):
129129 self .srcdir : Optional [str ] = None
130130 self .outdir : Optional [str ] = None
131131 self .app : Optional [Sphinx ] = None
132+ self .ignored_pages : set = set ()
132133
133134 def set_master_doc (self , master_doc : str ):
134135 """Set the master document name."""
@@ -144,6 +145,27 @@ def update_page_title(self, docname: str, title: str):
144145 """Update the title for a page."""
145146 self .collector .update_page_title (docname , title )
146147
def mark_page_ignored(self, docname: str):
    """Record ``docname`` as ignored (llms-txt-ignore metadata).

    The name is added to ``self.ignored_pages``; adding the same
    docname more than once has no further effect because the
    container is a set.
    """
    ignored = self.ignored_pages
    ignored.add(docname)
151+
def _filter_ignored_pages(
    self, page_order: Union[List[str], List[Tuple[str, str]]]
) -> Union[List[str], List[Tuple[str, str]]]:
    """Return the entries of ``page_order`` whose docname is not ignored.

    Each entry is either a bare docname string (old format) or a
    two-element tuple whose first element is the docname (new format).
    Entries are kept in their original order and returned unchanged;
    only those whose docname is in ``self.ignored_pages`` are dropped.
    """

    def docname_of(entry):
        # Plain strings are the docname themselves.
        if not isinstance(entry, tuple):
            return entry
        # Tuples carry the docname first; the second field is unused here.
        name, _unused = entry
        return name

    return [
        entry
        for entry in page_order
        if docname_of(entry) not in self.ignored_pages
    ]
168+
147169 def set_config (self , config : Dict [str , Any ]):
148170 """Set configuration options."""
149171 self .config = config
@@ -286,6 +308,11 @@ def combine_sources(self, outdir: str, srcdir: str):
286308 should_abort_early = size_policy_action in ["skip" , "note" ]
287309
288310 for docname , _ in page_order :
311+ # Skip pages marked as ignored
312+ if docname in self .ignored_pages :
313+ logger .debug (f"sphinx-llms-txt: Skipping ignored page: { docname } " )
314+ continue
315+
289316 if docname in docname_to_file :
290317 file_path = docname_to_file [docname ]
291318 content , line_count = self ._read_source_file (file_path , docname )
@@ -383,6 +410,13 @@ def combine_sources(self, outdir: str, srcdir: str):
383410 if docname is None :
384411 continue
385412
413+ # Skip pages marked as ignored
414+ if docname in self .ignored_pages :
415+ logger .debug (
416+ f"sphinx-llms-txt: Skipping ignored remaining file: { docname } "
417+ )
418+ continue
419+
386420 # Skip excluded docnames
387421 if exclude_patterns and any (
388422 self .collector ._match_exclude_pattern (docname , pattern )
@@ -468,8 +502,11 @@ def combine_sources(self, outdir: str, srcdir: str):
468502 logger .info (f"sphinx-llms-txt: Skipping { filename } generation" )
469503 # Log summary information if requested
470504 if self .config .get ("llms_txt_file" ):
505+ filtered_page_order = self ._filter_ignored_pages (page_order )
471506 self .writer .write_verbose_info_to_file (
472- page_order , self .collector .page_titles , total_line_count
507+ filtered_page_order ,
508+ self .collector .page_titles ,
509+ total_line_count ,
473510 )
474511 return
475512 elif action == "note" :
@@ -478,8 +515,11 @@ def combine_sources(self, outdir: str, srcdir: str):
478515
479516 # Log summary information if requested
480517 if self .config .get ("llms_txt_file" ):
518+ filtered_page_order = self ._filter_ignored_pages (page_order )
481519 self .writer .write_verbose_info_to_file (
482- page_order , self .collector .page_titles , total_line_count
520+ filtered_page_order ,
521+ self .collector .page_titles ,
522+ total_line_count ,
483523 )
484524 return
485525 elif action == "keep" :
@@ -496,8 +536,9 @@ def combine_sources(self, outdir: str, srcdir: str):
496536
497537 # Log summary information if requested
498538 if success and self .config .get ("llms_txt_file" ):
539+ filtered_page_order = self ._filter_ignored_pages (page_order )
499540 self .writer .write_verbose_info_to_file (
500- page_order , self .collector .page_titles , total_line_count
541+ filtered_page_order , self .collector .page_titles , total_line_count
501542 )
502543
503544 def _read_source_file (self , file_path : Path , docname : str ) -> Tuple [str , int ]:
0 commit comments