wasmerio · SurfyPenguin · Dec 30, 2025 · Dec 31, 2025 · Dec 31, 2025
diff --git a/PDF Highlighter Script/README.md b/PDF Highlighter Script/README.md
@@ -0,0 +1,28 @@
+# PDF Keyword Highlighter
+
+A command-line tool to highlight one or more keywords in a PDF file using PyMuPDF. Supports multiple keywords, optional case-sensitive search, and outputs per-page highlight statistics.
+
+## Requirements
+- Python `>=3.12`
+- PyMuPDF: `pip install pymupdf`
+
+## Usage
+```text
+usage: Highlight keywords in PDF [-h] -i INPUT [-o OUTPUT] -k KEYS [KEYS ...] [-s]
+
+options:
+  -h, --help            show this help message and exit
+  -i INPUT, --input INPUT
+                        Input PDF
+  -o OUTPUT, --output OUTPUT
+                        Output PDF
+  -k KEYS [KEYS ...], --keys KEYS [KEYS ...]
+                        Keyword(s) to highlight. Sentences are not supported
+  -s, --sensitive       Case-sensitive search
+```
+## Examples
+```bash
+python3 pdf_highlight.py -i input.pdf -k python code script -o highlighted.pdf
+
+python3 pdf_highlight.py -i input.pdf -k Python -s -o output.pdf  # case-sensitive
+```
diff --git a/PDF Highlighter Script/pdf_highlight.py b/PDF Highlighter Script/pdf_highlight.py
@@ -0,0 +1,95 @@
+import argparse
+import pymupdf as fitz
+import string
+
+def parse_args() -> argparse.Namespace:
+    """Parse the script's command-line options with `argparse.ArgumentParser`.
+
+    Returns:
+        `argparse.Namespace`: Parsed options as the attributes `input`, `output`, `keys` and `sensitive`.
+    """
+    parser = argparse.ArgumentParser("Highlight keywords in PDF")  # NOTE(review): the first positional parameter is `prog`, so this text is printed as the program name in the usage line
+
+    # register the accepted options: -i and -k are required, -o defaults to "highlighted.pdf", -s is a boolean flag
+    parser.add_argument("-i", "--input", type=str, required=True, help="Input PDF")
+    parser.add_argument("-o", "--output", type=str, default="highlighted.pdf", help="Output PDF")
+    parser.add_argument("-k", "--keys", type=str, required=True, nargs="+" ,help='Keyword(s) to highlight. Sentences are not supported')
+    parser.add_argument("-s", "--sensitive", action="store_true", help='Case-sensitive search')
+
+    return parser.parse_args()
+
+def highlight_pdf(input_file : str, output_file : str, keywords : list[str], case_sensitive=False) -> dict[str, int]:
+    """Highlights occurrences of `keywords` in the PDF and saves a new file.
+
+    Args:
+        input_file (str): Path of the input PDF.
+        output_file (str): Path for the output (highlighted) PDF.
+        keywords (list[str]): Keywords to highlight. Each is whitespace-stripped and compared against whole extracted words after their leading/trailing ASCII punctuation is removed.
+        case_sensitive (bool, optional): If True, matching is case-sensitive; otherwise keywords and words are lower-cased first. Defaults to False.
+
+    Returns:
+        dict[str, int]: Maps a "Page N" label (N is `page.number + 1`) to the number of words highlighted on that page; pages without hits are omitted. Empty if the PDF could not be opened.
+    """
+    try:
+        doc = fitz.open(input_file)
+    except Exception as e:
+        print(f"Error opening PDF: {e}")
+        return {}  # NOTE(review): callers cannot tell this failure apart from "no matches", which also yields an empty dict
+
+    stats = {}
+
+    # normalise the keywords once, into a set, so the per-word membership test below is cheap
+    if case_sensitive:
+        keyword_set = {key.strip() for key in keywords}
+    else:
+        keyword_set = {key.strip().lower() for key in keywords}
+
+    for page in doc:
+        hits = []  # rectangles of the matching words on this page
+        page_no = f"Page {page.number + 1}"
+        words = page.get_text("words")  # presumably tuples of (x0, y0, x1, y1, word, ...) per the PyMuPDF docs - only indices 0-4 are used here
+
+        for word in words:
+            rect = word[:4]  # first four fields: the word's rectangle coordinates
+            match_word = word[4].strip(string.punctuation)  # drop surrounding ASCII punctuation so e.g. "code," can match "code"
+            if not case_sensitive:
+                match_word = match_word.lower()
+
+            if match_word in keyword_set:
+                hits.append(rect)
+
+        if hits:
+            annotation = page.add_highlight_annot(hits)  # NOTE(review): `annotation` is never used after this assignment
+            stats[page_no] = len(hits)
+
+    doc.save(output_file, garbage=4, deflate=True, clean=True)
+    doc.close()  # NOTE(review): not reached if the loop or save() raises, since there is no try/finally around them
+
+    return stats
+
+def print_stats(stats : dict) -> None:
+    """Prints a per-page table of highlight counts followed by the overall total.
+
+    Args:
+        stats (dict): Page labels mapped to highlight counts. An empty dict prints "No matches found." instead of a table.
+    """
+    if not stats:
+        print("\nNo matches found.\n")
+        return
+    total = sum(stats.values())
+
+    print("\n" + "-"*28)  # 28 characters is the table width used by the rules and the centred title
+    print("HIGHLIGHT".center(28))
+    print("-"*28)
+
+    for page, count in stats.items():
+        print(f"{page:18} | {count:3d}")  # page label padded to 18 columns, count right-aligned in 3
+    print("-"*28)
+    print(f"Total: {total} highlights\n")
+
+if __name__ == "__main__":
+    args = parse_args()
+    print(f"Keywords: {", ".join(args.keys)}")  # reusing double quotes inside the f-string expression requires Python 3.12+ (PEP 701)
+    print(f"Case-sensitive: {args.sensitive}")
+
+    stats = highlight_pdf(args.input, args.output, args.keys, args.sensitive)
+    print_stats(stats)  # prints "No matches found." for an empty result, including when the PDF failed to open
diff --git a/README.md b/README.md
@@ -110,6 +110,7 @@ More information on contributing and the general code of conduct for discussion
 | Password Generator                       | [Password Generator](https://github.com/DhanushNehru/Python-Scripts/tree/main/Password%20Generator)                                                    | Generates a random password.                                                                                                                                      |
 | Password Manager                         | [Password Manager](https://github.com/nem5345/Python-Scripts/tree/main/Password%20Manager)                                                             | Generate and interact with a password manager.                                                                                                                    |
 | Password Strength Checker                | [Password Strength Checker](https://github.com/nem5345/Python-Scripts/tree/main/Password%20Strength%20Checker)                                         | Evaluates how strong a given password is.                                                                                                                         |
+| PDF Highlighter                          | [PDF Highlighter Script](https://github.com/SurfyPenguin/Python-Scripts/tree/main/PDF%20Highlighter%20Script)                                                          | A command-line tool to highlight one or more keywords in a PDF file using PyMuPDF. Supports multiple keywords and optional case-sensitive search. |
 | PDF Merger                               | [PDF Merger](https://github.com/DhanushNehru/Python-Scripts/tree/main/PDF%20Merger)                                                                    | Merges multiple PDF files into a single PDF, with options for output location and custom order.                                                                   |
 | PDF to Audio                             | [PDF to Audio](https://github.com/DhanushNehru/Python-Scripts/tree/main/PDF%20to%20Audio)                                                              | Converts PDF to audio.                                                                                                                                            |
 | PDF to Text                              | [PDF to text](https://github.com/DhanushNehru/Python-Scripts/tree/main/PDF%20to%20text)                                                                | Converts PDF to text.                                                                                                                                             |