fulcrumgenomics
diff --git a/‎docs/scripts/strip_doctest_flags.py‎
Lines changed: 25 additions & 0 deletions b/‎docs/scripts/strip_doctest_flags.py‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎fgpyo/collections/__init__.py‎
Lines changed: 8 additions & 0 deletions b/‎fgpyo/collections/__init__.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎fgpyo/fasta/builder.py‎
Lines changed: 23 additions & 15 deletions b/‎fgpyo/fasta/builder.py‎
Lines changed: 23 additions & 15 deletions
diff --git a/‎fgpyo/fasta/sequence_dictionary.py‎
Lines changed: 26 additions & 19 deletions b/‎fgpyo/fasta/sequence_dictionary.py‎
Lines changed: 26 additions & 19 deletions
diff --git a/‎fgpyo/fastx/__init__.py‎
Lines changed: 7 additions & 6 deletions b/‎fgpyo/fastx/__init__.py‎
Lines changed: 7 additions & 6 deletions
diff --git a/‎fgpyo/io/__init__.py‎
Lines changed: 52 additions & 30 deletions b/‎fgpyo/io/__init__.py‎
Lines changed: 52 additions & 30 deletions
@@ -0,0 +1,25 @@
+"""MkDocs hook to strip doctest flags from rendered documentation.
+
+Doctest flags like `# doctest: +SKIP` are useful for controlling doctest execution,
+but they clutter the documentation. This hook removes them from the rendered HTML.
+"""
+
+import re
+from typing import Any
+
+# Matches a doctest directive comment such as `# doctest: +SKIP`, `# doctest: -ELLIPSIS`,
+# or `# doctest: +SKIP, +ELLIPSIS`, together with the spaces/tabs that precede it.
+# Only horizontal whitespace is allowed between tokens and every flag must start with
+# `+` or `-`, so a match can never run past the end of its line and swallow the
+# expected output (or prose) that follows the directive.
+DOCTEST_FLAG_PATTERN = re.compile(
+    r"[ \t]*#[ \t]*doctest:[ \t]*[+-]\w+(?:[ \t]*,?[ \t]*[+-]\w+)*"
+)
+
+
+def on_page_content(html: str, **kwargs: Any) -> str:
+    """Remove doctest flags from page content.
+
+    Args:
+        html: The rendered HTML content of the page.
+        **kwargs: Additional keyword arguments passed by MkDocs (unused here).
+
+    Returns:
+        The HTML content with every doctest directive comment removed. Text that
+        follows a directive, including the line break, is left untouched.
+    """
+    return DOCTEST_FLAG_PATTERN.sub("", html)
@@ -17,6 +17,7 @@
 True
 >>> is_sorted([1, 2, 4, 3])
 False
+
 ```
 
 ## Examples of a "Peekable" Iterator
@@ -34,7 +35,10 @@
 >>> from fgpyo.collections import PeekableIterator
 >>> piter = PeekableIterator(iter([]))
 >>> piter.peek()
+Traceback (most recent call last):
+    ...
 StopIteration
+
 ```
 
 A peekable iterator will return the next item before consuming it.
@@ -47,6 +51,7 @@
 1
 >>> [j for j in piter]
 [2, 3]
+
 ```
 
 The [`can_peek()`][fgpyo.collections.PeekableIterator.can_peek] function can be used to determine if
@@ -63,7 +68,10 @@
 >>> piter.peek() if piter.can_peek() else -1
 -1
 >>> next(piter)
+Traceback (most recent call last):
+    ...
 StopIteration
+
 ```
 
 [`PeekableIterator`][fgpyo.collections.PeekableIterator]'s constructor supports creation from
 
@@ -9,31 +9,39 @@
 Writing a FASTA with two contigs each with 100 bases:
 
 ```python
-    >>> from fgpyo.fasta.builder import FastaBuilder
-    >>> builder = FastaBuilder()
-    >>> builder.add("chr10").add("AAAAAAAAAA", 10)
-    >>> builder.add("chr11").add("GGGGGGGGGG", 10)
-    >>> builder.to_file(path = pathlib.Path("test.fasta"))
+>>> from pathlib import Path
+>>> from fgpyo.fasta.builder import FastaBuilder
+>>> builder = FastaBuilder()
+>>> builder.add("chr10").add("AAAAAAAAAA", 10)  # doctest: +ELLIPSIS
+<fgpyo.fasta.builder.ContigBuilder object at ...>
+>>> _ = builder.add("chr11").add("GGGGGGGGGG", 10)
+>>> fasta_path = Path(getfixture("tmp_path")) / "test.fasta"
+>>> builder.to_file(path=fasta_path)  # doctest: +SKIP
+
 ```
 
 Writing a FASTA with one contig with 100 A's and 50 T's:
 
 ```python
-    >>> from fgpyo.fasta.builder import FastaBuilder
-    >>> builder = FastaBuilder()
-    >>> builder.add("chr10").add("AAAAAAAAAA", 10).add("TTTTTTTTTT", 5)
-    >>> builder.to_file(path = pathlib.Path("test.fasta"))
+>>> from fgpyo.fasta.builder import FastaBuilder
+>>> builder = FastaBuilder()
+>>> builder.add("chr10").add("AAAAAAAAAA", 10).add("TTTTTTTTTT", 5)  # doctest: +ELLIPSIS
+<fgpyo.fasta.builder.ContigBuilder object at ...>
+>>> builder.to_file(path=fasta_path)  # doctest: +SKIP
+
 ```
 
 Add bases to an existing contig:
 
 ```python
-    >>> from fgpyo.fasta.builder import FastaBuilder
-    >>> builder = FastaBuilder()
-    >>> contig_one = builder.add("chr10").add("AAAAAAAAAA", 1)
-    >>> contig_one.add("NNN", 1)
-    >>> contig_one.bases
-    'AAAAAAAAAANNN'
+>>> from fgpyo.fasta.builder import FastaBuilder
+>>> builder = FastaBuilder()
+>>> contig_one = builder.add("chr10").add("AAAAAAAAAA", 1)
+>>> contig_one.add("NNN", 1)  # doctest: +ELLIPSIS
+<fgpyo.fasta.builder.ContigBuilder object at ...>
+>>> contig_one.bases
+'AAAAAAAAAANNN'
+
 ```
 
 """
 
@@ -9,10 +9,9 @@
 >>> import pysam
 >>> from fgpyo.fasta.sequence_dictionary import SequenceDictionary
 >>> sd: SequenceDictionary
->>> with pysam.AlignmentFile("./fgpyo/sam/tests/data/valid.sam") as fh:
-    ...    sd = SequenceDictionary.from_sam(header=fh.header)
-...
->>> print(sd)
+>>> with pysam.AlignmentFile("./tests/fgpyo/sam/data/valid.sam") as fh:
+...     sd = SequenceDictionary.from_sam(fh.header)
+>>> print(sd)  # doctest: +NORMALIZE_WHITESPACE
 @SQ	SN:chr1	LN:101
 @SQ	SN:chr2	LN:101
 @SQ	SN:chr3	LN:101
@@ -21,48 +20,53 @@
 @SQ	SN:chr6	LN:101
 @SQ	SN:chr7	LN:404
 @SQ	SN:chr8	LN:202
+
 ```
 
 Query based on index:
 
 ```python
->>> print(sd[3])
+>>> print(sd[3])  # doctest: +NORMALIZE_WHITESPACE
 @SQ	SN:chr4	LN:101
+
 ```
 
 Query based on name:
 
 ```python
->>> print(sd["chr6"])
+>>> print(sd["chr6"])  # doctest: +NORMALIZE_WHITESPACE
 @SQ	SN:chr6	LN:101
+
 ```
 
 Add, get, and delete attributes:
 
 ```python
+>>> from fgpyo.fasta.sequence_dictionary import Keys
 >>> meta = sd[0]
->>> print(meta)
+>>> print(meta)  # doctest: +NORMALIZE_WHITESPACE
 @SQ	SN:chr1	LN:101
 >>> meta[Keys.ASSEMBLY] = "hg38"
->>> print(meta))
+>>> print(meta)  # doctest: +NORMALIZE_WHITESPACE
 @SQ	SN:chr1	LN:101	AS:hg38
 >>> meta.get(Keys.ASSEMBLY)
-"hg38"
+'hg38'
 >>> meta.get(Keys.SPECIES) is None
 True
 >>> Keys.MD5 in meta
 False
 >>> del meta[Keys.ASSEMBLY]
->>> print(meta)
+>>> print(meta)  # doctest: +NORMALIZE_WHITESPACE
 @SQ	SN:chr1	LN:101
+
 ```
 
 Get a sequence based on one of its aliases:
 
 ```python
 >>> meta[Keys.ALIASES] = "foo,bar,car"
 >>> sd = SequenceDictionary(infos=[meta] + sd.infos[1:])
->>> print(sd)
+>>> print(sd)  # doctest: +NORMALIZE_WHITESPACE
 @SQ	SN:chr1	LN:101	AN:foo,bar,car
 @SQ	SN:chr2	LN:101
 @SQ	SN:chr3	LN:101
@@ -71,18 +75,19 @@
 @SQ	SN:chr6	LN:101
 @SQ	SN:chr7	LN:404
 @SQ	SN:chr8	LN:202
->>> print(sd["chr1"])
+>>> print(sd["chr1"])  # doctest: +NORMALIZE_WHITESPACE
 @SQ	SN:chr1	LN:101	AN:foo,bar,car
->>> print(sd["bar"])
+>>> print(sd["bar"])  # doctest: +NORMALIZE_WHITESPACE
 @SQ	SN:chr1	LN:101	AN:foo,bar,car
+
 ```
 
 Create a `pysam.AlignmentHeader` from a sequence dictionary:
 
 ```python
->>> sd.to_sam_header()
-<pysam.libcalignmentfile.AlignmentHeader object at 0x10e93f5f0>
->>> print(sd.to_sam_header())
+>>> sd.to_sam_header()  # doctest: +ELLIPSIS
+<pysam.libcalignmentfile.AlignmentHeader object at ...>
+>>> print(sd.to_sam_header())  # doctest: +NORMALIZE_WHITESPACE
 @HD	VN:1.5
 @SQ	SN:chr1	LN:101	AN:foo,bar,car
 @SQ	SN:chr2	LN:101
@@ -92,18 +97,19 @@
 @SQ	SN:chr6	LN:101
 @SQ	SN:chr7	LN:404
 @SQ	SN:chr8	LN:202
+
 ```
 
 Create a `pysam.AlignmentHeader` from a sequence dictionary with extra header items:
 
 ```python
 >>> sd.to_sam_header(
 ...     extra_header={"RG": [{"ID": "A", "LB": "a-library"}, {"ID": "B", "LB": "b-library"}]}
-... )
-<pysam.libcalignmentfile.AlignmentHeader object at 0x10e93fe30>
+... )  # doctest: +ELLIPSIS
+<pysam.libcalignmentfile.AlignmentHeader object at ...>
 >>> print(sd.to_sam_header(
 ...     extra_header={"RG": [{"ID": "A", "LB": "a-library"}, {"ID": "B", "LB": "b-library"}]}
-... ))
+... ))  # doctest: +NORMALIZE_WHITESPACE
 @HD	VN:1.5
 @SQ	SN:chr1	LN:101	AN:foo,bar,car
 @SQ	SN:chr2	LN:101
@@ -115,6 +121,7 @@
 @SQ	SN:chr8	LN:202
 @RG	ID:A	LB:a-library
 @RG	ID:B	LB:b-library
+
 ```
 """
 
 
@@ -15,12 +15,13 @@
 the state of all previously iterated records, set the parameter ``persist`` to `True`.
 
 ```python
-   >>> from fgpyo.fastx import FastxZipped
-   >>> with FastxZipped("r1.fq", "r2.fq", persist=False) as zipped:
-   ...    for (r1, r2) in zipped:
-   ...         print(f"{r1.name}: {r1.sequence}, {r2.name}: {r2.sequence}")
-   seq1: AAAA, seq1: CCCC
-   seq2: GGGG, seq2: TTTT
+>>> from fgpyo.fastx import FastxZipped
+>>> with FastxZipped("r1.fq", "r2.fq", persist=False) as zipped:  # doctest: +SKIP
+...     for (r1, r2) in zipped:
+...         print(f"{r1.name}: {r1.sequence}, {r2.name}: {r2.sequence}")
+seq1: AAAA, seq1: CCCC
+seq2: GGGG, seq2: TTTT
+
 ```
 
 """
 
@@ -14,30 +14,48 @@
 ## fgpyo.io Examples:
 
 ```python
+>>> import fgpyo.io as fio
+>>> from fgpyo.io import write_lines, read_lines
+>>> from pathlib import Path
+
+```
+
+Assert that a path exists and is readable:
+
+```python
+>>> tmp_dir = Path(getfixture("tmp_path"))
+>>> path_flat: Path = tmp_dir / "example.txt"
+>>> fio.assert_path_is_readable(path_flat)  # doctest: +ELLIPSIS
+Traceback (most recent call last):
+    ...
+AssertionError: Cannot read non-existent path: ...
+
+```
+
+Write lines to a flat file and to a gzip-compressed file:
+
+```python
+>>> path_flat = tmp_dir / "example.txt"
+>>> path_compressed = tmp_dir / "example.txt.gz"
+>>> write_lines(path=path_flat, lines_to_write=["flat file", 10])
+>>> write_lines(path=path_compressed, lines_to_write=["gzip file", 10])
+
+```
+
+Read lines from a path into a generator:
+
+```python
+>>> lines = read_lines(path=path_flat)
+>>> next(lines)
+'flat file'
+>>> next(lines)
+'10'
+>>> lines = read_lines(path=path_compressed)
+>>> next(lines)
+'gzip file'
+>>> next(lines)
+'10'
 
-    >>> import fgpyo.io as fio
-    >>> from pathlib import Path
-    Assert that a path exists and is readable
-    >>> path_flat: Path = Path("example.txt")
-    >>> path_compressed: Path = Path("example.txt.gz")
-    >>> fio.path_is_readable(path_flat)
-    AssertionError: Cannot read non-existent path: example.txt
-    >>> fio.path_is_readable(compressed_file)
-    AssertionError: Cannot read non-existent path: example.txt.gz
-    Write to and read from path
-    >>> write_lines(path = path_flat, lines_to_write=["flat file", 10])
-    >>> write_lines(path = path_compressed, lines_to_write=["gzip file", 10])
-    Read lines from a path into a generator
-    >>> lines = read_lines(path = path_flat)
-    >>> next(lines)
-    "flat file"
-    >>> next(lines)
-    "10"
-    >>> lines = read_lines(path = path_compressed)
-    >>> next(lines)
-    "gzip file"
-    >>> next(lines)
-    "10"
 ```
 
 """
@@ -165,9 +183,10 @@ def to_reader(path: Path, threads: Optional[int] = None) -> TextIOWrapper:
         threads: the number of threads to use when decompressing gzip files
 
     Example:
-        >>> reader = fio.to_reader(path = Path("reader.txt"))
-        >>> reader.readlines()
-        >>> reader.close()
+        >>> import fgpyo.io as fio
+        >>> reader = fio.to_reader(path=Path("reader.txt"))  # doctest: +SKIP
+        >>> reader.readlines()  # doctest: +SKIP
+        >>> reader.close()  # doctest: +SKIP
 
     """
     if path.suffix in COMPRESSED_FILE_EXTENSIONS:
@@ -189,9 +208,10 @@ def to_writer(path: Path, append: bool = False, threads: Optional[int] = None) -
         threads: the number of threads to use when compressing gzip files
 
     Example:
-        >>> writer = fio.to_writer(path = Path("writer.txt"))
-        >>> writer.write(f'{something}\\n')
-        >>> writer.close()
+        >>> import fgpyo.io as fio
+        >>> writer = fio.to_writer(path=Path("writer.txt"))  # doctest: +SKIP
+        >>> writer.write("something\\n")  # doctest: +SKIP
+        >>> writer.close()  # doctest: +SKIP
 
     """
     mode_prefix: str = "a" if append else "w"
@@ -226,7 +246,9 @@ def read_lines(path: Path, strip: bool = False, threads: Optional[int] = None) -
         threads: the number of threads to use when decompressing gzip files
 
     Example:
-        read_back = fio.read_lines(path)
+        >>> import fgpyo.io as fio
+        >>> read_back = fio.read_lines(path)  # doctest: +SKIP
+
     """
     with to_reader(path=path, threads=threads) as reader:
         if strip: