diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml
new file mode 100644
index 0000000..4f9e765
--- /dev/null
+++ b/.github/workflows/doc.yml
@@ -0,0 +1,28 @@
+name: documentation
+
+on: [push, pull_request, workflow_dispatch]
+
+permissions:
+ contents: write
+
+jobs:
+ docs:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/setup-python@v3
+ - name: Install dependencies
+ run: |
+ pip install sphinx sphinx_rtd_theme myst_parser
+ pip install .
+ - name: Sphinx build
+ run: |
+ sphinx-build docs _build
+ - name: Deploy to GitHub Pages
+ uses: peaceiris/actions-gh-pages@v3
+ if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
+ with:
+ publish_branch: gh-pages
+ github_token: ${{ secrets.GITHUB_TOKEN }}
+ publish_dir: _build/
+ force_orphan: true
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..d4bb2cb
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS ?=
+SPHINXBUILD ?= sphinx-build
+SOURCEDIR = .
+BUILDDIR = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/api/segfast.loader.rst b/docs/api/segfast.loader.rst
new file mode 100644
index 0000000..b0c0e49
--- /dev/null
+++ b/docs/api/segfast.loader.rst
@@ -0,0 +1,5 @@
+======
+Loader
+======
+
+.. automethod:: segfast.loader.open
diff --git a/docs/api/segfast.memmap_loader.rst b/docs/api/segfast.memmap_loader.rst
new file mode 100644
index 0000000..6e33963
--- /dev/null
+++ b/docs/api/segfast.memmap_loader.rst
@@ -0,0 +1,8 @@
+============
+MemmapLoader
+============
+
+.. autoclass:: segfast.memmap_loader.MemmapLoader
+ :members:
+ :undoc-members:
+ :member-order: bysource
diff --git a/docs/api/segfast.rst b/docs/api/segfast.rst
new file mode 100644
index 0000000..7065abf
--- /dev/null
+++ b/docs/api/segfast.rst
@@ -0,0 +1,12 @@
+===
+API
+===
+
+.. toctree::
+ :maxdepth: 5
+
+ segfast.loader
+ segfast.memmap_loader
+ segfast.segyio_loader
+ segfast.trace_header_spec
+ segfast.utils
diff --git a/docs/api/segfast.segyio_loader.rst b/docs/api/segfast.segyio_loader.rst
new file mode 100644
index 0000000..0fcfdc5
--- /dev/null
+++ b/docs/api/segfast.segyio_loader.rst
@@ -0,0 +1,13 @@
+============
+SegyioLoader
+============
+
+.. autoclass:: segfast.segyio_loader.SegyioLoader
+ :members:
+ :undoc-members:
+ :member-order: bysource
+
+.. autoclass:: segfast.segyio_loader.SafeSegyioLoader
+ :members:
+ :undoc-members:
+ :member-order: bysource
diff --git a/docs/api/segfast.trace_header_spec.rst b/docs/api/segfast.trace_header_spec.rst
new file mode 100644
index 0000000..de91ee5
--- /dev/null
+++ b/docs/api/segfast.trace_header_spec.rst
@@ -0,0 +1,8 @@
+===============
+TraceHeaderSpec
+===============
+
+.. autoclass:: segfast.trace_header_spec.TraceHeaderSpec
+ :members:
+ :undoc-members:
+ :member-order: bysource
diff --git a/docs/api/segfast.utils.rst b/docs/api/segfast.utils.rst
new file mode 100644
index 0000000..49f350b
--- /dev/null
+++ b/docs/api/segfast.utils.rst
@@ -0,0 +1,5 @@
+=====
+Utils
+=====
+
+.. autoclass:: segfast.utils.ForPoolExecutor
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..bca8d23
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,57 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+import sys, os
+sys.path.insert(0, os.path.abspath('..'))
+import segfast
+
+master_doc = 'index'
+
+project = 'segfast'
+author = 'Analysis Center'
+copyright = '2024, ' + author
+
+release = segfast.__version__
+version = '.'.join(release.split('.'))
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+extensions = [
+ 'sphinx.ext.autodoc',
+ 'sphinx.ext.doctest',
+ 'sphinx.ext.coverage',
+ 'sphinx.ext.mathjax',
+ 'sphinx.ext.viewcode',
+ 'sphinx.ext.githubpages',
+ 'sphinx.ext.intersphinx',
+ 'sphinx.ext.napoleon',
+ 'sphinx_rtd_theme',
+]
+
+templates_path = ['_templates']
+exclude_patterns = []
+language = 'en'
+
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+html_title = "SegFast"
+html_theme = "sphinx_rtd_theme"
+html_static_path = ['_static']
+html_theme_options = {
+ 'logo_only': False
+}
+
+# Example configuration for intersphinx: refer to the Python standard library.
+intersphinx_mapping = {
+ 'python': ('https://docs.python.org/', None),
+ 'numpy': ('https://numpy.org/doc/stable/', None),
+ 'segyio': ('https://segyio.readthedocs.io/en/latest/', None)
+}
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..82b70f5
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,40 @@
+.. segfast documentation master file, created by
+ sphinx-quickstart on Thu Feb 1 14:09:14 2024.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+segfast documentation
+=====================
+
+**segfast** is a library for interacting with SEG-Y seismic data. Main features are:
+
+* Faster access to read data: both traces headers and values
+* Optional bufferization, where the user can provide a preallocated memory to load the data into
+* Convenient API that relies on :class:`numpy.memmap` for most operations, while providing
+ `segyio <https://segyio.readthedocs.io/en/latest/>`_ as a fallback engine
+
+
+Implementation details
+----------------------
+We rely on **segyio** to infer file-wide parameters.
+
+For headers and traces, we use custom methods of reading binary data.
+
+Main differences to **segyio** C++ implementation:
+ - we read all of the requested headers in one file-wide sweep, speeding up by an order of magnitude
+ compared to the **segyio** sequential read of every requested header.
+ Also, we do that in multiple processes across chunks.
+
+ - a memory map over trace data is used for loading values. Avoiding redundant copies and leveraging
+ :mod:`numpy` superiority allows to speed up reading, especially in case of trace slicing along the samples axis.
+ This is extra relevant in the case of loading horizontal (depth) slices.
+
+
+.. toctree::
+ :maxdepth: 1
+ :titlesonly:
+
+ installation
+ start
+ segy
+ api/segfast
diff --git a/docs/installation.rst b/docs/installation.rst
new file mode 100644
index 0000000..1ecd479
--- /dev/null
+++ b/docs/installation.rst
@@ -0,0 +1,14 @@
+Installation
+============
+
+* With ``pip``/``pip3``:
+
+ .. code-block:: bash
+
+ pip3 install segfast
+
+* Developer version (add ``--depth 1`` if needed)
+
+ .. code-block:: bash
+
+ git clone https://github.com/analysiscenter/segfast.git
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000..32bb245
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.https://www.sphinx-doc.org/
+ exit /b 1
+)
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/segy.rst b/docs/segy.rst
new file mode 100644
index 0000000..91c4486
--- /dev/null
+++ b/docs/segy.rst
@@ -0,0 +1,29 @@
+SEG-Y description
+=================
+
+The most complete description can be found in `the official SEG-Y specification <https://library.seg.org/seg-technical-standards>`_ but here we give
+a brief intro into SEG-Y format.
+
+The SEG-Y is a binary file divided into several blocks:
+
+- file-wide information block which in most cases takes the first 3600 bytes:
+
+ - **textual header**: the first 3200 bytes are reserved for textual info about the file. Most of the software uses
+ this header to keep acquisition meta, date of creation, author, etc.
+ - **binary header**: 3200–3600 bytes contain file-wide headers, which describe the number of traces, the format used
+ for storing numbers, the number of samples for each trace, acquisition parameters, etc.
+ - (optional) 3600+ bytes can be used to store the **extended textual information**. If there is such a header,
+ then this is indicated by the value in one of the 3200–3600 bytes.
+
+- a sequence of traces, where each trace is a combination of its header and signal data:
+
+ - **trace header** takes the first 240 bytes and describes the meta info about its trace: shot/receiver coordinates,
+ the method of acquisition, current trace length, etc. Analogously to binary file header, each trace also
+ can have extended headers.
+ - **trace data** is usually an array of amplitude values, which can be stored in various numerical types.
+ As the original SEG-Y is quite old (1975), one of those numerical formats is IBM float,
+ which is very different from standard IEEE floats; therefore, special caution is required to
+ correctly decode values from such files.
+
+For the most part, SEG-Y files are written with a constant size of each trace, although the standard itself allows
+for variable-sized traces. We do not work with such files.
diff --git a/docs/start.rst b/docs/start.rst
new file mode 100644
index 0000000..515dcad
--- /dev/null
+++ b/docs/start.rst
@@ -0,0 +1,31 @@
+Quick start
+===========
+
+* Open the file:
+
+ .. code-block:: python
+
+ import segfast
+ segy_file = segfast.open('/path/to/file.sgy')
+
+* Load headers:
+
+ .. code-block:: python
+
+ headers = segy_file.load_headers(['CDP_X', 'CDP_Y', 'INLINE_3D', 'CROSSLINE_3D'])
+
+* Load inline:
+
+ .. code-block:: python
+
+ traces_idx = headers[headers['INLINE_3D'] == INLINE_IDX].index
+ inline = segy_file.load_traces(traces_idx)
+
+* Load certain depths from all traces:
+
+ .. code-block:: python
+
+ segy_file.load_depth_slices(DEPTHS)
+
+ The resulting array will have shape ``(n_traces, len(DEPTHS))`` so it must be processed to be transformed
+ to an array of the field shape.
diff --git a/pylintrc b/pylintrc
index 8ff5727..f6505c0 100644
--- a/pylintrc
+++ b/pylintrc
@@ -15,7 +15,7 @@ variable-rgx=(.*[a-z][a-z0-9_]{1,30}|[a-z_])$ # snake_case + single letters
argument-rgx=(.*[a-z][a-z0-9_]{1,30}|[a-z_])$ # snake_case + single letters
[MESSAGE CONTROL]
-disable=no-value-for-parameter, no-self-use, too-few-public-methods, unsubscriptable-object, no-member, too-many-lines,
+disable=no-value-for-parameter, too-few-public-methods, unsubscriptable-object, no-member, too-many-lines,
arguments-differ, too-many-locals, import-error, cyclic-import, duplicate-code, relative-beyond-top-level,
unused-argument, too-many-public-methods, invalid-name, attribute-defined-outside-init, arguments-renamed,
abstract-method, no-name-in-module, import-self
diff --git a/segfast/loader.py b/segfast/loader.py
index a0086d8..f84b29f 100644
--- a/segfast/loader.py
+++ b/segfast/loader.py
@@ -5,7 +5,26 @@
def Loader(path, engine='memmap', endian='big', strict=False, ignore_geometry=True):
- """ Selector class for loading SEG-Y with either segyio-based loader or memmap-based one. """
+ """ Selector class for loading SEG-Y with either segyio-based loader or memmap-based one.
+
+ Parameters
+ ----------
+ path : str
+ Path to the SEG-Y file
+ engine : 'memmap' or 'segyio'
+ Engine to load data from file: ``'memmap'`` is based on :class:`numpy.memmap` created for the whole file and
+ ``'segyio'`` is for using **segyio** library instruments. In any case, **segyio** is used to load information
+ about the entire file (e.g. ``'sample_interval'`` or ``'shape'``).
+ endian : 'big' or 'little'
+ Byte order in the file.
+ strict : bool
+ See :func:`segyio.open`
+ ignore_geometry : bool
+ See :func:`segyio.open`
+ Return
+ ------
+ :class:`~.memmap_loader.MemmapLoader` or :class:`~.segyio_loader.SegyioLoader`
+ """
loader_class = _select_loader_class(engine)
return loader_class(path=path, endian=endian, strict=strict, ignore_geometry=ignore_geometry)
open = File = Loader
diff --git a/segfast/memmap_loader.py b/segfast/memmap_loader.py
index 3dd514b..91b4568 100755
--- a/segfast/memmap_loader.py
+++ b/segfast/memmap_loader.py
@@ -18,44 +18,45 @@
class MemmapLoader(SegyioLoader):
""" Custom reader/writer for SEG-Y files.
- Relies on memory mapping mechanism for actual reads of headers and traces.
+ Relies on a memory mapping mechanism for actual reads of headers and traces.
SEG-Y description
-----------------
- Here we give a brief intro into SEG-Y format. Each SEG-Y file consists of:
- - file-wide information, in most cases the first 3600 bytes.
- - the first 3200 bytes are reserved for textual info about the file.
- Most software uses this to keep track of processing operations, date of creation, author, etc.
- - 3200-3600 bytes contain file-wide headers, which describe the number of traces,
- used format, depth, acquisition parameters, etc.
- - 3600+ bytes can be used to store the extended textual information, which is optional and indicated by
- one of the values in 3200-3600 bytes.
-
- - a sequence of traces, where each trace is a combination of header and its actual data.
- - header is the first 240 bytes and it describes the meta info about that trace:
- its coordinates in different types, the method of acquisition, etc.
- - data is an array of values, usually amplitudes, which can be stored in multiple numerical types.
- As the original SEG-Y is quite old (1975), one of those numerical formats is IBM float,
- which is very different from standard IEEE floats; therefore, a special caution is required to
- correctly decode values from such files.
-
- For the most part, SEG-Y files are written with constant size of each trace, although the standard itself allows
- for variable-sized traces. We do not work with such files.
+ The SEG-Y is a binary file divided into several blocks:
+
+ - file-wide information block which in most cases takes the first 3600 bytes:
+
+ - **textual header**: the first 3200 bytes are reserved for textual info about the file. Most of the
+ software uses this header to keep acquisition meta, date of creation, author, etc.
+ - **binary header**: 3200–3600 bytes contain file-wide headers, which describe the number of traces,
+ a format used for storing numbers, the number of samples for each trace, acquisition parameters, etc.
+ - (optional) 3600+ bytes can be used to store the **extended textual information**. If there is
+ such a header, then this is indicated by the value in one of the 3200–3600 bytes.
+
+ - a sequence of traces, where each trace is a combination of its header and signal data:
+ - **trace header** takes the first 240 bytes and describes the meta info about its trace: shot/receiver
+ coordinates, the method of acquisition, current trace length, etc. Analogously to binary file header,
+ each trace also can have extended headers.
+ - **trace data** is usually an array of amplitude values, which can be stored in various numerical types.
+ As the original SEG-Y is quite old (1975), one of those numerical formats is IBM float,
+ which is very different from standard IEEE floats; therefore, a special caution is required to
+ correctly decode values from such files.
+
+ For the most part, SEG-Y files are written with a constant size of each trace, although the standard itself allows
+ for variable-sized traces. We do not work with such files.
Implementation details
----------------------
- We rely on `segyio` to infer file-wide parameters.
-
- For headers and traces, we use custom methods of reading binary data.
- Main differences to `segyio C++` implementation:
+ We rely on :mod:`segyio` to infer file-wide parameters. For headers and traces, we use custom methods of reading
+ binary data. Main differences to :mod:`segyio` `C++` implementation:
- we read all of the requested headers in one file-wide sweep, speeding up by an order of magnitude
- compared to the `segyio` sequential read of every requested header.
- Also, we do that in multiple processes across chunks.
+ compared to the :mod:`segyio` sequential read of every requested header.
+ Also, we do that in multiple processes across chunks.
- - a memory map over traces data is used for loading values. Avoiding redundant copies and leveraging
- `numpy` superiority allows to speed up reading, especially in case of trace slicing along the samples axis.
- This is extra relevant in case of loading horizontal (depth) slices.
+ - a memory map over trace data is used for loading values. Avoiding redundant copies and leveraging
+ :mod:`numpy` superiority allows to speed up reading, especially in case of trace slicing along the samples
+ axis. This is extra relevant in the case of loading horizontal (depth) slices.
"""
def __init__(self, path, endian='big', strict=False, ignore_geometry=True):
# Re-use most of the file-wide attributes from the `segyio` loader
@@ -92,9 +93,9 @@ def _construct_data_mmap(self):
def load_headers(self, headers, indices=None, reconstruct_tsf=True, sort_columns=True, return_specs=False,
chunk_size=25_000, max_workers=4, pbar=False, **kwargs):
""" Load requested trace headers from a SEG-Y file for each trace into a dataframe.
- If needed, we reconstruct the `'TRACE_SEQUENCE_FILE'` manually be re-indexing traces.
+ If needed, we reconstruct the ``'TRACE_SEQUENCE_FILE'`` manually by re-indexing traces.
- Under the hood, we create a memory mapping over the SEG-Y file, and view it with a special dtype.
+ Under the hood, we create a memory mapping over the SEG-Y file, and view it with a special dtype.
That dtype skips all of the trace data bytes and all of the unrequested headers, leaving only passed `headers`
as non-void dtype.
@@ -104,11 +105,11 @@ def load_headers(self, headers, indices=None, reconstruct_tsf=True, sort_columns
----------
headers : sequence
An array-like where each element can be:
- - str -- header name,
- - int -- header starting byte,
- - :class:~`.utils.TraceHeaderSpec` -- used as is,
- - tuple -- args to init :class:~`.utils.TraceHeaderSpec`,
- - dict -- kwargs to init :class:~`.utils.TraceHeaderSpec`.
+ - ``str`` -- header name,
+ - ``int`` -- header starting byte,
+ - :class:`~.trace_header_spec.TraceHeaderSpec` -- used as is,
+ - ``tuple`` -- args to init :class:`~.trace_header_spec.TraceHeaderSpec`,
+ - ``dict`` -- kwargs to init :class:`~.trace_header_spec.TraceHeaderSpec`.
indices : sequence or None
Indices of traces to load trace headers for. If not given, trace headers are loaded for all traces.
reconstruct_tsf : bool
@@ -119,25 +120,50 @@ def load_headers(self, headers, indices=None, reconstruct_tsf=True, sort_columns
Whether to return header specs used to load trace headers.
chunk_size : int
Maximum amount of traces in each chunk.
- max_workers : int or None
- Maximum number of parallel processes to spawn. If None, then the number of CPU cores is used.
- pbar : bool, str
+ max_workers : int, optional
+ Maximum number of parallel processes to spawn. If ``None``, then the number of CPU cores is used.
+ pbar : bool or str
If bool, then whether to display progress bar over the file sweep.
- If str, then type of progress bar to display: `'t'` for textual, `'n'` for widget.
+ If str, then type of progress bar to display: ``'t'`` for textual, ``'n'`` for widget.
+
+ Return
+ ------
+ ``pandas.DataFrame``
Examples
--------
- Standard 'CDP_X' and 'CDP_Y' headers:
- >>> segfast_file.load_headers(['CDP_X', 'CDP_Y'])
- Standard headers from 181 and 185 bytes with standard dtypes:
- >>> segfast_file.load_headers([181, 185])
- Load 'CDP_X' and 'CDP_Y' from non-standard bytes positions corresponding to some standard headers (i.e. load
-        'CDP_X' from bytes for 'INLINE_3D' with '<f4' dtype):
-        >>> segfast_file.load_headers([{'name': 'CDP_X', 'start_byte': 189, 'dtype': '<f4'}])
-        Headers as tuples of their init parameters:
-        >>> segfast_file.load_headers([('CDP_X', 45, '>f4'), ('CDP_Y', 10, '>f4')])
- Load 'FieldRecord' header for the first 5 traces:
- >>> segfast_file.load_headers(['FieldRecord'], indices=np.arange(5))
+ * Standard ``'CDP_X'`` and ``'CDP_Y'`` headers:
+
+ .. code-block:: python
+
+ segfast_file.load_headers(['CDP_X', 'CDP_Y'])
+
+ * Standard headers from 181 and 185 bytes with standard dtypes:
+
+ .. code-block:: python
+
+ segfast_file.load_headers([181, 185])
+
+ * Load ``'CDP_X'`` and ``'CDP_Y'`` from non-standard bytes positions corresponding to some standard headers
+      (i.e. load ``'CDP_X'`` from bytes for ``'INLINE_3D'`` with ``'<f4'`` dtype):
+
+      .. code-block:: python
+
+          segfast_file.load_headers([{'name': 'CDP_X', 'start_byte': 189, 'dtype': '<f4'}])
+
+    * Headers as tuples of their init parameters:
+
+      .. code-block:: python
+
+          segfast_file.load_headers([('CDP_X', 45, '>f4'), ('CDP_Y', 10, '>f4')])
+
+ * Load 'FieldRecord' header for the first 5 traces:
+
+ .. code-block:: python
+
+ segfast_file.load_headers(['FieldRecord'], indices=np.arange(5))
+
"""
_ = kwargs
headers = self.make_headers_specs(headers)
@@ -201,23 +227,28 @@ def callback(future, start):
@staticmethod
def _make_mmap_headers_dtype(headers):
- """ Create list of `numpy` dtypes to view headers data.
+ """ Create a list of :mod:`numpy` dtypes to view headers data.
Defines a dtype for exactly 240 bytes, where each of the requested headers would have its own named subdtype,
- and the rest of bytes are lumped into `np.void` of certain lengths.
+ and the rest of bytes are lumped into :class:`numpy.void` of certain lengths.
- Only the headers data should be viewed under this dtype: the rest of trace data (values)
+ Only the header data should be viewed under this dtype: the rest of trace data (values)
should be processed (or skipped) separately.
- We do not apply final conversion to `np.dtype` to the resulting list of dtypes so it is easier to append to it.
+ We do not apply the final conversion to :class:`numpy.dtype` to the resulting list of dtypes so it is easier
+ to append to it.
Examples
--------
- if `headers` are `INLINE_3D` and `CROSSLINE_3D`, which are 189-192 and 193-196 bytes, the output would be:
- >>> [('unused_0', numpy.void, 188),
- >>> ('INLINE_3D', '>i4'),
- >>> ('CROSSLINE_3D', '>i4'),
- >>> ('unused_1', numpy.void, 44)]
+ If ``headers`` are ``'INLINE_3D'`` and ``'CROSSLINE_3D'``, which are 189-192 and 193-196 bytes, the output
+ would be:
+
+ .. code-block:: python
+
+ [('unused_0', numpy.void, 188),
+ ('INLINE_3D', '>i4'),
+ ('CROSSLINE_3D', '>i4'),
+ ('unused_1', numpy.void, 44)]
"""
headers = sorted(headers, key=lambda x: x.start_byte)
@@ -250,18 +281,22 @@ def _make_mmap_headers_dtype(headers):
# Traces loading
def load_traces(self, indices, limits=None, buffer=None):
""" Load traces by their indices.
- Under the hood, we use a pre-made memory mapping over the file, where trace data is viewed with a special dtype.
+ Under the hood, we use a pre-made memory mapping over the file, where trace data is viewed with a special dtype.
Regardless of the numerical dtype of SEG-Y file, we output IEEE float32:
for IBM floats, that requires an additional conversion.
Parameters
----------
indices : sequence
- Indices (TRACE_SEQUENCE_FILE) of the traces to read.
+ Indices (``'TRACE_SEQUENCE_FILE'``) of the traces to read.
limits : sequence of ints, slice, optional
Slice of the data along the depth axis.
- buffer : np.ndarray, optional
+ buffer : numpy.ndarray, optional
Buffer to read the data into. If possible, avoids copies.
+
+ Return
+ ------
+ numpy.ndarray
"""
limits = self.process_limits(limits)
@@ -281,14 +316,18 @@ def load_traces(self, indices, limits=None, buffer=None):
def load_depth_slices(self, indices, buffer=None):
""" Load horizontal (depth) slices of the data.
- Requires a ~full sweep through SEG-Y, therefore is slow.
+ Requires an almost full sweep through SEG-Y, therefore is slow.
Parameters
----------
indices : sequence
Indices (ordinals) of the depth slices to read.
- buffer : np.ndarray, optional
+ buffer : numpy.ndarray, optional
Buffer to read the data into. If possible, avoids copies.
+
+ Return
+ ------
+ numpy.ndarray
"""
depth_slices = self.data_mmap[:, indices]
if self.file_format == 1:
@@ -316,7 +355,7 @@ def __getstate__(self):
return state
def __setstate__(self, state):
- """ Recreate instance from unpickled state, reopen source SEG-Y file and memmap. """
+ """ Recreate instance from the unpickled state, reopen source SEG-Y file and memmap. """
super().__setstate__(state)
self.data_mmap = self._construct_data_mmap()
@@ -324,7 +363,7 @@ def __setstate__(self, state):
# Conversion to other SEG-Y formats (data dtype)
def convert(self, path=None, format=8, transform=None, chunk_size=25_000, max_workers=4,
pbar='t', overwrite=True):
- """ Convert SEG-Y file to a different `format`: dtype of data values.
+ """ Convert SEG-Y file to a different ``format``: dtype of data values.
Keeps the same binary header (except for the 3225 byte, which stores the format).
Keeps the same header values for each trace: essentially, only the values of each trace are transformed.
@@ -334,22 +373,26 @@ def convert(self, path=None, format=8, transform=None, chunk_size=25_000, max_wo
Parameters
----------
path : str, optional
- Path to save file to. If not provided, we use the path of the current cube with an added postfix.
+ Path to save the file to. If not provided, we use the path of the current cube with an added postfix.
format : int
Target SEG-Y format.
- Refer to :attr:`SEGY_FORMAT_TO_TRACE_DATA_DTYPE` for list of available formats and their data value dtype.
+ Refer to :attr:`.SEGY_FORMAT_TO_TRACE_DATA_DTYPE` for list of available formats and their data value dtype.
transform : callable, optional
- Callable to transform data from the current file to the ones, saved in `path`.
- Must return the same dtype, as specified by `format`.
+ Callable to transform data from the current file to the ones, saved in ``path``.
+ Must return the same dtype, as specified by ``format``.
chunk_size : int
Maximum amount of traces in each chunk.
max_workers : int or None
Maximum number of parallel processes to spawn. If None, then the number of CPU cores is used.
pbar : bool, str
- If bool, then whether to display progress bar.
- If str, then type of progress bar to display: `'t'` for textual, `'n'` for widget.
+ If bool, then whether to display a progress bar.
+ If str, then the type of progress bar to display: ``'t'`` for textual, ``'n'`` for widget.
overwrite : bool
- Whether to overwrite existing `path` or raise an exception.
+ Whether to overwrite the existing ``path`` or raise an exception.
+
+ Return
+ ------
+ path : str
"""
#pylint: disable=redefined-builtin
# Default path
@@ -433,7 +476,7 @@ def read_chunk(path, shape, offset, mmap_dtype, buffer_dtype, headers, indices):
def convert_chunk(src_path, dst_path, shape, offset, src_dtype, dst_dtype, endian, transform, start, chunk_size):
- """ Copy the headers, transform and write data from one chunk.
+ """ Copy the headers, transform, and write data from one chunk.
We create all memory mappings anew in each worker, as it is easier and creates no significant overhead.
"""
# Deserialize `transform`
@@ -465,8 +508,8 @@ def convert_chunk(src_path, dst_path, shape, offset, src_dtype, dst_dtype, endia
@njit(nogil=True, parallel=True)
def ibm_to_ieee(hh, hl, lh, ll):
""" Convert 4 arrays representing individual bytes of IBM 4-byte floats into a single array of floats.
- Input arrays are ordered from most to least significant bytes and have `np.uint8` dtypes.
- The result is returned as an `np.float32` array.
+ Input arrays are ordered from most to least significant bytes and have ``numpy.uint8`` dtypes.
+ The result is returned as a ``numpy.float32`` array.
"""
# pylint: disable=not-an-iterable
res = np.empty_like(hh, dtype=np.float32)
diff --git a/segfast/segyio_loader.py b/segfast/segyio_loader.py
index dfc20a5..b28e50b 100755
--- a/segfast/segyio_loader.py
+++ b/segfast/segyio_loader.py
@@ -12,12 +12,15 @@
class SegyioLoader:
- """ A thin wrapper around `segyio` library for convenient loading of headers and traces.
+ """ A thin wrapper around **segyio** library for convenient loading of headers and traces.
+
+ Most of the methods directly call the public API of **segyio**.
+
+ For trace loading, we use private methods and attributes of :class:`segyio.SegyFile`, which allow:
+
+ * reading data into the pre-defined buffer
+ * read only parts of the trace
- Most of the methods directly call public API of `segyio`.
- For trace loading we use private methods and attributes of `segyio.SegyFile`, which allow:
- - reading data into pre-defined buffer
- - read only parts of the trace.
This gives up to 50% speed-up over public API for the scenario of loading sequence of traces,
and up to 15% over public API in case of loading full lines (inlines or crosslines).
"""
@@ -33,7 +36,7 @@ class SegyioLoader:
11: "u2",
12: "u8",
16: "u1",
- }
+ } #: :meta private:
ENDIANNESS_TO_SYMBOL = {
"big": ">",
@@ -41,7 +44,7 @@ class SegyioLoader:
"little": "<",
"lsb": "<",
- }
+ } #: :meta private:
def __init__(self, path, endian='big', strict=False, ignore_geometry=True):
# Parse arguments for errors
@@ -93,22 +96,23 @@ def delay(self):
def load_headers(self, headers, indices=None, reconstruct_tsf=True, sort_columns=True, return_specs=False,
tracewise=True, pbar=False, **kwargs):
""" Load requested trace headers from a SEG-Y file for each trace into a dataframe.
- If needed, we reconstruct the `'TRACE_SEQUENCE_FILE'` manually be re-indexing traces.
+ If needed, we reconstruct the ``'TRACE_SEQUENCE_FILE'`` manually by re-indexing traces.
Parameters
----------
headers : sequence
An array-like where each element can be:
- - str -- header name,
- - int -- header starting byte,
- - :class:~`.utils.TraceHeaderSpec` -- used as is,
- - tuple -- args to init :class:~`.utils.TraceHeaderSpec`,
- - dict -- kwargs to init :class:~`.utils.TraceHeaderSpec`.
+ - ``str`` -- header name,
+ - ``int`` -- header starting byte,
+ - :class:`~.trace_header_spec.TraceHeaderSpec` -- used as is,
+ - ``tuple`` -- args to init :class:`~.trace_header_spec.TraceHeaderSpec`,
+ - ``dict`` -- kwargs to init :class:`~.trace_header_spec.TraceHeaderSpec`.
+
Note that for :class:`.SegyioLoader` all nonstandard headers byte positions and dtypes will be ignored.
indices : sequence or None
Indices of traces to load trace headers for. If not given, trace headers are loaded for all traces.
reconstruct_tsf : bool
- Whether to reconstruct `TRACE_SEQUENCE_FILE` manually.
+ Whether to reconstruct ``TRACE_SEQUENCE_FILE`` manually.
sort_columns : bool
Whether to sort columns in the resulting dataframe by their starting bytes.
return_specs : bool
@@ -116,8 +120,12 @@ def load_headers(self, headers, indices=None, reconstruct_tsf=True, sort_columns
tracewise : bool
Whether to iterate over the file in a trace-wise manner, instead of header-wise.
pbar : bool, str
- If bool, then whether to display progress bar over the file sweep.
- If str, then type of progress bar to display: `'t'` for textual, `'n'` for widget.
+ If ``bool``, then whether to display the progress bar over the file sweep.
+ If ``str``, then type of progress bar to display: ``'t'`` for textual, ``'n'`` for widget.
+
+ Return
+ ------
+ ``pandas.DataFrame``
"""
_ = kwargs
headers = self.make_headers_specs(headers)
@@ -157,7 +165,10 @@ def load_header(self, header, indices=None, **kwargs):
@staticmethod
def postprocess_headers_dataframe(dataframe, headers, indices=None, reconstruct_tsf=True, sort_columns=True):
- """ Optionally add TSF header and sort columns of a headers dataframe. """
+ """ Optionally add ``'TRACE_SEQUENCE_FILE'`` header and sort columns of a headers dataframe.
+
+ :meta private:
+ """
if reconstruct_tsf:
if indices is None:
dtype = np.int32 if len(dataframe) < np.iinfo(np.int32).max else np.int64
@@ -174,7 +185,7 @@ def postprocess_headers_dataframe(dataframe, headers, indices=None, reconstruct_
return dataframe, headers
def make_headers_specs(self, headers):
- """ Make instances of TraceHeaderSpec. """
+ """ Transform headers list to list of :class:`~.trace_header_spec.TraceHeaderSpec` instances. """
byteorder = self.ENDIANNESS_TO_SYMBOL[self.endian]
if headers == 'all':
@@ -207,10 +218,10 @@ def load_traces(self, indices, limits=None, buffer=None):
Parameters
----------
indices : sequence
- Indices (TRACE_SEQUENCE_FILE) of the traces to read.
+ Indices (``TRACE_SEQUENCE_FILE``) of the traces to read.
limits : sequence of ints, slice, optional
Slice of the data along the depth axis.
- buffer : np.ndarray, optional
+ buffer : numpy.ndarray, optional
Buffer to read the data into. If possible, avoids copies.
"""
limits = self.process_limits(limits)
@@ -224,7 +235,7 @@ def load_traces(self, indices, limits=None, buffer=None):
return buffer
def process_limits(self, limits):
- """ Convert given `limits` to a `slice`. """
+ """ Convert given ``limits`` to a ``slice`` instance. """
if limits is None:
return slice(0, self.n_samples, 1)
if isinstance(limits, int):
@@ -241,7 +252,7 @@ def process_limits(self, limits):
return slice(*indices)
def load_trace(self, index, buffer, limits):
- """ Load one trace into buffer. """
+ """ Load one trace into the buffer. """
self.file_handler.xfd.gettr(buffer, index, 1, 1,
limits.start, limits.stop, limits.step,
buffer.size)
@@ -250,14 +261,18 @@ def load_trace(self, index, buffer, limits):
# Data loading: depth slices
def load_depth_slices(self, indices, buffer=None):
""" Load horizontal (depth) slices of the data.
- Requires a ~full sweep through SEG-Y, therefore is slow.
+ Requires an almost full sweep through SEG-Y, therefore is slow.
Parameters
----------
indices : sequence
Indices (ordinals) of the depth slices to read.
- buffer : np.ndarray, optional
+ buffer : numpy.ndarray, optional
Buffer to read the data into. If possible, avoids copies.
+
+ Returns
+ -------
+ numpy.ndarray
"""
if buffer is None:
buffer = np.empty((len(indices), self.n_traces), dtype=self.dtype)
@@ -273,13 +288,13 @@ def load_depth_slice(self, index, buffer):
# Convenience and utility methods
def make_chunk_iterator(self, chunk_size=None, n_chunks=None, limits=None, buffer=None):
- """ Create on iterator over the entire file traces in chunks.
+ """ Create an iterator over the entire file traces in chunks.
- Each chunk contains no more than `chunk_size` traces.
- If `chunk_size` is not provided and `n_chunks` is given instead, there are no more than `n_chunks` chunks.
- One and only one of `chunk_size` and `n_chunks` should be provided.
+ Each chunk contains no more than ``chunk_size`` traces.
+ If ``chunk_size`` is not provided and ``n_chunks`` is given instead, there are no more than ``n_chunks`` chunks.
+ One and only one of ``chunk_size`` and ``n_chunks`` should be provided.
- Each element in the iterator is a dictionary with `'data'`, `'start'` and `'end'` keys.
+ Each element in the iterator is a dictionary with ``'data'``, ``'start'`` and ``'end'`` keys.
Parameters
----------
@@ -288,19 +303,20 @@ def make_chunk_iterator(self, chunk_size=None, n_chunks=None, limits=None, buffe
n_chunks : int, optional
Maximum number of chunks.
limits : sequence of ints, slice, optional
- Slice of the data along the depth (last) axis. Passed directly to :meth:`load_traces`.
- buffer : np.ndarray, optional
- Buffer to read the data into. If possible, avoids copies. Passed directly to :meth:`load_traces`.
+ Slice of the data along the depth (last) axis. Passed directly to :meth:`.load_traces`.
+ buffer : numpy.ndarray, optional
+ Buffer to read the data into. If possible, avoids copies. Passed directly to :meth:`.load_traces`.
- Returns
- -------
+ Returns
+ -------
iterator, info : tuple with two elements
iterator : iterable
An iterator over the entire SEG-Y traces.
- Each element in the iterator is a dictionary with `'data'`, `'start'` and `'end'` keys.
+ Each element in the iterator is a dictionary with ``'data'``, ``'start'`` and ``'end'`` keys.
info : dict
- Description of the iterator with `'chunk_size'`, `'n_chunks'`, `'chunk_starts'` and `'chunk_ends'` keys.
+ Description of the iterator with ``'chunk_size'``, ``'n_chunks'``, ``'chunk_starts'`` and ``'chunk_ends'``
+ keys.
"""
# Parse input parameters
if chunk_size is None and n_chunks is None:
@@ -333,7 +349,7 @@ def make_chunk_iterator(self, chunk_size=None, n_chunks=None, limits=None, buffe
return iterator, info
def chunk_iterator(self, chunk_size=None, n_chunks=None, limits=None, buffer=None):
- """ A shorthand for :meth:`make_chunk_iterator` with no info returned. """
+ """ A shorthand for :meth:`.make_chunk_iterator` with no info returned. """
return self.make_chunk_iterator(chunk_size=chunk_size, n_chunks=n_chunks,
limits=limits, buffer=buffer)[0]
@@ -346,7 +362,7 @@ def __exit__(self, _, __, ___):
self.file_handler.close()
def __getstate__(self):
- """ Create pickling state from `__dict__` by setting SEG-Y file handler to `None`. """
+ """ Create a pickling state from ``__dict__`` by setting SEG-Y file handler to ``None``. """
state = copy(self.__dict__)
state["file_handler"] = None
return state
@@ -360,9 +376,9 @@ def __setstate__(self, state):
class SafeSegyioLoader(SegyioLoader):
- """ A thin wrapper around `segyio` library for convenient loading of headers and traces.
+ """ A thin wrapper around the **segyio** library for convenient loading of headers and traces.
- Unlike :class:`SegyioLoader`, uses only public APIs to load traces.
+ Unlike :class:`.SegyioLoader`, uses only public APIs to load traces.
Used mainly for performance measurements.
"""
diff --git a/segfast/trace_header_spec.py b/segfast/trace_header_spec.py
index d36adf8..f0e5abc 100644
--- a/segfast/trace_header_spec.py
+++ b/segfast/trace_header_spec.py
@@ -4,29 +4,46 @@
import segyio
class TraceHeaderSpec:
- """ Trace header class to store its name and byte position. By default, byte position is defined by name
- accordingly to SEG-Y specification.
+ """ Trace header class to store its name, byte position and dtype (including endianness). By default, byte position
+ is defined by name according to SEG-Y specification.
Parameters
----------
name : str
Name of the header.
start_byte : int, optional
- Byte position of the header, by default None. If None, default byte position from the spec will be used.
+ Byte position of the header, by default ``None``. If ``None``, default byte position from the spec will be used.
dtype : int, str or dtype, optional
- dtype for header (e.g. 'i2', '>f4', `np.float32`) or its length in bytes (then is interpreted as integer type).
+ dtype for header (e.g. ``'i2'``, ``'>f4'``, ``numpy.float32``) or its length in bytes (then is interpreted
+ as integer type).
byteorder : '>' or '<', optional
- Endianness to use, if it's not defined by dtype. If None and dtype doesn't specify, architecture default
- will be used.
+ Endianness to use, if it's not defined by ``dtype``. If ``None`` and dtype doesn't specify it, architecture
+ default will be used.
"""
TRACE_HEADER_SIZE = 240
STANDARD_HEADER_TO_BYTE = segyio.tracefield.keys
+ """ Mapping from standard header name to its start byte.
+
+ :meta hide-value:
+ """
STANDARD_BYTE_TO_HEADER = {v: k for k, v in STANDARD_HEADER_TO_BYTE.items()}
+ """ Mapping from start byte to header name according to the standard.
+
+ :meta hide-value:
+ """
START_BYTES = sorted(STANDARD_HEADER_TO_BYTE.values())
+ """ List of byte positions for standard headers.
+
+ :meta hide-value:
+ """
STANDARD_BYTE_TO_LEN = {start: end - start
for start, end in zip(START_BYTES, START_BYTES[1:] + [TRACE_HEADER_SIZE + 1])}
+ """ Mapping from start byte to length of header in bytes according to the standard.
+
+ :meta hide-value:
+ """
def __init__(self, name=None, start_byte=None, dtype=None, byteorder=None):
self.name = name or self.STANDARD_BYTE_TO_HEADER[start_byte]
@@ -64,7 +81,7 @@ def has_standard_location(self):
@property
def standard_name(self):
- """ The name from specification for header (if 'has_standard_location' is True). """
+ """ The name from the specification for the header (if ``has_standard_location`` is ``True``). """
if not self.has_standard_location:
raise ValueError("The header has non-standard start byte or dtype")
return self.STANDARD_BYTE_TO_HEADER[self.start_byte]
@@ -100,7 +117,7 @@ def __hash__(self):
return hash(self._spec_params)
def set_default_byteorder(self, byteorder):
- """ Set byteorder to use as default (if not specified by dtype). """
+ """ Set byteorder to use as a default, if not specified by ``dtype``. """
dtype = self.dtype.str
if not self.has_explicit_byteorder:
dtype = dtype[1:]
diff --git a/segfast/utils.py b/segfast/utils.py
index 3ebc8f3..f7699f8 100644
--- a/segfast/utils.py
+++ b/segfast/utils.py
@@ -49,8 +49,9 @@ def update(self, n=1):
class ForPoolExecutor(Executor):
""" A sequential executor of tasks in a for loop.
- Inherits `Executor` interface, so can serve as a drop-in replacement for either
- `ThreadPoolExecutor` or `ProcessPoolExecutor` when threads or processes spawning is undesirable.
+ Inherits :class:`concurrent.futures.Executor` interface, so can serve as a drop-in replacement for either
+ :class:`concurrent.futures.ThreadPoolExecutor` or :class:`concurrent.futures.ProcessPoolExecutor` when threads or
+ processes spawning is undesirable.
"""
def __init__(self, *args, **kwargs):