From d021e371305c7f8d587e74fbd72adfdaa5a363b3 Mon Sep 17 00:00:00 2001
From: Adrian Rumpold
Date: Wed, 28 Feb 2024 08:52:40 +0100
Subject: [PATCH 1/6] deps: Upgrade pre-commit hooks

---
 .pre-commit-config.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7cbcc401..7321dc52 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -17,23 +17,23 @@ repos:
       types_or: [ python, pyi ]
       args: [--ignore-missing-imports, --scripts-are-modules]
 - repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.2.0
+  rev: v0.2.2
   hooks:
   - id: ruff
     args: [ --fix, --exit-non-zero-on-fix ]
   - id: ruff-format
 - repo: https://github.com/PyCQA/bandit
   rev: 1.7.7
   hooks:
     - id: bandit
       args: [-c, pyproject.toml]
       additional_dependencies: ["bandit[toml]"]
 - repo: https://github.com/jsh9/pydoclint
-  rev: 0.3.9
+  rev: 0.4.1
   hooks:
   - id: pydoclint
 - repo: https://github.com/jazzband/pip-tools
-  rev: 7.3.0
+  rev: 7.4.0
   hooks:
   - id: pip-compile
     name: pip-compile requirements-dev.txt

From ce6ce759b2152e47d28eac14c7841d8e7f2b0d55 Mon Sep 17 00:00:00 2001
From: Adrian Rumpold
Date: Wed, 28 Feb 2024 11:53:04 +0100
Subject: [PATCH 2/6] chore: Use Python 3.11 by default for pre-commit hooks

This prevents problems with pip-compile on systems where Python 3.12 is
the default system interpreter.
---
 .pre-commit-config.yaml | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7321dc52..5e9347c9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,3 +1,5 @@
+default_language_version:
+  python: python3.11
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
   rev: v4.5.0
@@ -13,21 +15,21 @@ repos:
   rev: v1.8.0
   hooks:
   # See https://github.com/pre-commit/mirrors-mypy/blob/main/.pre-commit-hooks.yaml
-    - id: mypy
-      types_or: [ python, pyi ]
-      args: [--ignore-missing-imports, --scripts-are-modules]
+  - id: mypy
+    types_or: [python, pyi]
+    args: [--ignore-missing-imports, --scripts-are-modules]
 - repo: https://github.com/astral-sh/ruff-pre-commit
   rev: v0.2.2
   hooks:
   - id: ruff
-    args: [ --fix, --exit-non-zero-on-fix ]
+    args: [--fix, --exit-non-zero-on-fix]
   - id: ruff-format
 - repo: https://github.com/PyCQA/bandit
   rev: 1.7.7
   hooks:
-    - id: bandit
-      args: [-c, pyproject.toml]
-      additional_dependencies: ["bandit[toml]"]
+  - id: bandit
+    args: [-c, pyproject.toml]
+    additional_dependencies: ["bandit[toml]"]
 - repo: https://github.com/jsh9/pydoclint
   rev: 0.4.1
   hooks:

From d9ee37e26bb9c77218c075028c35638f8bb36101 Mon Sep 17 00:00:00 2001
From: Adrian Rumpold
Date: Wed, 28 Feb 2024 12:02:56 +0100
Subject: [PATCH 3/6] docs: Fix Polars integration code highlighting

---
 docs/_code/polars_example.py | 2 +-
 docs/guides/integrations.md  | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/docs/_code/polars_example.py b/docs/_code/polars_example.py
index f585d12b..b90391ce 100644
--- a/docs/_code/polars_example.py
+++ b/docs/_code/polars_example.py
@@ -9,6 +9,6 @@
 us_lakes = lakes.filter(pl.col("Country") == "United States of America")
 
 with fs.open(f"lakefs://quickstart/{tx.branch.id}/us_lakes.csv", "wb") as f:
-    us_lakes.write_csv(f)
+    us_lakes.write_csv(f)  # (1)!
 
 tx.commit(message="Add US lakes")
diff --git a/docs/guides/integrations.md b/docs/guides/integrations.md
index ac9a1d2a..f36a84ab 100644
--- a/docs/guides/integrations.md
+++ b/docs/guides/integrations.md
@@ -48,8 +48,7 @@ The Python API wrapper for the Rust-based [Polars](https://pola-rs.github.io/pol
 
 Again, the following code example demonstrates how to read a Parquet file and save a modified version back in CSV format to a lakeFS repository from Polars in the context of a [transaction](transactions.md):
 
-
-```python hl_lines="10 13-14"
+```python hl_lines="8 11-12"
 --8<-- "docs/_code/polars_example.py"
 ```

From 5705526f6f9411ef527bc339374e057cce8832ae Mon Sep 17 00:00:00 2001
From: Adrian Rumpold
Date: Wed, 28 Feb 2024 12:03:40 +0100
Subject: [PATCH 4/6] docs: Fix Pandas integration code highlighting

---
 docs/guides/integrations.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/guides/integrations.md b/docs/guides/integrations.md
index f36a84ab..3de104b0 100644
--- a/docs/guides/integrations.md
+++ b/docs/guides/integrations.md
@@ -21,7 +21,7 @@ See the Pandas documentation on [reading/writing remote files](https://pandas.py
 
 The following code snippet illustrates how to read and write Pandas data frames in various formats from/to a lakeFS repository in the context of a [transaction](transactions.md):
 
-```python hl_lines="10 12"
+```python hl_lines="8 10"
 --8<-- "docs/_code/pandas_example.py"
 ```

From f8e281c51f413b9f39001adb1d173b52251a5885 Mon Sep 17 00:00:00 2001
From: Adrian Rumpold
Date: Wed, 28 Feb 2024 12:04:22 +0100
Subject: [PATCH 5/6] docs: Fix DuckDB integration code highlighting

---
 docs/guides/integrations.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/guides/integrations.md b/docs/guides/integrations.md
index 3de104b0..e950dcc5 100644
--- a/docs/guides/integrations.md
+++ b/docs/guides/integrations.md
@@ -32,7 +32,7 @@ This allows DuckDB to transparently query and store data located in lakeFS repos
 
 Similar to the example above, the following code snippet illustrates how to read and write data from/to a lakeFS repository in the context of a [transaction](transactions.md) through the [DuckDB Python API](https://duckdb.org/docs/api/python/overview.html){: target="_blank" rel="noopener"}:
 
-```python hl_lines="6 11 13"
+```python hl_lines="6 9 11"
 --8<-- "docs/_code/duckdb_example.py"
 ```

From 4d07d8a50bcee3e4af35a2300a8139eb49b8e841 Mon Sep 17 00:00:00 2001
From: Adrian Rumpold
Date: Wed, 28 Feb 2024 12:04:58 +0100
Subject: [PATCH 6/6] docs: Fix PyArrow integration code highlighting

---
 docs/guides/integrations.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/guides/integrations.md b/docs/guides/integrations.md
index e950dcc5..3f97dda4 100644
--- a/docs/guides/integrations.md
+++ b/docs/guides/integrations.md
@@ -62,6 +62,6 @@ PyArrow `read_*` and `write_*` functions take an explicit `filesystem` parameter
 
 The following example code illustrates the use of lakeFS-spec with PyArrow, reading a Parquet file and writing it back to a lakeFS repository as a partitioned CSV dataset in the context of a [transaction](transactions.md):
 
-```python hl_lines="12 17"
+```python hl_lines="10 15"
 --8<-- "docs/_code/pyarrow_example.py"
 ```
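
Note for readers of this series: the four `hl_lines` fixes above all point into the same transaction-based example files under `docs/_code/`. Below is a minimal sketch of the pattern those files share, reconstructed around the `polars_example.py` lines visible in patch 3; the import block, the read path, and the `lakes.parquet` file name are assumptions for illustration, not taken from the repository.

```python
import polars as pl
from lakefs_spec import LakeFSFileSystem

fs = LakeFSFileSystem()  # credentials are discovered from the environment or lakectl config

# The transaction runs on an ephemeral branch, exposed to the caller as `tx.branch`.
with fs.transaction("quickstart", "main") as tx:
    # Hypothetical read path: the example file's opening lines are not shown in the patch.
    with fs.open(f"lakefs://quickstart/{tx.branch.id}/lakes.parquet") as f:
        lakes = pl.read_parquet(f)

    us_lakes = lakes.filter(pl.col("Country") == "United States of America")

    # These lines and the commit below are what appears in the patch 3 hunk.
    with fs.open(f"lakefs://quickstart/{tx.branch.id}/us_lakes.csv", "wb") as f:
        us_lakes.write_csv(f)

    tx.commit(message="Add US lakes")
```

In lakefs-spec, the transaction branch is merged back into the base branch (`main` here) only if the block completes without errors, which is why the examples read from and write to `tx.branch.id` rather than `main` directly.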