Skip to content

Commit 04e747f

Browse files
authored
Impala (#96)
* First draft to include impala in ci. * Use docker-compose. * Remove postgres. * Detach docker-compose. * Return early for debugging. * Debug db connection. * Remove redundant statement. * Wait for docker compose. * Use environment variable instead of hardcoded reference. * Remove wait flag again. * Fix impala incompatibilities. * Skip a whole lot of tests. * Add tmate for debugging. * Trim down ci for debugging. * Use tmate version 3.13. * Remove tmate. * Add --wait flag. * Reintroduce tmate. * Remove tmate. * Add tmate v3. * Change docker compose invocation. * Wait for specific service. * Remove tmate again. * Run integration tests only. * Fix column capitalization integration tests. * Reuse existing table. * Enable parallelization. * Disable stats tests for impala. * Reintroduce tmate. * Split up test execution. * Remove concurrency. * Add split up integration tests. * Add further integration tests. * Reinsert ordinary integration tests. * Fix date difference query. * Clean up tests. * Fix redundant clause. * Add impala entry to CHANGELOG.
1 parent 675043a commit 04e747f

File tree

11 files changed

+427
-18
lines changed

11 files changed

+427
-18
lines changed

.github/workflows/ci.yaml

+262-1
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@ jobs:
2828
- name: Run pre-commit checks
2929
uses: pre-commit/[email protected]
3030

31-
3231
linux:
3332
name: "Linux - unit tests - Python ${{ matrix.PYTHON_VERSION }}"
3433
runs-on: ubuntu-latest
@@ -228,3 +227,265 @@ jobs:
228227
uses: codecov/[email protected]
229228
with:
230229
file: ./coverage.xml
230+
231+
linux-integration_tests-impala-column-pt1:
232+
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - Impala - pt1"
233+
runs-on: ubuntu-20.04
234+
env:
235+
CI: True
236+
strategy:
237+
fail-fast: false
238+
matrix:
239+
PYTHON_VERSION: [ '3.8']
240+
241+
steps:
242+
- name: Checkout branch
243+
uses: actions/checkout@v3
244+
with:
245+
ref: ${{ github.head_ref }}
246+
- name: Fetch full git history
247+
run: git fetch --prune --unshallow
248+
- uses: conda-incubator/setup-miniconda@v2
249+
with:
250+
python-version: ${{ matrix.PYTHON_VERSION }}
251+
miniforge-variant: Mambaforge
252+
miniforge-version: 4.11.0-2
253+
use-mamba: true
254+
environment-file: environment.yml
255+
activate-environment: datajudge
256+
- name: Set up container
257+
run: docker compose up --wait impala
258+
- name: Run Integration Tests
259+
shell: bash -l {0}
260+
run: |
261+
flit install -s
262+
pytest --cov=datajudge --cov-report=xml --cov-append --backend=impala tests/integration/test_column_capitalization.py
263+
- name: Generate code coverage report
264+
uses: codecov/[email protected]
265+
with:
266+
file: ./coverage.xml
267+
268+
269+
linux-integration_tests-impala-column-pt2:
270+
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - Impala - pt2"
271+
runs-on: ubuntu-20.04
272+
env:
273+
CI: True
274+
strategy:
275+
fail-fast: false
276+
matrix:
277+
PYTHON_VERSION: [ '3.8']
278+
279+
steps:
280+
- name: Checkout branch
281+
uses: actions/checkout@v3
282+
with:
283+
ref: ${{ github.head_ref }}
284+
- name: Fetch full git history
285+
run: git fetch --prune --unshallow
286+
- uses: conda-incubator/setup-miniconda@v2
287+
with:
288+
python-version: ${{ matrix.PYTHON_VERSION }}
289+
miniforge-variant: Mambaforge
290+
miniforge-version: 4.11.0-2
291+
use-mamba: true
292+
environment-file: environment.yml
293+
activate-environment: datajudge
294+
- name: Set up container
295+
run: docker compose up --wait impala
296+
- name: Run Integration Tests
297+
shell: bash -l {0}
298+
run: |
299+
flit install -s
300+
pytest --cov=datajudge --cov-report=xml --cov-append --backend=impala tests/integration/test_data_source.py
301+
- name: Generate code coverage report
302+
uses: codecov/[email protected]
303+
with:
304+
file: ./coverage.xml
305+
306+
linux-integration_tests-impala-column-pt3:
307+
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - Impala - pt3"
308+
runs-on: ubuntu-20.04
309+
env:
310+
CI: True
311+
strategy:
312+
fail-fast: false
313+
matrix:
314+
PYTHON_VERSION: [ '3.8']
315+
316+
steps:
317+
- name: Checkout branch
318+
uses: actions/checkout@v3
319+
with:
320+
ref: ${{ github.head_ref }}
321+
- name: Fetch full git history
322+
run: git fetch --prune --unshallow
323+
- uses: conda-incubator/setup-miniconda@v2
324+
with:
325+
python-version: ${{ matrix.PYTHON_VERSION }}
326+
miniforge-variant: Mambaforge
327+
miniforge-version: 4.11.0-2
328+
use-mamba: true
329+
environment-file: environment.yml
330+
activate-environment: datajudge
331+
- name: Set up container
332+
run: docker compose up --wait impala
333+
- name: Run Integration Tests
334+
shell: bash -l {0}
335+
run: |
336+
flit install -s
337+
pytest --cov=datajudge --cov-report=xml --cov-append --backend=impala tests/integration/test_integration.py -k row
338+
- name: Generate code coverage report
339+
uses: codecov/[email protected]
340+
with:
341+
file: ./coverage.xml
342+
343+
344+
linux-integration_tests-impala-column-pt4:
345+
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - Impala - pt4"
346+
runs-on: ubuntu-20.04
347+
env:
348+
CI: True
349+
strategy:
350+
fail-fast: false
351+
matrix:
352+
PYTHON_VERSION: [ '3.8']
353+
354+
steps:
355+
- name: Checkout branch
356+
uses: actions/checkout@v3
357+
with:
358+
ref: ${{ github.head_ref }}
359+
- name: Fetch full git history
360+
run: git fetch --prune --unshallow
361+
- uses: conda-incubator/setup-miniconda@v2
362+
with:
363+
python-version: ${{ matrix.PYTHON_VERSION }}
364+
miniforge-variant: Mambaforge
365+
miniforge-version: 4.11.0-2
366+
use-mamba: true
367+
environment-file: environment.yml
368+
activate-environment: datajudge
369+
- name: Set up container
370+
run: docker compose up --wait impala
371+
- name: Run Integration Tests
372+
shell: bash -l {0}
373+
run: |
374+
flit install -s
375+
pytest --cov=datajudge --cov-report=xml --cov-append --backend=impala tests/integration/test_integration.py -k uniques
376+
- name: Generate code coverage report
377+
uses: codecov/[email protected]
378+
with:
379+
file: ./coverage.xml
380+
381+
linux-integration_tests-impala-column-pt5:
382+
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - Impala - pt5"
383+
runs-on: ubuntu-20.04
384+
env:
385+
CI: True
386+
strategy:
387+
fail-fast: false
388+
matrix:
389+
PYTHON_VERSION: [ '3.8']
390+
391+
steps:
392+
- name: Checkout branch
393+
uses: actions/checkout@v3
394+
with:
395+
ref: ${{ github.head_ref }}
396+
- name: Fetch full git history
397+
run: git fetch --prune --unshallow
398+
- uses: conda-incubator/setup-miniconda@v2
399+
with:
400+
python-version: ${{ matrix.PYTHON_VERSION }}
401+
miniforge-variant: Mambaforge
402+
miniforge-version: 4.11.0-2
403+
use-mamba: true
404+
environment-file: environment.yml
405+
activate-environment: datajudge
406+
- name: Set up container
407+
run: docker compose up --wait impala
408+
- name: Run Integration Tests
409+
shell: bash -l {0}
410+
run: |
411+
flit install -s
412+
pytest --cov=datajudge --cov-report=xml --cov-append --backend=impala tests/integration/test_integration.py -k date
413+
- name: Generate code coverage report
414+
uses: codecov/[email protected]
415+
with:
416+
file: ./coverage.xml
417+
418+
linux-integration_tests-impala-column-pt6:
419+
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - Impala - pt6"
420+
runs-on: ubuntu-20.04
421+
env:
422+
CI: True
423+
strategy:
424+
fail-fast: false
425+
matrix:
426+
PYTHON_VERSION: [ '3.8']
427+
428+
steps:
429+
- name: Checkout branch
430+
uses: actions/checkout@v3
431+
with:
432+
ref: ${{ github.head_ref }}
433+
- name: Fetch full git history
434+
run: git fetch --prune --unshallow
435+
- uses: conda-incubator/setup-miniconda@v2
436+
with:
437+
python-version: ${{ matrix.PYTHON_VERSION }}
438+
miniforge-variant: Mambaforge
439+
miniforge-version: 4.11.0-2
440+
use-mamba: true
441+
environment-file: environment.yml
442+
activate-environment: datajudge
443+
- name: Set up container
444+
run: docker compose up --wait impala
445+
- name: Run Integration Tests
446+
shell: bash -l {0}
447+
run: |
448+
flit install -s
449+
pytest --cov=datajudge --cov-report=xml --cov-append --backend=impala tests/integration/test_integration.py -k varchar
450+
- name: Generate code coverage report
451+
uses: codecov/[email protected]
452+
with:
453+
file: ./coverage.xml
454+
455+
456+
linux-integration_tests-impala-column-pt7:
457+
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - Impala - pt7"
458+
runs-on: ubuntu-20.04
459+
env:
460+
CI: True
461+
strategy:
462+
fail-fast: false
463+
matrix:
464+
PYTHON_VERSION: [ '3.8']
465+
466+
steps:
467+
- name: Checkout branch
468+
uses: actions/checkout@v3
469+
with:
470+
ref: ${{ github.head_ref }}
471+
- name: Fetch full git history
472+
run: git fetch --prune --unshallow
473+
- uses: conda-incubator/setup-miniconda@v2
474+
with:
475+
python-version: ${{ matrix.PYTHON_VERSION }}
476+
miniforge-variant: Mambaforge
477+
miniforge-version: 4.11.0-2
478+
use-mamba: true
479+
environment-file: environment.yml
480+
activate-environment: datajudge
481+
- name: Set up container
482+
run: docker compose up --wait impala
483+
- name: Run Integration Tests
484+
shell: bash -l {0}
485+
run: |
486+
flit install -s
487+
pytest --cov=datajudge --cov-report=xml --cov-append --backend=impala tests/integration/test_integration.py -k numeric
488+
- name: Generate code coverage report
489+
uses: codecov/[email protected]
490+
with:
491+
file: ./coverage.xml

CHANGELOG.rst

+1
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@ Changelog
1515
- Implement :meth:`~datajudge.BetweenRequirement.add_column_type_constraint`. Previously, only the ``WithinRequirement`` method existed.
1616
- Implemented an option ``infer_pk`` to automatically retrieve a primary key definition as part of :meth:`datajudge.WithinRequirement.add_uniqueness_constraint`.
1717
- Added a ``name`` parameter to all ``add_x_constraint`` methods of ``WithinRequirement`` and ``BetweenRequirement``. This will give the corresponding pytest test a custom name.
18+
- Added preliminary support for Impala.
1819

1920

2021
1.2.0 - 2022.10.21

docker-compose.yaml

+64
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,64 @@
1+
# Taken from
2+
# https://github.com/ibis-project/ibis/blob/master/docker-compose.yml
3+
version: "3.4"
4+
services:
5+
impala:
6+
depends_on:
7+
- impala-postgres
8+
- kudu
9+
environment:
10+
PGPASSWORD: postgres
11+
healthcheck:
12+
interval: 30s
13+
retries: 20
14+
test:
15+
- CMD-SHELL
16+
- nc -z 127.0.0.1 21050 && nc -z 127.0.0.1 50070
17+
timeout: 10s
18+
hostname: localhost
19+
image: ibisproject/impala:latest
20+
ports:
21+
- 21050:21050
22+
networks:
23+
- impala
24+
impala-postgres:
25+
user: postgres
26+
hostname: postgres
27+
environment:
28+
POSTGRES_PASSWORD: postgres
29+
healthcheck:
30+
interval: 10s
31+
retries: 3
32+
test:
33+
- CMD
34+
- pg_isready
35+
timeout: 5s
36+
image: postgres:13.9-alpine
37+
networks:
38+
- impala
39+
kudu:
40+
cap_add:
41+
- SYS_TIME
42+
depends_on:
43+
- kudu-tserver
44+
environment:
45+
KUDU_MASTER: "true"
46+
image: ibisproject/kudu:latest
47+
ports:
48+
- 7051:7051
49+
- 8051:8051
50+
networks:
51+
- impala
52+
kudu-tserver:
53+
cap_add:
54+
- SYS_TIME
55+
environment:
56+
KUDU_MASTER: "false"
57+
image: ibisproject/kudu:latest
58+
ports:
59+
- 7050:7050
60+
- 8050:8050
61+
networks:
62+
- impala
63+
networks:
64+
impala:

environment.yml

+1
Original file line number | Diff line number | Diff line change
@@ -24,3 +24,4 @@ dependencies:
2424
- flit-core
2525
- flit
2626
- sphinx-autodoc-typehints
27+
- impyla

src/datajudge/constraints/miscs.py

+2
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,8 @@ def __init__(self, ref, primary_keys: List[str], name: str = None):
1515
def retrieve(
1616
self, engine: sa.engine.Engine, ref: DataReference
1717
) -> Tuple[Set[str], OptionalSelections]:
18+
if db_access.is_impala(engine):
19+
raise NotImplementedError("Primary key retrieval does not work for Impala.")
1820
values, selections = db_access.get_primary_keys(engine, self.ref)
1921
return set(values), selections
2022

src/datajudge/constraints/row.py

+2
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,8 @@ def __init__(
2121
self.max_missing_fraction_getter = max_missing_fraction_getter
2222

2323
def test(self, engine: sa.engine.Engine) -> TestResult:
24+
if db_access.is_impala(engine):
25+
raise NotImplementedError("Currently not implemented for impala.")
2426
self.max_missing_fraction = self.max_missing_fraction_getter(engine)
2527
self.ref1_minus_ref2_sample, _ = db_access.get_row_difference_sample(
2628
engine, self.ref, self.ref2

0 commit comments

Comments
 (0)