Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,27 @@ jobs:
- name: Build with Maven
run: mvn -B install

# Integration test: build both Docker images, run the docker-compose stack,
# then compare the output against the reference CSV files.
- name: Set up Docker Compose
run: apt-get -y update && apt-get install -y docker-compose

- name: Build data-loader
run: sh build_data_loader.sh

- name: Build directory_sync_service
run: docker build --progress=plain -t samply/directory_sync_service .

- name: Remove any cached data
# "|| true" keeps the step green when the volume cannot be removed.
run: docker volume rm store-db-data || true

- name: Run integration test stack
# The step's exit status is that of the directory_sync_service container.
run: docker-compose -f docker-compose.ci.yml up --abort-on-container-exit --exit-code-from directory_sync_service

- name: Clear up the containers created by docker-compose
# Run the teardown even when an earlier step failed, so that no containers
# are left behind after a failed test run.
if: always()
run: docker-compose -f docker-compose.ci.yml down

- name: Compare output with standard
run: ./ci_compare_with_standard.sh

- name: Set up QEMU
uses: docker/setup-qemu-action@v2

Expand Down
18 changes: 18 additions & 0 deletions build_data_loader.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash
#
# Fetch the samply/data-loader sources into /tmp/data-loader (clone on first
# use, pull otherwise) and build the samply/data-loader Docker image from them.
#
# The CI workflow starts this script with `sh`, so only POSIX sh constructs
# are used here despite the bash shebang.

# Stop at the first failing command (cd, git, docker) instead of carrying on
# and building from the wrong directory or from stale sources.
set -eu

cd /tmp

# Check if the directory exists and either pull or clone as appropriate.
if [ -d "data-loader" ]; then
  echo "'data-loader' exists. Pulling latest changes..."
  cd data-loader
  git pull
else
  echo "'data-loader' does not exist. Cloning the repository..."
  git clone https://github.com/samply/data-loader.git
  cd data-loader
fi

echo "Building Docker image..."
docker build --progress=plain -t samply/data-loader .

61 changes: 61 additions & 0 deletions ci_compare_with_standard.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/bin/bash
#
# Compare the generated CSV files under test/ with the standard (reference)
# CSV files under data/.
#
# Both sides are normalised first: volatile columns are dropped, the header
# row is removed and the rows are sorted. The normalised files are written to
# the current directory as *.processed.csv. The script exits with status 1 on
# the first mismatch, or when an input file is missing.

# Copy semicolon-separated stdin to stdout with one column left out.
# Arguments: $1 - 1-based index of the column to drop.
# When the dropped column is the last one, a trailing semicolon remains.
drop_column() {
  awk -F';' -v drop="$1" 'BEGIN {OFS=";"} {for(i=1;i<=NF;i++) if(i!=drop) printf "%s%s", $i, (i<NF ? OFS : ""); printf "\n"}'
}

# Abort with status 1 unless the given file is readable.
# Without this check a missing input would be normalised to an empty file,
# and two empty files would wrongly compare as equal.
# Arguments: $1 - path of the input file.
require_file() {
  if [ ! -r "$1" ]; then
    echo "Required input file '$1' is missing or unreadable." >&2
    exit 1
  fi
}

require_file data/crc-cohort-facts-full.csv
require_file test/DirectoryFactTables.csv

# Preprocess standard fact table data
# Remove column 8 (last_update), since this is never the same.
drop_column 8 < data/crc-cohort-facts-full.csv |
  # Correct ICD code
  sed 's/urn:miriam:icd:urn:miriam:icd/urn:miriam:icd/' |
  # Remove arbitrary patient ID
  sed 's/Cohort[^;]*;/Cohort;/' |
  # Remove quotes (currently disabled)
  #sed 's/"//g' |
  # Remove header row
  grep -v "sample_type;number_of_donors;number_of_samples" |
  sort > crc-cohort_facts.processed.csv

# Preprocess generated output fact table data
# Remove column 10 (national_node), which is not in Petr's fact tables.
drop_column 10 < test/DirectoryFactTables.csv |
  # Remove the left-over semicolon at the end of each line
  sed 's/;$//' |
  # Remove column 8 (last_update), since this is never the same.
  drop_column 8 |
  # Remove arbitrary patient ID
  sed 's/Cohort[^;]*;/Cohort;/' |
  sed 's/_collection_/:collection:/' |
  # Remove header row
  grep -v "sample_type;number_of_donors;number_of_samples" |
  sort > DirectoryFactTables.processed.csv

# Compare fact tables
if diff crc-cohort_facts.processed.csv DirectoryFactTables.processed.csv; then
  echo "Fact table output matches against standard."
else
  echo "Fact table output does not match against standard."
  exit 1
fi

require_file data/crc-cohort-collections-full.csv
require_file test/DirectoryCollections.csv

# Preprocess standard collections data
# Remove column 7 (timestamp), since this is never the same.
drop_column 7 < data/crc-cohort-collections-full.csv |
  # Remove header row
  grep -v "order_of_magnitude" |
  sort > crc-cohort_collections.processed.csv

# Preprocess generated output collections data
# Remove column 7, as for the standard file, since this is never the same.
drop_column 7 < test/DirectoryCollections.csv |
  # Remove header row
  grep -v "order_of_magnitude" |
  sort > DirectoryCollections.processed.csv

# Compare collections
if diff crc-cohort_collections.processed.csv DirectoryCollections.processed.csv; then
  echo "Collections output matches against standard."
else
  echo "Collections output does not match against standard."
  exit 1
fi

2 changes: 2 additions & 0 deletions data/crc-cohort-collections-full.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id;country;type;data_categories;order_of_magnitude;size;timestamp;number_of_donors;order_of_magnitude_donors;sex;diagnosis_available;age_low;age_high;materials;storage_temperatures;national_node
bbmri-eric:ID:EU_BBMRI-ERIC:collection:CRC-Cohort;EU;SAMPLE;BIOLOGICAL_SAMPLES;4;61698;2025-04-22T09:15:09;11217;4;FEMALE,MALE;urn:miriam:icd:C20,urn:miriam:icd:C18.7,urn:miriam:icd:C18.6,urn:miriam:icd:C18.5,urn:miriam:icd:C18.4,urn:miriam:icd:C18.3,urn:miriam:icd:C18.2,urn:miriam:icd:C19,urn:miriam:icd:C18.1,urn:miriam:icd:C18.0;35;100;OTHER,TISSUE_PARAFFIN_EMBEDDED,TISSUE_FROZEN;;EU
2 changes: 2 additions & 0 deletions data/crc-cohort-facts-1patient.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id;sex;disease;age_range;sample_type;number_of_donors;number_of_samples;last_update;collection
bbmri-eric:factID:EU_BBMRI-ERIC:collection:CRC-Cohort:663481554;FEMALE;urn:miriam:icd:C18.0;Adult;TISSUE_PARAFFIN_EMBEDDED;1;1;2025-04-14;bbmri-eric:ID:EU_BBMRI-ERIC:collection:CRC-Cohort
Loading