Skip to content

Commit

Permalink
feature: add CI workflow to push images for pdf2md service
Browse files Browse the repository at this point in the history
  • Loading branch information
skeptrunedev committed Nov 15, 2024
1 parent b34e5ba commit fdb9298
Show file tree
Hide file tree
Showing 3 changed files with 152 additions and 40 deletions.
6 changes: 1 addition & 5 deletions .github/ISSUE_TEMPLATE/issue-template.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,7 @@ assignees: ''

### Target(s)

<replace w/ one or more of the following options: `server`, `search`, `chat`>

### Requirement to close

<please describe what is required to close this issue here>
<replace w/ name of the service(s) which are associated with this issue>

### Community channels

Expand Down
149 changes: 149 additions & 0 deletions .github/workflows/push-pdf2md-server.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# Builds and pushes the three pdf2md Docker images to Docker Hub:
#   - trieve/pdf2md-server
#   - trieve/chunk-worker
#   - trieve/supervisor-worker
# Runs on pushes to `main` that touch `pdf2md/server/**`, or on manual dispatch.
# Each image is tagged `latest` plus a commit-SHA-based tag.
name: Create PDF2MD Docker Images

concurrency:
  # `github.head_ref` is only populated for pull_request events, which this
  # workflow never receives (push / workflow_dispatch only), so it always
  # expanded to an empty string. Key the group on `github.ref` instead so runs
  # are grouped per branch; a newer run still cancels an in-progress older one.
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

on:
  workflow_dispatch:
  push:
    branches:
      - "main"
    paths:
      - "pdf2md/server/**"

jobs:
  pdf2md-server:
    name: Push PDF2MD Server image
    runs-on: ${{ matrix.runner }}
    strategy:
      matrix:
        runner: [blacksmith-8vcpu-ubuntu-2204]
        platform: [linux/amd64]
        # These excludes match nothing while the matrix only contains the
        # amd64 runner/platform pair; they only take effect once an arm
        # runner and linux/arm64 are added to the lists above.
        exclude:
          - runner: blacksmith-8vcpu-ubuntu-2204
            platform: linux/arm64
          - runner: blacksmith-8vcpu-ubuntu-2204-arm
            platform: linux/amd64
    steps:
      - name: Checkout the repo
        uses: actions/checkout@v4

      - name: Setup buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      # Computes the image tags and labels consumed by the build step below.
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            trieve/pdf2md-server
          tags: |
            type=raw,latest
            type=sha

      - name: Build and push Docker image
        # NOTE(review): the version tag was reconstructed from an obfuscated
        # "[email protected]" placeholder -- confirm it matches the intended release.
        uses: useblacksmith/build-push-action@v1.0.0-beta
        with:
          platforms: ${{ matrix.platform }}
          context: pdf2md/
          file: ./pdf2md/server/Dockerfile.pdf2md-server
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

  chunk-worker:
    name: Push PDF2MD Chunk Worker image
    runs-on: ${{ matrix.runner }}
    strategy:
      matrix:
        runner: [blacksmith-8vcpu-ubuntu-2204]
        platform: [linux/amd64]
        # No-op until an arm runner / linux/arm64 platform is added above.
        exclude:
          - runner: blacksmith-8vcpu-ubuntu-2204
            platform: linux/arm64
          - runner: blacksmith-8vcpu-ubuntu-2204-arm
            platform: linux/amd64
    steps:
      - name: Checkout the repo
        uses: actions/checkout@v4

      - name: Setup buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      # Computes the image tags and labels consumed by the build step below.
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            trieve/chunk-worker
          tags: |
            type=raw,latest
            type=sha

      - name: Build and push Docker image
        # NOTE(review): the version tag was reconstructed from an obfuscated
        # "[email protected]" placeholder -- confirm it matches the intended release.
        uses: useblacksmith/build-push-action@v1.0.0-beta
        with:
          platforms: ${{ matrix.platform }}
          context: pdf2md/
          file: ./pdf2md/server/Dockerfile.chunk-worker
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

  supervisor-worker:
    name: Push PDF2MD Supervisor Worker image
    runs-on: ${{ matrix.runner }}
    strategy:
      matrix:
        runner: [blacksmith-8vcpu-ubuntu-2204]
        platform: [linux/amd64]
        # No-op until an arm runner / linux/arm64 platform is added above.
        exclude:
          - runner: blacksmith-8vcpu-ubuntu-2204
            platform: linux/arm64
          - runner: blacksmith-8vcpu-ubuntu-2204-arm
            platform: linux/amd64
    steps:
      - name: Checkout the repo
        uses: actions/checkout@v4

      - name: Setup buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      # Computes the image tags and labels consumed by the build step below.
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            trieve/supervisor-worker
          tags: |
            type=raw,latest
            type=sha

      - name: Build and push Docker image
        # NOTE(review): the version tag was reconstructed from an obfuscated
        # "[email protected]" placeholder -- confirm it matches the intended release.
        uses: useblacksmith/build-push-action@v1.0.0-beta
        with:
          platforms: ${{ matrix.platform }}
          context: pdf2md/
          file: ./pdf2md/server/Dockerfile.supervisor-worker
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
37 changes: 2 additions & 35 deletions pdf2md/server/src/workers/chunk-worker.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use chm::tools::migrations::{run_pending_migrations, SetupArgs};
use pdf2md_server::{
use file_chunker::{
errors::ServiceError,
get_env,
models::ChunkingTask,
Expand Down Expand Up @@ -100,7 +100,7 @@ async fn main() {
pub async fn chunk_sub_pdf(
task: ChunkingTask,
clickhouse_client: clickhouse::Client,
) -> Result<(), pdf2md_server::errors::ServiceError> {
) -> Result<(), file_chunker::errors::ServiceError> {
let bucket = get_aws_bucket()?;
let file_data = bucket
.get_object(task.file_name.clone())
Expand All @@ -115,38 +115,5 @@ pub async fn chunk_sub_pdf(
let result = chunk_pdf(file_data, task.clone(), task.page_range, &clickhouse_client).await?;
log::info!("Got {} pages for {:?}", result.len(), task.task_id);

let mut page_inserter = clickhouse_client.insert("file_chunks").map_err(|e| {
log::error!("Error inserting recommendations: {:?}", e);
ServiceError::InternalServerError(format!("Error inserting task: {:?}", e))
})?;

for page in &result {
page_inserter.write(page).await.map_err(|e| {
log::error!("Error inserting task: {:?}", e);
ServiceError::InternalServerError(format!("Error inserting task: {:?}", e))
})?;
}

page_inserter.end().await.map_err(|e| {
log::error!("Error inserting task: {:?}", e);
ServiceError::InternalServerError(format!("Error inserting task: {:?}", e))
})?;

let prev_task =
pdf2md_server::operators::clickhouse::get_task(task.task_id, &clickhouse_client).await?;

let pages_processed = prev_task.pages_processed + 1;

if pages_processed == prev_task.pages {
update_task_status(task.task_id, FileTaskStatus::Completed, &clickhouse_client).await?;
} else {
update_task_status(
task.task_id,
FileTaskStatus::ChunkingFile(result.len() as u32 + prev_task.pages_processed),
&clickhouse_client,
)
.await?;
}

Ok(())
}

0 comments on commit fdb9298

Please sign in to comment.