diff --git a/.github/ISSUE_TEMPLATE/issue-template.md b/.github/ISSUE_TEMPLATE/issue-template.md index ea5603bfc..9b7050b5d 100644 --- a/.github/ISSUE_TEMPLATE/issue-template.md +++ b/.github/ISSUE_TEMPLATE/issue-template.md @@ -13,11 +13,7 @@ assignees: '' ### Target(s) - - -### Requirement to close - - + ### Community channels diff --git a/.github/workflows/push-pdf2md-server.yml b/.github/workflows/push-pdf2md-server.yml new file mode 100644 index 000000000..e9ac0bd0e --- /dev/null +++ b/.github/workflows/push-pdf2md-server.yml @@ -0,0 +1,149 @@ +name: Create PDF2MD Docker Images + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref }} + cancel-in-progress: true + +on: + workflow_dispatch: + push: + branches: + - "main" + paths: + - "pdf2md/server/**" + +jobs: + pdf2md-server: + name: Push PDF2MD Server image + runs-on: ${{ matrix.runner }} + strategy: + matrix: + runner: [blacksmith-8vcpu-ubuntu-2204] + platform: [linux/amd64] + exclude: + - runner: blacksmith-8vcpu-ubuntu-2204 + platform: linux/arm64 + - runner: blacksmith-8vcpu-ubuntu-2204-arm + platform: linux/amd64 + steps: + - name: Checkout the repo + uses: actions/checkout@v4 + + - name: Setup buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: | + trieve/pdf2md-server + tags: | + type=raw,latest + type=sha + + - name: Build and push Docker image + uses: useblacksmith/build-push-action@v1.0.0-beta + with: + platforms: ${{ matrix.platform }} + context: pdf2md/ + file: ./pdf2md/server/Dockerfile.pdf2md-server + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + chunk-worker: + name: Push PDF2MD Chunk Worker image + runs-on: ${{ matrix.runner }} + strategy: + matrix: + runner: [blacksmith-8vcpu-ubuntu-2204] + platform: [linux/amd64] + exclude: + - runner: blacksmith-8vcpu-ubuntu-2204 + platform: linux/arm64 + - runner: blacksmith-8vcpu-ubuntu-2204-arm + platform: linux/amd64 + steps: + - name: Checkout the repo + uses: actions/checkout@v4 + + - name: Setup buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: | + trieve/chunk-worker + tags: | + type=raw,latest + type=sha + + - name: Build and push Docker image + uses: useblacksmith/build-push-action@v1.0.0-beta + with: + platforms: ${{ matrix.platform }} + context: pdf2md/ + file: ./pdf2md/server/Dockerfile.chunk-worker + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + supervisor-worker: + name: Push PDF2MD Supervisor Worker image + runs-on: ${{ matrix.runner }} + strategy: + matrix: + runner: [blacksmith-8vcpu-ubuntu-2204] + platform: [linux/amd64] + exclude: + - runner: blacksmith-8vcpu-ubuntu-2204 + platform: linux/arm64 + - runner: blacksmith-8vcpu-ubuntu-2204-arm + platform: linux/amd64 + steps: + - name: Checkout the repo + uses: actions/checkout@v4 + + - name: Setup buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: | + trieve/supervisor-worker + tags: | + type=raw,latest + type=sha + + - name: Build and push Docker image + uses: useblacksmith/build-push-action@v1.0.0-beta + with: + platforms: ${{ matrix.platform }} + context: pdf2md/ + file: ./pdf2md/server/Dockerfile.supervisor-worker + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/pdf2md/server/src/workers/chunk-worker.rs b/pdf2md/server/src/workers/chunk-worker.rs index dd31f9c7a..2e5b106d5 100644 --- a/pdf2md/server/src/workers/chunk-worker.rs +++ b/pdf2md/server/src/workers/chunk-worker.rs @@ -1,5 +1,5 @@ use chm::tools::migrations::{run_pending_migrations, SetupArgs}; -use pdf2md_server::{ +use file_chunker::{ errors::ServiceError, get_env, models::ChunkingTask, @@ -100,7 +100,7 @@ async fn main() { pub async fn chunk_sub_pdf( task: ChunkingTask, clickhouse_client: clickhouse::Client, -) -> Result<(), pdf2md_server::errors::ServiceError> { +) -> Result<(), file_chunker::errors::ServiceError> { let bucket = get_aws_bucket()?; let file_data = bucket .get_object(task.file_name.clone()) @@ -115,38 +115,5 @@ pub async fn chunk_sub_pdf( let result = chunk_pdf(file_data, task.clone(), task.page_range, &clickhouse_client).await?; log::info!("Got {} pages for {:?}", result.len(), task.task_id); - let mut page_inserter = clickhouse_client.insert("file_chunks").map_err(|e| { - log::error!("Error inserting recommendations: {:?}", e); - ServiceError::InternalServerError(format!("Error inserting task: {:?}", e)) - })?; - - for page in &result { - page_inserter.write(page).await.map_err(|e| { - log::error!("Error inserting task: {:?}", e); - ServiceError::InternalServerError(format!("Error inserting task: {:?}", e)) - })?; - } - - page_inserter.end().await.map_err(|e| { - log::error!("Error inserting task: {:?}", e); - ServiceError::InternalServerError(format!("Error inserting task: {:?}", e)) - })?; - - let prev_task = - pdf2md_server::operators::clickhouse::get_task(task.task_id, &clickhouse_client).await?; - - let pages_processed = prev_task.pages_processed + 1; - - if pages_processed == prev_task.pages { - update_task_status(task.task_id, FileTaskStatus::Completed, &clickhouse_client).await?; - } else { - update_task_status( - task.task_id, - FileTaskStatus::ChunkingFile(result.len() as u32 + prev_task.pages_processed), - &clickhouse_client, - ) - .await?; - } - Ok(()) }