diff --git a/src/server/modules/ContentChunk/index.ts b/src/server/modules/ContentChunk/index.ts
index ce61616968cb8..4972883cdfcc5 100644
--- a/src/server/modules/ContentChunk/index.ts
+++ b/src/server/modules/ContentChunk/index.ts
@@ -3,6 +3,11 @@ import { Strategy } from 'unstructured-client/sdk/models/shared';
 
 import { NewChunkItem, NewUnstructuredChunkItem } from '@/database/schemas';
 import { ChunkingStrategy, Unstructured } from '@/libs/unstructured';
+import { getEnvironment } from '../../utils/env';
+import { knowledgeEnv } from '@/config/knowledge';
+
+// Get environment variable to control PDF processing method
+const USE_UNSTRUCTURED_FOR_PDF = getEnvironment('USE_UNSTRUCTURED_FOR_PDF') === 'true';
 
 export interface ChunkContentParams {
   content: Uint8Array;
@@ -26,7 +31,16 @@ export class ContentChunk {
   }
 
+  /**
+   * Unstructured handles a file only when it is a PDF, USE_UNSTRUCTURED_FOR_PDF
+   * is exactly 'true', and both the API key and the server URL are configured.
+   */
   isUsingUnstructured(params: ChunkContentParams) {
-    return params.fileType === 'application/pdf' && params.mode === 'hi-res';
+    return (
+      params.fileType === 'application/pdf' &&
+      USE_UNSTRUCTURED_FOR_PDF &&
+      !!knowledgeEnv.UNSTRUCTURED_API_KEY &&
+      !!knowledgeEnv.UNSTRUCTURED_SERVER_URL
+    );
   }
 
   async chunkContent(params: ChunkContentParams): Promise {
diff --git a/src/server/utils/env.test.ts b/src/server/utils/env.test.ts
new file mode 100644
index 0000000000000..a58fccf56aae7
--- /dev/null
+++ b/src/server/utils/env.test.ts
@@ -0,0 +1,111 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+import { getEnvironment } from './env';
+
+/**
+ * Restore an environment variable to a previously captured value.
+ * Assigning `undefined` to `process.env` would store the string 'undefined',
+ * so a variable that was originally unset has to be deleted instead.
+ */
+const restoreEnv = (key: string, value: string | undefined) => {
+  if (value === undefined) {
+    delete process.env[key];
+  } else {
+    process.env[key] = value;
+  }
+};
+
+describe('env utils', () => {
+  describe('getEnvironment', () => {
+    it('should return empty string if process is undefined', () => {
+      const originalProcess = global.process;
+      let result: string | undefined;
+
+      try {
+        // @ts-ignore
+        global.process = undefined;
+        result = getEnvironment('TEST_KEY');
+      } finally {
+        // Restore before asserting so a failure cannot leak the missing global
+        global.process = originalProcess;
+      }
+
+      expect(result).toBe('');
+    });
+
+    it('should return empty string if env var not found', () => {
+      expect(getEnvironment('NON_EXISTENT_KEY')).toBe('');
+    });
+
+    it('should return env var value if exists', () => {
+      const originalEnv = process.env.TEST_KEY;
+      process.env.TEST_KEY = 'test-value';
+
+      try {
+        expect(getEnvironment('TEST_KEY')).toBe('test-value');
+      } finally {
+        restoreEnv('TEST_KEY', originalEnv);
+      }
+    });
+  });
+
+  describe('isDev', () => {
+    const originalNodeEnv = process.env.NODE_ENV;
+
+    beforeEach(() => {
+      // isDev is computed at import time, so each test needs a fresh module
+      vi.resetModules();
+    });
+
+    afterEach(() => {
+      restoreEnv('NODE_ENV', originalNodeEnv);
+    });
+
+    it('should be true in development environment', async () => {
+      // @ts-ignore
+      process.env.NODE_ENV = 'development';
+      const { isDev } = await import('./env');
+      expect(isDev).toBe(true);
+    });
+
+    it('should be false in production environment', async () => {
+      // @ts-ignore
+      process.env.NODE_ENV = 'production';
+      const { isDev } = await import('./env');
+      expect(isDev).toBe(false);
+    });
+  });
+
+  describe('isOnServerSide', () => {
+    const originalWindow = global.window;
+
+    beforeEach(() => {
+      // isOnServerSide is computed at import time, so reload the module per test
+      vi.resetModules();
+    });
+
+    afterEach(() => {
+      if (originalWindow === undefined) {
+        // @ts-ignore
+        delete global.window;
+      } else {
+        // @ts-ignore
+        global.window = originalWindow;
+      }
+    });
+
+    it('should be true when window is undefined', async () => {
+      // @ts-ignore
+      delete global.window;
+      const { isOnServerSide } = await import('./env');
+      expect(isOnServerSide).toBe(true);
+    });
+
+    it('should be false when window is defined', async () => {
+      // @ts-ignore
+      global.window = {};
+      const { isOnServerSide } = await import('./env');
+      expect(isOnServerSide).toBe(false);
+    });
+  });
+});
diff --git a/src/server/utils/env.ts b/src/server/utils/env.ts
new file mode 100644
index 0000000000000..465b8e25028cc
--- /dev/null
+++ b/src/server/utils/env.ts
@@ -0,0 +1,20 @@
+/**
+ * True when NODE_ENV is 'development'. Evaluated once, at module load.
+ */
+export const isDev = process.env.NODE_ENV === 'development';
+
+/**
+ * True when no global `window` exists. Evaluated once, at module load.
+ */
+export const isOnServerSide = typeof window === 'undefined';
+
+/**
+ * Get environment variable value
+ * @param key - Environment variable key
+ * @returns Environment variable value, or empty string if the variable is unset,
+ *   empty, or no global `process` exists
+ */
+export const getEnvironment = (key: string): string => {
+  if (typeof process === 'undefined') return '';
+  return process.env[key] || '';
+};