Skip to content

Commit

Permalink
feature: add OpenAPI specs to crawling setup
Browse files Browse the repository at this point in the history
  • Loading branch information
skeptrunedev committed Oct 2, 2024
1 parent 20c3241 commit 4318911
Show file tree
Hide file tree
Showing 12 changed files with 478 additions and 38 deletions.
29 changes: 28 additions & 1 deletion clients/ts-sdk/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"name": "BSL",
"url": "https://github.com/devflowinc/trieve/blob/main/LICENSE.txt"
},
"version": "0.11.12"
"version": "0.11.13"
},
"servers": [
{
Expand Down Expand Up @@ -7193,12 +7193,31 @@
},
"CrawlInterval": {
"type": "string",
"description": "Interval at which specified site should be re-scraped",
"enum": [
"daily",
"weekly",
"monthly"
]
},
"CrawlOpenAPIOptions": {
"type": "object",
"description": "Options for including an openapi spec in the crawl",
"required": [
"openapi_schema_url",
"openapi_tag"
],
"properties": {
"openapi_schema_url": {
"type": "string",
"description": "URL of the OpenAPI JSON schema to be processed alongside the site crawl"
},
"openapi_tag": {
"type": "string",
"description": "Tag to look for to determine if a page should create an openapi route chunk instead of chunks from heading-split of the HTML"
}
}
},
"CrawlOptions": {
"type": "object",
"properties": {
Expand Down Expand Up @@ -7259,6 +7278,14 @@
"description": "How many levels deep to crawl, defaults to 10",
"nullable": true
},
"openapi_options": {
"allOf": [
{
"$ref": "#/components/schemas/CrawlOpenAPIOptions"
}
],
"nullable": true
},
"site_url": {
"type": "string",
"description": "The URL to crawl",
Expand Down
18 changes: 18 additions & 0 deletions clients/ts-sdk/src/types.gen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -398,8 +398,25 @@ export type CountChunksReqPayload = {

export type CountSearchMethod = 'fulltext' | 'semantic' | 'bm25';

/**
 * How often the specified site should be re-scraped.
 */
export type CrawlInterval =
  | 'daily'
  | 'weekly'
  | 'monthly';

/**
 * Settings for including an OpenAPI spec in a crawl.
 */
export type CrawlOpenAPIOptions = {
  /** URL of the OpenAPI JSON schema to be processed alongside the site crawl. */
  openapi_schema_url: string;
  /**
   * Tag to look for when deciding whether a page should produce an OpenAPI
   * route chunk instead of chunks from a heading-split of its HTML.
   */
  openapi_tag: string;
};

export type CrawlOptions = {
/**
* Boost titles such that keyword matches in titles are prioritized in search results. Strongly recommended to leave this on. Defaults to true.
Expand Down Expand Up @@ -430,6 +447,7 @@ export type CrawlOptions = {
* How many levels deep to crawl, defaults to 10
*/
max_depth?: (number) | null;
openapi_options?: ((CrawlOpenAPIOptions) | null);
/**
* The URL to crawl
*/
Expand Down
123 changes: 110 additions & 13 deletions frontends/dashboard/src/components/NewDatasetModal.tsx

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions frontends/search/src/components/ChunkMetadataDisplay.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import {
Switch,
Match,
useContext,
createEffect,
} from "solid-js";
import {
indirectHasOwnProperty,
Expand Down Expand Up @@ -65,6 +66,8 @@ const ChunkMetadataDisplay = (props: ChunkMetadataDisplayProps) => {
const [deleted, setDeleted] = createSignal(false);
const [showMetadata, setShowMetadata] = createSignal(false);
const [expandMetadata, setExpandMetadata] = createSignal(false);
const [imageLinks, setImageLinks] = createSignal<string[] | null>(null);

const $currentDataset = datasetAndUserContext.currentDataset;

const location = useLocation();
Expand Down Expand Up @@ -103,6 +106,17 @@ const ChunkMetadataDisplay = (props: ChunkMetadataDisplayProps) => {
props.setShowConfirmModal(true);
};

// Keeps the `imageLinks` signal in sync with the current chunk's `image_urls`.
createEffect(() => {
  if (
    !props.chunk.metadata ||
    !indirectHasOwnProperty(props.chunk, "image_urls")
  ) {
    // Clear any links set for a previously rendered chunk; otherwise stale
    // images stay visible when `props.chunk` changes to one without images.
    // NOTE(review): the `metadata` gate is kept from the original, but
    // `image_urls` is read from the chunk itself — confirm it is intended.
    setImageLinks(null);
    return;
  }

  setImageLinks(props.chunk.image_urls);
});

const useExpand = createMemo(() => {
if (!props.chunk.chunk_html) return false;
return props.chunk.chunk_html.split(" ").length > 20 * 15;
Expand Down Expand Up @@ -257,6 +271,11 @@ const ChunkMetadataDisplay = (props: ChunkMetadataDisplayProps) => {
</span>
</div>
</Show>
<Show when={imageLinks() != null}>
<For each={imageLinks() ?? []}>
{(link) => <img class="w-40" src={link ?? ""} alt={link} />}
</For>
</Show>
<Show when={Object.keys(props.chunk.metadata ?? {}).length > 0}>
<button
class="mt-2 flex w-fit items-center space-x-1 rounded-md border bg-neutral-200/50 px-2 py-1 font-semibold text-magenta-500 hover:bg-neutral-200/90 dark:bg-neutral-700/60 dark:text-magenta-400"
Expand Down Expand Up @@ -310,6 +329,7 @@ const ChunkMetadataDisplay = (props: ChunkMetadataDisplayProps) => {
</div>
<div class="mb-1 h-1 w-full border-b border-neutral-300 dark:border-neutral-600" />
<div
id="score-chunk-html"
classList={{
"line-clamp-4 gradient-mask-b-0": useExpand() && !expanded(),
"text-ellipsis max-w-[100%] break-words space-y-5 leading-normal !text-black dark:!text-white":
Expand Down
2 changes: 1 addition & 1 deletion frontends/search/src/components/ResultsPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ const ResultsPage = (props: ResultsPageProps) => {
},
slim_chunks: props.search.debounced.slimChunks ?? false,
page_size: props.search.debounced.pageSize ?? 10,
get_total_pages: props.search.debounced.getTotalPages ?? false,
get_total_pages: props.search.debounced.getTotalPages ?? true,
typo_options: {
correct_typos: props.search.debounced.correctTypos,
one_typo_word_range: {
Expand Down
11 changes: 6 additions & 5 deletions frontends/search/src/components/ScoreChunk.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ const ScoreChunk = (props: ScoreChunkProps) => {
const [expandMetadata, setExpandMetadata] = createSignal(
props.defaultShowMetadata ?? false,
);
const [imageLink, setImageLink] = createSignal<string | null>(null);
const [imageLinks, setImageLinks] = createSignal<string[] | null>(null);

createEffect(() => {
if (
Expand All @@ -97,8 +97,7 @@ const ScoreChunk = (props: ScoreChunkProps) => {
return null;
}

const imageLink = props.chunk.image_urls?.[0] as string;
setImageLink(imageLink);
setImageLinks(props.chunk.image_urls);
});

createEffect(() => {
Expand Down Expand Up @@ -390,8 +389,10 @@ const ScoreChunk = (props: ScoreChunkProps) => {
</span>
</div>
</Show>
<Show when={imageLink() != null}>
<img class="w-40" src={imageLink() ?? ""} alt="" />
<Show when={imageLinks() != null}>
<For each={imageLinks() ?? []}>
{(link) => <img class="w-40" src={link ?? ""} alt={link} />}
</For>
</Show>
<Show when={Object.keys(props.chunk.metadata ?? {}).length > 0}>
<button
Expand Down
Loading

0 comments on commit 4318911

Please sign in to comment.