Skip to content

Optional summarization for web-scraping flow block #3496

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 16 additions & 12 deletions frontend/src/pages/Admin/AgentBuilder/index.jsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import React, { useState, useEffect, useRef } from "react";
import { useNavigate, useParams } from "react-router-dom";
import { Tooltip } from "react-tooltip";

import BlockList, { BLOCK_TYPES, BLOCK_INFO } from "./BlockList";
import AddBlockMenu from "./AddBlockMenu";
Expand Down Expand Up @@ -288,18 +289,6 @@ export default function AgentBuilder() {
});
};

// const runFlow = async (uuid) => {
// try {
// const { success, error, _results } = await AgentFlows.runFlow(uuid);
// if (!success) throw new Error(error);

// showToast("Flow executed successfully!", "success", { clear: true });
// } catch (error) {
// console.error(error);
// showToast("Failed to run agent flow", "error", { clear: true });
// }
// };

const clearFlow = () => {
if (!!flowId) navigate(paths.agents.builder());
setAgentName("");
Expand Down Expand Up @@ -356,6 +345,21 @@ export default function AgentBuilder() {
</div>
</div>
</div>
<Tooltip
id="content-summarization-tooltip"
place="top"
delayShow={300}
className="tooltip !text-xs z-99"
>
<p className="text-sm">
When enabled, long webpage content will be automatically summarized to
reduce token usage.
<br />
<br />
Note: This may affect data quality and remove specific details from
the original content.
</p>
</Tooltip>
</div>
);
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { Info } from "@phosphor-icons/react";
import React from "react";

export default function WebScrapingNode({
Expand Down Expand Up @@ -31,7 +32,9 @@ export default function WebScrapingNode({
</label>
<select
value={config.captureAs}
onChange={(e) => onConfigChange({ captureAs: e.target.value })}
onChange={(e) =>
onConfigChange({ ...config, captureAs: e.target.value })
}
className="w-full border-none bg-theme-settings-input-bg text-theme-text-primary text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none p-2.5"
>
{[
Expand Down Expand Up @@ -60,13 +63,44 @@ export default function WebScrapingNode({
</p>
<input
value={config.querySelector}
onChange={(e) => onConfigChange({ querySelector: e.target.value })}
onChange={(e) =>
onConfigChange({ ...config, querySelector: e.target.value })
}
placeholder=".article-content, #content, .main-content, etc."
className="w-full border-none bg-theme-settings-input-bg text-theme-text-primary text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none p-2.5"
/>
</div>
)}

<div className="flex justify-between items-center">
<div className="flex flex-row items-center gap-x-1 mb-2">
<label className="block text-sm font-medium text-theme-text-primary">
Content Summarization
</label>
<Info
size={16}
className="text-theme-text-secondary cursor-pointer"
data-tooltip-id="content-summarization-tooltip"
/>
</div>
<div className="flex items-center gap-2 mb-2">
<label className="relative inline-flex items-center cursor-pointer">
<input
type="checkbox"
checked={config.enableSummarization ?? true}
onChange={(e) =>
onConfigChange({
...config,
enableSummarization: e.target.checked,
})
}
className="sr-only peer"
aria-label="Toggle content summarization"
/>
<div className="w-11 h-6 bg-theme-settings-input-bg peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-primary-button/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-primary-button"></div>
</label>
</div>
</div>
<div>
<label className="block text-sm font-medium text-theme-text-primary mb-2">
Result Variable
Expand Down
15 changes: 13 additions & 2 deletions server/utils/agentFlows/executors/web-scraping.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ const { summarizeContent } = require("../../agents/aibitat/utils/summarize");
* @returns {Promise<string>} Scraped content
*/
async function executeWebScraping(config, context) {
const { url, captureAs = "text" } = config;
const { url, captureAs = "text", enableSummarization = true } = config;
const { introspect, logger, aibitat } = context;
logger(
`\x1b[43m[AgentFlowToolExecutor]\x1b[0m - executing Web Scraping block`
Expand Down Expand Up @@ -40,14 +40,25 @@ async function executeWebScraping(config, context) {
throw new Error("There was no content to be collected or read.");
}

if (!enableSummarization) {
logger(`Returning raw content as summarization is disabled`);
return content;
}

const tokenCount = new TokenManager(
aibitat.defaultProvider.model
).countFromString(content);
const contextLimit = Provider.contextLimit(
aibitat.defaultProvider.provider,
aibitat.defaultProvider.model
);
if (tokenCount < contextLimit) return content;

if (tokenCount < contextLimit) {
logger(
`Content within token limit (${tokenCount}/${contextLimit}). Returning raw content.`
);
return content;
}

introspect(
`This page's content is way too long (${tokenCount} tokens). I will summarize it right now.`
Expand Down