Skip to content

Commit

Permalink
ENH: Add history, model selection, minor fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
sanand0 committed Oct 14, 2024
1 parent f9f594f commit c299750
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 29 deletions.
11 changes: 8 additions & 3 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ <h5 class="card-title">Clinical Trials</h5>
</button>
</div>
<div class="col-md-2 mb-3">
<label for="sample" class="form-label fw-bold">Sample</label>
<label for="sample" class="form-label fw-bold">Sample size</label>
<input class="form-control" type="number" id="sample" min="1" value="5" aria-label="Number of samples to generate prompt">
</div>
<div class="col-md-6 mb-3">
Expand Down Expand Up @@ -196,7 +196,7 @@ <h5 class="card-title">Clinical Trials</h5>
</button>
</div>
<div class="col-md-2 mb-3">
<label for="examples" class="form-label fw-bold">Examples</label>
<label for="examples" class="form-label fw-bold"># Examples</label>
<input class="form-control" type="number" id="examples" min="1" value="5" aria-label="Number of examples to revise prompt">
</div>
<div class="col-md-6 mb-3">
Expand All @@ -207,7 +207,7 @@ <h5 class="card-title">Clinical Trials</h5>
<option value="chatgpt-4o-latest">ChatGPT 4o ($5)</option>
</select>
</div>
<div class="col-12 mb-3" id="revised-prompt" style="white-space: pre-wrap;"></div>
<div class="col-12 mb-3 border rounded py-3" id="revised-prompt" style="white-space: pre-wrap;"></div>
</div>

<div class="row">
Expand All @@ -217,6 +217,11 @@ <h5 class="card-title">Clinical Trials</h5>
</button>
</div>
</div>

<div class="row"></div>
<h2 class="h4">Experiment history</h2>
<div class="table-responsive" id="experiments"></div>
</div>
</div>

<footer class="my-5 vh-100 d-flex align-items-center justify-content-center">
Expand Down
117 changes: 91 additions & 26 deletions script.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { sumBy, sortBy } from "https://cdn.jsdelivr.net/npm/lodash-es@4/+esm";
import { diffWords } from "https://cdn.jsdelivr.net/npm/diff@7/+esm";

const $prompt = document.querySelector("#prompt");
const $promptModel = document.querySelector("#prompt-model");
const $outputModel = document.querySelector("#output-model");
const $generatePrompt = document.querySelector("#generate-prompt");
const $generateOutput = document.querySelector("#generate-output");
Expand All @@ -17,16 +18,21 @@ const $score = document.querySelector("#score");
const $outputProgress = document.querySelector("#output-progress");
const $data = document.querySelector("#data");
const $embeddingSimilarity = document.querySelector("#embedding-similarity");
const $evaluationModel = document.querySelector("#evaluation-model");
const $criteria = document.querySelector("#criteria");
const $evaluatePrompt = document.querySelector("#evaluate-prompt");
const $evaluateCancel = document.querySelector("#evaluate-cancel");
const $revisePrompt = document.querySelector("#revise-prompt");
const $revisionModel = document.querySelector("#revision-model");
const $revisedPrompt = document.querySelector("#revised-prompt");
const $applyPrompt = document.querySelector("#apply-prompt");
const $experiments = document.querySelector("#experiments");
const $itemModal = document.querySelector("#item-modal");
const modal = new bootstrap.Modal($itemModal);

const shuffler = d3.shuffler(d3.randomLcg(12345));

const experiments = {};
let sample;
let data;
let criteria = [];
Expand Down Expand Up @@ -88,7 +94,7 @@ $generatePrompt.addEventListener("click", async () => {
$generatePrompt.querySelector(".loading").classList.remove("d-none");
$generatePrompt.disabled = true;
for await (const { content, usage } of llmStream({
model: "gpt-4o-mini",
model: $promptModel.value,
messages: generatePromptMessages(sample),
})) {
const promptMatch = content.match(/<PROMPT>([\s\S]*?)<\/PROMPT>/);
Expand Down Expand Up @@ -162,13 +168,11 @@ function drawTable() {
${pc1(row.embeddingSimilarity)}
</td>`}
${criteria.length && firstRow[criteria[0]]
? html`<td class="text-end">
${num2(row.embeddingSimilarity + sumBy(criteria, (c) => (row[c]?.success ? 1 : 0)))}
</td>`
? html`<td class="text-end">${num2(getScore(row))}</td>`
: null}
${criteria.map((c) =>
row[c]
? html`<td class="text-center" title="${row[c].explanation}">${row[c].success ? "✅" : "❌"}</td>`
? html`<td class="text-center" title="${row[c]?.explanation}">${row[c]?.success ? "✅" : "❌"}</td>`
: null
)}
</tr>
Expand All @@ -177,10 +181,7 @@ function drawTable() {
</tbody>`,
$outputTable
);
const score = data.reduce(
(acc, row) => acc + (row.embeddingSimilarity + sumBy(criteria, (c) => (row[c]?.success ? 1 : 0))),
0
);
const score = sumBy(data, getScore);
if (score) render(html`<div class="text-end">Score: ${num2(score)}</div>`, $score);
$score.classList.toggle("d-none", !score);
}
Expand All @@ -204,7 +205,7 @@ $generateOutput.addEventListener("click", async () => {
// Generate outputs for each row
for (const [index, row] of data.entries()) {
drawTable();
$outputProgress.style.width = `${((index) / data.length) * 100}%`;
$outputProgress.style.width = `${(index / data.length) * 100}%`;
if (generateCancel) break;
try {
for await (const { content } of llmStream({
Expand Down Expand Up @@ -271,7 +272,7 @@ $outputTable.addEventListener("click", (event) => {
<td>${column}</td>
<td>
${typeof rowData[column] === "object"
? html`${rowData[column].success ? "✅" : "❌"} ${rowData[column].explanation}`
? html`${rowData[column]?.success ? "✅" : "❌"} ${rowData[column]?.explanation}`
: rowData[column]}
</td>
</tr>
Expand Down Expand Up @@ -355,7 +356,7 @@ $evaluatePrompt.addEventListener("click", async () => {
for (const row of data) {
if (evaluateCancel) break;
for await (const { content, usage } of llmStream({
model: "gpt-4o-mini",
model: $evaluationModel.value,
messages: [
{ role: "system", content: `Given the <EXPECTED> text and the <GENERATED> output, evaluate the criteria.` },
{
Expand All @@ -377,6 +378,14 @@ $evaluatePrompt.addEventListener("click", async () => {
}
}

// Save result
experiments[$prompt.value] = {
score: sumBy(data, getScore),
data: JSON.parse(JSON.stringify(data)),
criteria: JSON.parse(JSON.stringify(criteria)),
};
drawExperiments();

// Hide loading indicator and enable generate, disable cancel, buttons.
$evaluatePrompt.querySelector(".loading").classList.add("d-none");
$evaluateCancel.querySelector(".loading").classList.add("d-none");
Expand All @@ -386,6 +395,51 @@ $evaluatePrompt.addEventListener("click", async () => {
drawTable();
});

function drawExperiments() {
render(
html`
<table class="table">
<thead>
<tr>
<th class="text-end">Score</th>
<th>Prompt</th>
</tr>
</thead>
<tbody>
${Object.entries(experiments).map(([prompt, { score, criteria, data }]) => {
return html`<tr>
<td class="text-end" style="white-space: pre-wrap">${num2(score)}</td>
<td>
${prompt}
<p class="mt-4"><strong>Criteria:</strong></p>
<div class="table-responsive">
<table class="table table-sm">
<tbody>
${criteria.map(
(c) => html`
<tr>
<th scope="row">${c}</th>
<td>
${data.map(
(d) => html`<span title="${d[c]?.explanation}">${d[c]?.success ? "✅" : "❌"}</span>`
)}
</td>
</tr>
`
)}
</tbody>
</table>
</div>
</td>
</tr>`;
})}
</tbody>
</table>
`,
$experiments
);
}

// When cancel is clicked, trigger cancellation via cancel=true.
// Show loading indicator to suggest cancellation is in progress and disable the button.
$evaluateCancel.addEventListener("click", () => {
Expand All @@ -403,15 +457,15 @@ $revisePrompt.addEventListener("click", async () => {
// Sort a copy of data by score = embeddingSimilarity + sum(criteria[].success)
const sortedData = sortBy(data, (d) => d.embeddingSimilarity + sumBy(criteria, (term) => (d[term]?.success ? 1 : 0)));
const examples = sortedData.slice(0, +document.querySelector("#examples").value);

$revisedPrompt.innerHTML = /* html */ `<div class="spinner-border" role="status"><span class="visually-hidden">Loading...</span></div>`;
for await (const { content, usage } of llmStream({
model: "gpt-4o-mini",
model: $revisionModel.value,
messages: [
...generatePromptMessages(sample),
{ role: "assistant", content: `<PROMPT>${$prompt.value}</PROMPT>` },
{
role: "user",
content: `Improve this prompt using feedback from these evals. Think step by step.
content: `Improve this prompt using feedback from these evals and try to improve similarity. Think step by step.
${examples
.map(
Expand All @@ -424,7 +478,7 @@ ${criteria
.map(
(c) => `<CHECK>
<CRITERION>${c}</CRITERION>
<RESULT>${d[c].success ? "YES" : "NO"} ${d[c].explanation}</RESULT>
<RESULT>${d[c]?.success ? "YES" : "NO"} ${d[c]?.explanation}</RESULT>
</CHECK>`
)
.join("\n")}
Expand All @@ -440,7 +494,20 @@ ${criteria
revisedPrompt = $revisedPrompt.textContent;
const diffs = diffWords($prompt.value, revisedPrompt);
$revisedPrompt.innerHTML = diffs
.map(({ added, removed, value }) => (added ? `<ins>${value}</ins>` : removed ? `<del>${value}</del>` : value))
.map(({ added, removed, value }) => {
const val = value.replace(
/[&<>'"]/g,
(char) =>
({
"&": "&amp;",
"<": "&lt;",
">": "&gt;",
"'": "&#39;",
'"': "&quot;",
}[char])
);
return added ? `<ins>${val}</ins>` : removed ? `<del>${val}</del>` : val;
})
.join("");
});

Expand All @@ -464,18 +531,16 @@ $embeddingSimilarity.addEventListener("input", (event) => {
drawTable();
});

const getScore = (row) => row.embeddingSimilarity + sumBy(criteria, (c) => (row[c]?.success ? 1 : 0));

const savedInput = localStorage.getItem("promptEvalsInput");
if (savedInput) {
$data.value = savedInput;
$data.dispatchEvent(new Event("input", { bubbles: true }));
}
if (savedInput) $data.value = savedInput;
$data.dispatchEvent(new Event("input", { bubbles: true }));

const savedCriteria = localStorage.getItem("promptEvalsCriteria");
if (savedCriteria) {
$criteria.value = savedCriteria;
$criteria.dispatchEvent(new Event("input", { bubbles: true }));
}
if (savedCriteria) $criteria.value = savedCriteria;
$criteria.dispatchEvent(new Event("input", { bubbles: true }));

// TODO: Use the correct models
// TODO: Graceful fetch error handling
// TODO: Graceful cancellation / weird data error handling
// TODO: Refactor for readability

0 comments on commit c299750

Please sign in to comment.