Skip to content

Commit

Permalink
Serialize the source excerpts to and from pure JSON
Browse files Browse the repository at this point in the history
Signed-off-by: Adam Treat <[email protected]>
  • Loading branch information
manyoso committed Aug 1, 2024
1 parent 1a0b483 commit 7cbff80
Show file tree
Hide file tree
Showing 6 changed files with 149 additions and 185 deletions.
9 changes: 2 additions & 7 deletions gpt4all-chat/chatllm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -798,10 +798,8 @@ bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString
QString docsContext;
if (!localDocsExcerpts.isEmpty()) {
// FIXME(adam): use the new tool template when it is available; otherwise fall back to this format
QStringList results;
for (const SourceExcerpt &info : localDocsExcerpts)
results << u"Collection: %1\nPath: %2\nExcerpt: %3"_s.arg(info.collection, info.path, info.text);
docsContext = u"### Context:\n%1\n\n"_s.arg(results.join("\n\n"));
QString json = SourceExcerpt::toJson(localDocsExcerpts);
docsContext = u"### Context:\n%1\n\n"_s.arg(json);
}

int n_threads = MySettings::globalInstance()->threadCount();
Expand Down Expand Up @@ -910,9 +908,6 @@ bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString
emit sourceExcerptsChanged(sourceExcerpts);
}

// Erase the context of the tool call
m_ctx.n_past = std::max(0, m_ctx.n_past);
m_ctx.tokens.erase(m_ctx.tokens.end() - m_promptResponseTokens, m_ctx.tokens.end());
m_promptResponseTokens = 0;
m_promptTokens = 0;
m_response = std::string();
Expand Down
149 changes: 39 additions & 110 deletions gpt4all-chat/chatmodel.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ struct ChatItem
Q_PROPERTY(bool thumbsUpState MEMBER thumbsUpState)
Q_PROPERTY(bool thumbsDownState MEMBER thumbsDownState)
Q_PROPERTY(QList<SourceExcerpt> sources MEMBER sources)
Q_PROPERTY(QList<SourceExcerpt> consolidatedSources MEMBER consolidatedSources)

public:
// TODO: Maybe we should include the model name here as well as timestamp?
Expand All @@ -39,7 +38,6 @@ struct ChatItem
QString prompt;
QString newResponse;
QList<SourceExcerpt> sources;
QList<SourceExcerpt> consolidatedSources;
bool currentResponse = false;
bool stopped = false;
bool thumbsUpState = false;
Expand All @@ -65,8 +63,7 @@ class ChatModel : public QAbstractListModel
StoppedRole,
ThumbsUpStateRole,
ThumbsDownStateRole,
SourcesRole,
ConsolidatedSourcesRole
SourcesRole
};

int rowCount(const QModelIndex &parent = QModelIndex()) const override
Expand Down Expand Up @@ -102,8 +99,6 @@ class ChatModel : public QAbstractListModel
return item.thumbsDownState;
case SourcesRole:
return QVariant::fromValue(item.sources);
case ConsolidatedSourcesRole:
return QVariant::fromValue(item.consolidatedSources);
}

return QVariant();
Expand All @@ -122,7 +117,6 @@ class ChatModel : public QAbstractListModel
roles[ThumbsUpStateRole] = "thumbsUpState";
roles[ThumbsDownStateRole] = "thumbsDownState";
roles[SourcesRole] = "sources";
roles[ConsolidatedSourcesRole] = "consolidatedSources";
return roles;
}

Expand Down Expand Up @@ -200,34 +194,17 @@ class ChatModel : public QAbstractListModel
}
}

// Collapses a list of excerpts into one entry per originating document.
//
// An excerpt is identified by its `file` field, or by its `url` field when
// `file` is empty. The first excerpt seen for a given identifier is kept as
// the representative entry; the text of every later excerpt with the same
// identifier is appended to it, separated by "\n---\n".
//
// Returns the merged entries as produced by QMap::values(), i.e. ordered by
// identifier rather than by their position in `sources`.
QList<SourceExcerpt> consolidateSources(const QList<SourceExcerpt> &sources) {
    QMap<QString, SourceExcerpt> byOrigin;
    for (const SourceExcerpt &excerpt : sources) {
        const QString origin = excerpt.file.isEmpty() ? excerpt.url : excerpt.file;
        auto existing = byOrigin.find(origin);
        if (existing == byOrigin.end()) {
            // First excerpt for this document: it becomes the merged entry.
            byOrigin.insert(origin, excerpt);
            continue;
        }
        // Same document seen again: only its text is folded into the entry.
        existing.value().text += "\n---\n" + excerpt.text;
    }
    return byOrigin.values();
}

// Updates the source excerpts attached to the chat item at `index`.
//
// An out-of-range `index` is ignored. An empty `sources` list clears both the
// raw and the consolidated source lists of the item; a non-empty list is
// appended to the raw list, and its consolidated form (computed from this
// batch only) is appended to the consolidated list.
//
// dataChanged is emitted once for SourcesRole and once for
// ConsolidatedSourcesRole, in that order, whether or not anything was added.
Q_INVOKABLE void updateSources(int index, const QList<SourceExcerpt> &sources)
{
    const bool inRange = index >= 0 && index < m_chatItems.size();
    if (!inRange)
        return;

    ChatItem &target = m_chatItems[index];
    if (!sources.isEmpty()) {
        target.sources << sources;
        target.consolidatedSources << consolidateSources(sources);
    } else {
        target.sources.clear();
        target.consolidatedSources.clear();
    }

    // Each role is announced with its own signal.
    const QModelIndex cell = createIndex(index, 0);
    emit dataChanged(cell, cell, {SourcesRole});
    emit dataChanged(cell, cell, {ConsolidatedSourcesRole});
}

Q_INVOKABLE void updateThumbsUpState(int index, bool b)
Expand Down Expand Up @@ -278,61 +255,7 @@ class ChatModel : public QAbstractListModel
stream << c.stopped;
stream << c.thumbsUpState;
stream << c.thumbsDownState;
if (version > 7) {
stream << c.sources.size();
for (const SourceExcerpt &info : c.sources) {
Q_ASSERT(!info.file.isEmpty());
stream << info.collection;
stream << info.path;
stream << info.file;
stream << info.title;
stream << info.author;
stream << info.date;
stream << info.text;
stream << info.page;
stream << info.from;
stream << info.to;
if (version > 9) {
stream << info.url;
stream << info.favicon;
}
}
} else if (version > 2) {
QList<QString> references;
QList<QString> referencesContext;
int validReferenceNumber = 1;
for (const SourceExcerpt &info : c.sources) {
if (info.file.isEmpty())
continue;

QString reference;
{
QTextStream stream(&reference);
stream << (validReferenceNumber++) << ". ";
if (!info.title.isEmpty())
stream << "\"" << info.title << "\". ";
if (!info.author.isEmpty())
stream << "By " << info.author << ". ";
if (!info.date.isEmpty())
stream << "Date: " << info.date << ". ";
stream << "In " << info.file << ". ";
if (info.page != -1)
stream << "Page " << info.page << ". ";
if (info.from != -1) {
stream << "Lines " << info.from;
if (info.to != -1)
stream << "-" << info.to;
stream << ". ";
}
stream << "[Context](context://" << validReferenceNumber - 1 << ")";
}
references.append(reference);
referencesContext.append(info.text);
}

stream << references.join("\n");
stream << referencesContext;
}
stream << SourceExcerpt::toJson(c.sources);
}
return stream.status() == QDataStream::Ok;
}
Expand All @@ -352,31 +275,36 @@ class ChatModel : public QAbstractListModel
stream >> c.stopped;
stream >> c.thumbsUpState;
stream >> c.thumbsDownState;
if (version > 7) {
if (version > 9) {
QList<SourceExcerpt> sources;
QString json;
stream >> json;
QString errorString;
sources = SourceExcerpt::fromJson(json, errorString);
Q_ASSERT(errorString.isEmpty());
c.sources = sources;
} else if (version > 7) {
qsizetype count;
stream >> count;
QList<SourceExcerpt> sources;
for (int i = 0; i < count; ++i) {
SourceExcerpt info;
stream >> info.collection;
stream >> info.path;
stream >> info.file;
stream >> info.title;
stream >> info.author;
stream >> info.date;
stream >> info.text;
stream >> info.page;
stream >> info.from;
stream >> info.to;
if (version > 9) {
stream >> info.url;
stream >> info.favicon;
}
sources.append(info);
SourceExcerpt source;
stream >> source.collection;
stream >> source.path;
stream >> source.file;
stream >> source.title;
stream >> source.author;
stream >> source.date;
Excerpt excerpt;
stream >> excerpt.text;
stream >> excerpt.page;
stream >> excerpt.from;
stream >> excerpt.to;
source.excerpts = QList{ excerpt };
sources.append(source);
}
c.sources = sources;
c.consolidatedSources = consolidateSources(sources);
}else if (version > 2) {
} else if (version > 2) {
QString references;
QList<QString> referencesContext;
stream >> references;
Expand All @@ -398,7 +326,8 @@ class ChatModel : public QAbstractListModel
for (int j = 0; j < referenceList.size(); ++j) {
QString reference = referenceList[j];
QString context = referencesContext[j];
SourceExcerpt info;
SourceExcerpt source;
Excerpt excerpt;
QTextStream refStream(&reference);
QString dummy;
int validReferenceNumber;
Expand All @@ -407,36 +336,36 @@ class ChatModel : public QAbstractListModel
if (reference.contains("\"")) {
int startIndex = reference.indexOf('"') + 1;
int endIndex = reference.indexOf('"', startIndex);
info.title = reference.mid(startIndex, endIndex - startIndex);
source.title = reference.mid(startIndex, endIndex - startIndex);
}

// Extract author (after "By " and before the next period)
if (reference.contains("By ")) {
int startIndex = reference.indexOf("By ") + 3;
int endIndex = reference.indexOf('.', startIndex);
info.author = reference.mid(startIndex, endIndex - startIndex).trimmed();
source.author = reference.mid(startIndex, endIndex - startIndex).trimmed();
}

// Extract date (after "Date: " and before the next period)
if (reference.contains("Date: ")) {
int startIndex = reference.indexOf("Date: ") + 6;
int endIndex = reference.indexOf('.', startIndex);
info.date = reference.mid(startIndex, endIndex - startIndex).trimmed();
source.date = reference.mid(startIndex, endIndex - startIndex).trimmed();
}

// Extract file name (after "In " and before the "[Context]")
if (reference.contains("In ") && reference.contains(". [Context]")) {
int startIndex = reference.indexOf("In ") + 3;
int endIndex = reference.indexOf(". [Context]", startIndex);
info.file = reference.mid(startIndex, endIndex - startIndex).trimmed();
source.file = reference.mid(startIndex, endIndex - startIndex).trimmed();
}

// Extract page number (after "Page " and before the next space)
if (reference.contains("Page ")) {
int startIndex = reference.indexOf("Page ") + 5;
int endIndex = reference.indexOf(' ', startIndex);
if (endIndex == -1) endIndex = reference.length();
info.page = reference.mid(startIndex, endIndex - startIndex).toInt();
excerpt.page = reference.mid(startIndex, endIndex - startIndex).toInt();
}

// Extract lines (after "Lines " and before the next space or hyphen)
Expand All @@ -446,18 +375,18 @@ class ChatModel : public QAbstractListModel
if (endIndex == -1) endIndex = reference.length();
int hyphenIndex = reference.indexOf('-', startIndex);
if (hyphenIndex != -1 && hyphenIndex < endIndex) {
info.from = reference.mid(startIndex, hyphenIndex - startIndex).toInt();
info.to = reference.mid(hyphenIndex + 1, endIndex - hyphenIndex - 1).toInt();
excerpt.from = reference.mid(startIndex, hyphenIndex - startIndex).toInt();
excerpt.to = reference.mid(hyphenIndex + 1, endIndex - hyphenIndex - 1).toInt();
} else {
info.from = reference.mid(startIndex, endIndex - startIndex).toInt();
excerpt.from = reference.mid(startIndex, endIndex - startIndex).toInt();
}
}
info.text = context;
sources.append(info);
excerpt.text = context;
source.excerpts = QList{ excerpt };
sources.append(source);
}

c.sources = sources;
c.consolidatedSources = consolidateSources(sources);
}
}
beginInsertRows(QModelIndex(), m_chatItems.size(), m_chatItems.size());
Expand Down
18 changes: 9 additions & 9 deletions gpt4all-chat/qml/ChatView.qml
Original file line number Diff line number Diff line change
Expand Up @@ -1106,7 +1106,7 @@ Rectangle {
Layout.preferredWidth: childrenRect.width
Layout.preferredHeight: childrenRect.height
visible: {
if (consolidatedSources.length === 0)
if (sources.length === 0)
return false
if (!MySettings.localDocsShowReferences)
return false
Expand Down Expand Up @@ -1134,9 +1134,9 @@ Rectangle {
sourceSize.height: 24
mipmap: true
source: {
if (typeof consolidatedSources === 'undefined'
|| typeof consolidatedSources[0] === 'undefined'
|| consolidatedSources[0].url === "")
if (typeof sources === 'undefined'
|| typeof sources[0] === 'undefined'
|| sources[0].url === "")
return "qrc:/gpt4all/icons/db.svg";
else
return "qrc:/gpt4all/icons/globe.svg";
Expand All @@ -1151,7 +1151,7 @@ Rectangle {
}

Text {
text: qsTr("%1 Sources").arg(consolidatedSources.length)
text: qsTr("%1 Sources").arg(sources.length)
padding: 0
font.pixelSize: theme.fontSizeLarge
font.bold: true
Expand Down Expand Up @@ -1199,7 +1199,7 @@ Rectangle {
Layout.column: 1
Layout.topMargin: 5
visible: {
if (consolidatedSources.length === 0)
if (sources.length === 0)
return false
if (!MySettings.localDocsShowReferences)
return false
Expand Down Expand Up @@ -1240,9 +1240,9 @@ Rectangle {
id: flow
Layout.fillWidth: true
spacing: 10
visible: consolidatedSources.length !== 0
visible: sources.length !== 0
Repeater {
model: consolidatedSources
model: sources

delegate: Rectangle {
radius: 10
Expand Down Expand Up @@ -1361,7 +1361,7 @@ Rectangle {
return false;
if (MySettings.suggestionMode === 2) // Off
return false;
if (MySettings.suggestionMode === 0 && consolidatedSources.length === 0) // LocalDocs only
if (MySettings.suggestionMode === 0 && sources.length === 0) // LocalDocs only
return false;
return currentChat.responseState === Chat.GeneratingQuestions || currentChat.generatedQuestions.length !== 0;
}
Expand Down
16 changes: 4 additions & 12 deletions gpt4all-chat/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -408,12 +408,8 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
message.insert("role", "assistant");
message.insert("content", result);
choice.insert("message", message);
if (MySettings::globalInstance()->localDocsShowReferences()) {
QJsonArray references;
for (const auto &ref : infos)
references.append(ref.toJson());
choice.insert("references", references);
}
if (MySettings::globalInstance()->localDocsShowReferences())
choice.insert("references", SourceExcerpt::toJson(infos));
choices.append(choice);
}
} else {
Expand All @@ -426,12 +422,8 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
choice.insert("index", index++);
choice.insert("logprobs", QJsonValue::Null); // We don't support
choice.insert("finish_reason", responseTokens == max_tokens ? "length" : "stop");
if (MySettings::globalInstance()->localDocsShowReferences()) {
QJsonArray references;
for (const auto &ref : infos)
references.append(ref.toJson());
choice.insert("references", references);
}
if (MySettings::globalInstance()->localDocsShowReferences())
choice.insert("references", SourceExcerpt::toJson(infos));
choices.append(choice);
}
}
Expand Down
Loading

0 comments on commit 7cbff80

Please sign in to comment.