Commit db58003

chat: fix localdocs breakage in v3.5.2 (#3302)
Signed-off-by: Jared Van Bortel <[email protected]>
1 parent: 38d92cb

6 files changed: +37 / -23 lines
gpt4all-chat/CHANGELOG.md (+6)

@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
+## [Unreleased]
+
+### Fixed
+- Fix LocalDocs not using information from sources in v3.5.2 ([#3302](https://github.com/nomic-ai/gpt4all/pull/3302))
+
 ## [3.5.2] - 2024-12-13
 
 ### Added
@@ -223,6 +228,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - Fix several Vulkan resource management issues ([#2694](https://github.com/nomic-ai/gpt4all/pull/2694))
 - Fix crash/hang when some models stop generating, by showing special tokens ([#2701](https://github.com/nomic-ai/gpt4all/pull/2701))
 
+[Unreleased]: https://github.com/nomic-ai/gpt4all/compare/v3.5.2...HEAD
 [3.5.2]: https://github.com/nomic-ai/gpt4all/compare/v3.5.1...v3.5.2
 [3.5.1]: https://github.com/nomic-ai/gpt4all/compare/v3.5.0...v3.5.1
 [3.5.0]: https://github.com/nomic-ai/gpt4all/compare/v3.5.0-rc2...v3.5.0

gpt4all-chat/CMakeLists.txt (+2 -2)

@@ -4,9 +4,9 @@ include(../common/common.cmake)
 
 set(APP_VERSION_MAJOR 3)
 set(APP_VERSION_MINOR 5)
-set(APP_VERSION_PATCH 2)
+set(APP_VERSION_PATCH 3)
 set(APP_VERSION_BASE "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
-set(APP_VERSION "${APP_VERSION_BASE}")
+set(APP_VERSION "${APP_VERSION_BASE}-dev0")
 
 project(gpt4all VERSION ${APP_VERSION_BASE} LANGUAGES CXX C)

gpt4all-chat/src/chatllm.cpp (+21 -17)

@@ -852,32 +852,29 @@ std::string ChatLLM::applyJinjaTemplate(std::span<const ChatItem> items) const
 }
 
 auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const LLModel::PromptContext &ctx,
-                                 std::optional<QList<ChatItem>> chat) -> ChatPromptResult
+                                 std::optional<std::pair<int, int>> subrange) -> ChatPromptResult
 {
     Q_ASSERT(isModelLoaded());
     Q_ASSERT(m_chatModel);
 
+    // Return a (ChatModelAccessor, std::span) pair where the span represents the relevant messages for this chat.
+    // "subrange" is used to select only local server messages from the current chat session.
+    auto getChat = [&]() {
+        auto items = m_chatModel->chatItems(); // holds lock
+        std::span view(items);
+        if (subrange)
+            view = view.subspan(subrange->first, subrange->second);
+        Q_ASSERT(view.size() >= 2);
+        return std::pair(std::move(items), view);
+    };
+
     // copy messages for safety (since we can't hold the lock the whole time)
     std::optional<std::pair<int, QString>> query;
-    std::vector<ChatItem> chatItems;
     {
-        std::optional<ChatModelAccessor> items;
-        std::span<const ChatItem> view;
-        if (chat) {
-            view = *chat;
-        } else {
-            items = m_chatModel->chatItems(); // holds lock
-            Q_ASSERT(!items->empty());
-            view = *items;
-        }
-        Q_ASSERT(view.size() >= 2); // should be prompt/response pairs
-
         // Find the prompt that represents the query. Server chats are flexible and may not have one.
-        auto response = view.end() - 1;
-        if (auto peer = m_chatModel->getPeer(view, response))
+        auto [_, view] = getChat(); // holds lock
+        if (auto peer = m_chatModel->getPeer(view, view.end() - 1)) // peer of response
            query = { *peer - view.begin(), (*peer)->value };
-
-        chatItems.assign(view.begin(), view.end() - 1); // exclude last
     }
 
     QList<ResultInfo> databaseResults;
@@ -889,6 +886,13 @@ auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const LL
         emit databaseResultsChanged(databaseResults);
     }
 
+    // copy messages for safety (since we can't hold the lock the whole time)
+    std::vector<ChatItem> chatItems;
+    {
+        auto [_, view] = getChat(); // holds lock
+        chatItems.assign(view.begin(), view.end() - 1); // exclude new response
+    }
+
     auto result = promptInternal(chatItems, ctx, !databaseResults.isEmpty());
     return {
         /*PromptResult*/ {
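
For reference, a minimal, self-contained sketch of the (offset, count) selection the new getChat lambda performs; Message, selectChat, and the sample data are illustrative stand-ins rather than the real gpt4all types, and the real code additionally keeps the ChatModelAccessor alive so the span stays valid while the chat model's lock is held:

// Sketch only: how an (offset, count) pair selects a window of a message log,
// mirroring the getChat() lambda above. Types and data are placeholders, not gpt4all's.
#include <cassert>
#include <iostream>
#include <optional>
#include <span>
#include <string>
#include <utility>
#include <vector>

struct Message { std::string role, text; };

std::span<const Message> selectChat(const std::vector<Message> &log,
                                    std::optional<std::pair<int, int>> subrange)
{
    std::span<const Message> view(log);   // default: the whole chat session
    if (subrange)                         // server path: only this request's messages
        view = view.subspan(subrange->first, subrange->second);
    assert(view.size() >= 2);             // at least the prompt and the blank response
    return view;
}

int main()
{
    std::vector<Message> log {
        {"prompt", "earlier question"}, {"response", "earlier answer"},
        {"prompt", "what do my docs say?"}, {"response", ""}, // freshly appended pair
    };
    // (offset, count) as appendResponseWithHistory would report it for the new pair.
    for (const auto &m : selectChat(log, std::pair{2, 2}))
        std::cout << m.role << ": " << m.text << '\n';
}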

gpt4all-chat/src/chatllm.h (+1 -1)

@@ -251,7 +251,7 @@ public Q_SLOTS:
     };
 
     ChatPromptResult promptInternalChat(const QStringList &enabledCollections, const LLModel::PromptContext &ctx,
-                                        std::optional<QList<ChatItem>> chat = {});
+                                        std::optional<std::pair<int, int>> subrange = {});
     // passing a string_view directly skips templating and uses the raw string
     PromptResult promptInternal(const std::variant<std::span<const ChatItem>, std::string_view> &prompt,
                                 const LLModel::PromptContext &ctx,

gpt4all-chat/src/chatmodel.h (+5 -1)

@@ -362,7 +362,8 @@ class ChatModel : public QAbstractListModel
 
     // Used by Server to append a new conversation to the chat log.
     // Appends a new, blank response to the end of the input list.
-    void appendResponseWithHistory(QList<ChatItem> &history)
+    // Returns an (offset, count) pair representing the indices of the appended items, including the new response.
+    std::pair<int, int> appendResponseWithHistory(QList<ChatItem> &history)
     {
         if (history.empty())
             throw std::invalid_argument("at least one message is required");
@@ -378,9 +379,11 @@ class ChatModel : public QAbstractListModel
         beginInsertRows(QModelIndex(), startIndex, endIndex - 1 /*inclusive*/);
         bool hadError;
         QList<ChatItem> newItems;
+        std::pair<int, int> subrange;
         {
             QMutexLocker locker(&m_mutex);
             hadError = hasErrorUnlocked();
+            subrange = { m_chatItems.size(), history.size() };
             m_chatItems.reserve(m_chatItems.size() + history.size());
             for (auto &item : history)
                 m_chatItems << item;
@@ -390,6 +393,7 @@ class ChatModel : public QAbstractListModel
         // Server can add messages when there is an error because each call is a new conversation
         if (hadError)
             emit hasErrorChanged(false);
+        return subrange;
     }
 
     void truncate(qsizetype size)
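
For context on the returned pair: the offset is the chat model's size before the new items are appended, and the count is the length of the input list after the blank response has been added to it, so the subrange covers exactly this request's messages, response included. A rough standalone sketch of that bookkeeping follows; TinyChatLog and Item are placeholders, and the real ChatModel's Qt signals, mutex, and error handling are omitted:

// Sketch only: the (offset, count) arithmetic behind appendResponseWithHistory.
#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct Item { std::string role, text; };

struct TinyChatLog {
    std::vector<Item> items;

    // Append one request's messages plus a blank response and report where they landed.
    std::pair<int, int> appendWithHistory(std::vector<Item> history)
    {
        history.push_back({"response", ""});                  // blank response, filled in later
        std::pair<int, int> subrange { int(items.size()),     // offset: first new index
                                       int(history.size()) }; // count: appended items, response included
        items.insert(items.end(), history.begin(), history.end());
        return subrange;
    }
};

int main()
{
    TinyChatLog log;
    log.items = { {"prompt", "old"}, {"response", "old answer"} };
    auto [offset, count] = log.appendWithHistory({ {"prompt", "new question"} });
    std::cout << count << " items appended at index " << offset << '\n'; // "2 items appended at index 2"
}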

gpt4all-chat/src/server.cpp (+2 -2)

@@ -781,7 +781,7 @@ auto Server::handleChatRequest(const ChatRequest &request)
         case Assistant: chatItems.emplace_back(ChatItem::response_tag, message.content); break;
         }
     }
-    m_chatModel->appendResponseWithHistory(chatItems);
+    auto subrange = m_chatModel->appendResponseWithHistory(chatItems);
 
     // FIXME(jared): taking parameters from the UI inhibits reproducibility of results
     LLModel::PromptContext promptCtx {
@@ -801,7 +801,7 @@ auto Server::handleChatRequest(const ChatRequest &request)
     for (int i = 0; i < request.n; ++i) {
         ChatPromptResult result;
         try {
-            result = promptInternalChat(m_collections, promptCtx, chatItems);
+            result = promptInternalChat(m_collections, promptCtx, subrange);
         } catch (const std::exception &e) {
            emit responseChanged(e.what());
            emit responseStopped(0);
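
Taken together, the server now records where this request's messages land in the shared chat model and hands only that (offset, count) window to promptInternalChat, which re-reads the live model rather than templating a copy taken up front. Because the copy used for prompting is taken after the LocalDocs search (second chatllm.cpp hunk above), source information attached to the chat in the meantime stays visible to the template; judging from the commit title, that ordering appears to be the v3.5.2 breakage being fixed. Below is a condensed ordering sketch with simplified, hypothetical names, not the real Server/ChatLLM code:

// Sketch only: the order of operations in the new prompt path. Names are simplified
// stand-ins for Server/ChatLLM/ChatModel; locking and Qt signals are omitted.
#include <optional>
#include <utility>
#include <vector>

struct Item {};

struct PromptPathSketch {
    std::vector<Item> liveChatModel; // shared and lock-protected in the real code

    void run(std::optional<std::pair<int, int>> subrange)
    {
        findQuery(snapshot(subrange));   // first read: locate the query prompt
        searchLocalDocs();               // may attach LocalDocs sources to the live model
        auto items = snapshot(subrange); // second read, AFTER the search ...
        renderAndPrompt(items);          // ... so the template can see those sources
    }

    // Copy either the whole log or just the (offset, count) window of it.
    std::vector<Item> snapshot(std::optional<std::pair<int, int>> r) const
    {
        auto first = liveChatModel.begin(), last = liveChatModel.end();
        if (r) {
            first = liveChatModel.begin() + r->first;
            last  = first + r->second;
        }
        return std::vector<Item>(first, last);
    }

    void findQuery(const std::vector<Item> &) {}
    void searchLocalDocs() {}
    void renderAndPrompt(const std::vector<Item> &) {}
};

int main()
{
    PromptPathSketch p;
    p.liveChatModel.resize(2); // e.g. one prompt plus the blank response
    p.run(std::pair{0, 2});    // the server passes the subrange of its own request
}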
