chains: simplesequentialchain and sequentialchain implementation #183

Merged (3 commits) on Jul 23, 2023
35 changes: 26 additions & 9 deletions chains/chains_test.go
@@ -5,30 +5,47 @@ import (
"fmt"
"sync"
"testing"
"time"

"github.com/stretchr/testify/require"
"github.com/tmc/langchaingo/llms"
"github.com/tmc/langchaingo/prompts"
"github.com/tmc/langchaingo/schema"
)

// testLanguageModel is a struct that implements the language model interface
// and returns the prompt value as a string.
type testLanguageModel struct{}
type testLanguageModel struct {
// expected result of the language model
expResult string
// simulate work by sleeping for this duration
simulateWork time.Duration
// record the prompt that was passed to the language model
recordedPrompt []schema.PromptValue
}

func (l *testLanguageModel) GeneratePrompt(_ context.Context, promptValue []schema.PromptValue, _ ...llms.CallOption) (llms.LLMResult, error) { //nolint:lll
l.recordedPrompt = promptValue
if l.simulateWork > 0 {
time.Sleep(l.simulateWork)
}

func (l testLanguageModel) GeneratePrompt(_ context.Context, promptValue []schema.PromptValue, _ ...llms.CallOption) (llms.LLMResult, error) { //nolint:lll
var llmResult string
if l.expResult != "" {
llmResult = l.expResult
} else {
llmResult = promptValue[0].String()
}
return llms.LLMResult{
Generations: [][]*llms.Generation{{&llms.Generation{
Text: promptValue[0].String(),
Text: llmResult,
}}},
}, nil
}

func (l testLanguageModel) GetNumTokens(text string) int {
func (l *testLanguageModel) GetNumTokens(text string) int {
return len(text)
}

var _ llms.LanguageModel = testLanguageModel{}
var _ llms.LanguageModel = &testLanguageModel{}

func TestApply(t *testing.T) {
t.Parallel()
@@ -42,7 +59,7 @@ func TestApply(t *testing.T) {
}
}

c := NewLLMChain(testLanguageModel{}, prompts.NewPromptTemplate("{{.text}}", []string{"text"}))
c := NewLLMChain(&testLanguageModel{}, prompts.NewPromptTemplate("{{.text}}", []string{"text"}))
results, err := Apply(context.Background(), c, inputs, maxWorkers)
require.NoError(t, err)
require.Equal(t, inputs, results, "inputs and results not equal")
@@ -57,7 +74,7 @@ func TestApplyWithCanceledContext(t *testing.T) {
ctx, cancelFunc := context.WithCancel(context.Background())
wg := sync.WaitGroup{}
wg.Add(1)
c := NewLLMChain(testLanguageModel{}, prompts.NewPromptTemplate("test", nil))
c := NewLLMChain(&testLanguageModel{simulateWork: time.Second}, prompts.NewPromptTemplate("test", nil))

go func() {
defer wg.Done()
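For context, a minimal sketch of how the reworked test double might be exercised in a new test in this file. The test name is hypothetical; it assumes the existing package-level Run helper and the require assertions already imported here, and only illustrates the new expResult, simulateWork, and recordedPrompt fields.

func TestLanguageModelDoubleRecordsPrompt(t *testing.T) {
	t.Parallel()

	// Fixed output plus a small artificial delay.
	model := &testLanguageModel{
		expResult:    "final answer",
		simulateWork: 10 * time.Millisecond,
	}
	c := NewLLMChain(model, prompts.NewPromptTemplate("{{.text}}", []string{"text"}))

	out, err := Run(context.Background(), c, "hello")
	require.NoError(t, err)
	require.Equal(t, "final answer", out)

	// The double records the prompt it was given, so the rendered template can be asserted on.
	require.Len(t, model.recordedPrompt, 1)
	require.Equal(t, "hello", model.recordedPrompt[0].String())
}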
2 changes: 2 additions & 0 deletions chains/errors.go
@@ -31,4 +31,6 @@ var (
// ErrMultipleOutputsInPredict is returned if a chain has multiple return values
// in predict.
ErrMultipleOutputsInPredict = errors.New("predict is not supported with a chain that returns multiple values")
// ErrChainInitialization is returned if a chain is not initialized appropriately.
ErrChainInitialization = errors.New("error initializing chain")
)
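Because validateSeqChain wraps this sentinel with %w, callers can detect it with errors.Is. A short, hedged sketch of that pattern follows; the surrounding function and variable names are illustrative and assume the chains package API introduced in this PR.

package example

import (
	"errors"
	"log"

	"github.com/tmc/langchaingo/chains"
)

func mustBuildPipeline(cs []chains.Chain) *chains.SequentialChain {
	seq, err := chains.NewSequentialChain(cs, []string{"input"}, []string{"output"})
	if err != nil {
		// The sentinel wraps a description of which keys are missing or clashing.
		if errors.Is(err, chains.ErrChainInitialization) {
			log.Fatalf("chain wiring is wrong: %v", err)
		}
		log.Fatal(err)
	}
	return seq
}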
2 changes: 1 addition & 1 deletion chains/llm_test.go
@@ -40,7 +40,7 @@ func TestLLMChainWithChatPromptTemplate(t *testing.T) {
t.Parallel()

c := NewLLMChain(
testLanguageModel{},
&testLanguageModel{},
prompts.NewChatPromptTemplate([]prompts.MessageFormatter{
prompts.NewAIMessagePromptTemplate("{{.foo}}", []string{"foo"}),
prompts.NewHumanMessagePromptTemplate("{{.boo}}", []string{"boo"}),
8 changes: 4 additions & 4 deletions chains/map_reduce_test.go
@@ -14,11 +14,11 @@ func TestMapReduceInputVariables(t *testing.T) {

c := MapReduceDocuments{
LLMChain: NewLLMChain(
testLanguageModel{},
&testLanguageModel{},
prompts.NewPromptTemplate("{{.text}} {{.foo}}", []string{"text", "foo"}),
),
ReduceChain: NewLLMChain(
testLanguageModel{},
&testLanguageModel{},
prompts.NewPromptTemplate("{{.texts}} {{.baz}}", []string{"texts", "baz"}),
),
ReduceDocumentVariableName: "texts",
@@ -36,12 +36,12 @@ func TestMapReduce(t *testing.T) {

c := NewMapReduceDocuments(
NewLLMChain(
testLanguageModel{},
&testLanguageModel{},
prompts.NewPromptTemplate("{{.context}}", []string{"context"}),
),
NewStuffDocuments(
NewLLMChain(
testLanguageModel{},
&testLanguageModel{},
prompts.NewPromptTemplate("{{.context}}", []string{"context"}),
),
),
4 changes: 2 additions & 2 deletions chains/map_rerank_documents_test.go
@@ -13,7 +13,7 @@ func TestMapRerankInputVariables(t *testing.T) {
t.Parallel()

mapRerankLLMChain := NewLLMChain(
testLanguageModel{},
&testLanguageModel{},
prompts.NewPromptTemplate("{{.text}} {{.foo}}", []string{"text", "foo"}),
)

@@ -33,7 +33,7 @@ func TestMapRerankDocumentsCall(t *testing.T) {
t.Parallel()

mapRerankLLMChain := NewLLMChain(
testLanguageModel{},
&testLanguageModel{},
prompts.NewPromptTemplate("{{.context}}", []string{"context"}),
)

178 changes: 178 additions & 0 deletions chains/sequential.go
@@ -0,0 +1,178 @@
package chains

import (
"context"
"errors"
"fmt"

"github.com/tmc/langchaingo/internal/util"
"github.com/tmc/langchaingo/memory"
"github.com/tmc/langchaingo/schema"
)

// SequentialChain is a chain that runs multiple chains in sequence,
// where the output of one chain is the input of the next.
type SequentialChain struct {
Collaborator: Maybe export the memory field?

Contributor (author): I planned to add memory support in another PR to keep this one scoped. There are some additional validations that need to occur when using memory, e.g. validating that there are no clashes between the memory keys and the input keys to the SequentialChain. It might be simple enough to add in this PR - can do that if you think it's worth doing.

Collaborator: Yeah, didn't think about the extra validations. This PR is already kind of large, so you can add it in a second PR if you think that is best.

chains []Chain
inputKeys []string
outputKeys []string
memory schema.Memory
}

func NewSequentialChain(chains []Chain, inputKeys []string, outputKeys []string) (*SequentialChain, error) {
if err := validateSeqChain(chains, inputKeys, outputKeys); err != nil {
return nil, err
}

return &SequentialChain{
chains: chains,
inputKeys: inputKeys,
outputKeys: outputKeys,
memory: memory.NewSimple(),
}, nil
}

func validateSeqChain(chain []Chain, inputKeys []string, outputKeys []string) error {
knownKeys := util.ToSet(inputKeys)

for i, c := range chain {
// Check that chain has input keys that are in knownKeys
missingKeys := util.Difference(c.GetInputKeys(), knownKeys)
if len(missingKeys) > 0 {
return fmt.Errorf(
"%w: chain at index %d is missing required input keys: %v",
ErrChainInitialization, i, missingKeys,
)
}

// Check that chain does not have output keys that are already in knownKeys
overlappingKeys := util.Intersection(c.GetOutputKeys(), knownKeys)
if len(overlappingKeys) > 0 {
return fmt.Errorf(
"%w: chain at index %d has output keys that already exist: %v",
ErrChainInitialization, i, overlappingKeys,
)
}

// Add the chain's output keys to knownKeys
for _, key := range c.GetOutputKeys() {
knownKeys[key] = struct{}{}
}
}

// Check that outputKeys are in knownKeys
for _, key := range outputKeys {
if _, ok := knownKeys[key]; !ok {
return fmt.Errorf("%w: output key %s is not in the known keys", ErrChainInitialization, key)
}
}

return nil
}

// Call runs the logic of the chains and returns the outputs. This method should
// not be called directly. Use the Call, Run or Predict functions instead; they
// handle the memory and other aspects of the chain.
func (c *SequentialChain) Call(ctx context.Context, inputs map[string]any, options ...ChainCallOption) (map[string]any, error) { //nolint:lll
var outputs map[string]any
var err error
for _, chain := range c.chains {
outputs, err = Call(ctx, chain, inputs, options...)
if err != nil {
return nil, err
}
// Set the input for the next chain to the output of the current chain
inputs = outputs
}
return outputs, nil
}

// GetMemory gets the memory of the chain.
func (c *SequentialChain) GetMemory() schema.Memory {
return c.memory
}

// GetInputKeys returns the input keys the chain expects.
func (c *SequentialChain) GetInputKeys() []string {
return c.inputKeys
}

// GetOutputKeys returns the output keys the chain returns.
func (c *SequentialChain) GetOutputKeys() []string {
return c.outputKeys
}

const (
input = "input"
output = "output"
)

var (
ErrInvalidInputNumberInSimpleSeq = errors.New("single input expected for chains supplied to SimpleSequentialChain")
ErrInvalidOutputNumberInSimpleSeq = errors.New("single output expected for chains supplied to SimpleSequentialChain")
)

// SimpleSequentialChain is a chain that runs multiple chains in sequence,
// where the output of one chain is the input of the next.
// All the chains must have a single input and a single output.
type SimpleSequentialChain struct {
chains []Chain
memory schema.Memory
}

func NewSimpleSequentialChain(chains []Chain) (*SimpleSequentialChain, error) {
if err := validateSimpleSeq(chains); err != nil {
return nil, err
}

return &SimpleSequentialChain{chains: chains, memory: memory.NewSimple()}, nil
}

func validateSimpleSeq(chains []Chain) error {
for i, chain := range chains {
if len(chain.GetInputKeys()) != 1 {
return fmt.Errorf(
"%w: chain at index [%d] has input keys: %v",
ErrInvalidInputNumberInSimpleSeq, i, chain.GetInputKeys(),
)
}

if len(chain.GetOutputKeys()) != 1 {
return fmt.Errorf(
"%w: chain at index [%d] has output keys: %v",
ErrInvalidOutputNumberInSimpleSeq, i, chain.GetOutputKeys(),
)
}
}
return nil
}

// Call runs the logic of the chains and returns the output.
// This method should not be called directly.
// Use the Run function instead; it handles the memory and other aspects of the chain.
func (c *SimpleSequentialChain) Call(ctx context.Context, inputs map[string]any, options ...ChainCallOption) (map[string]any, error) { //nolint:lll
input := inputs[input]
for _, chain := range c.chains {
var err error
input, err = Run(ctx, chain, input, options...)
if err != nil {
return nil, err
}
}
return map[string]any{output: input}, nil
}

// GetMemory gets the memory of the chain.
func (c *SimpleSequentialChain) GetMemory() schema.Memory {
return c.memory
}

// GetInputKeys returns the chain's single input key, "input".
func (c *SimpleSequentialChain) GetInputKeys() []string {
return []string{input}
}

// GetOutputKeys returns the chain's single output key, "output".
func (c *SimpleSequentialChain) GetOutputKeys() []string {
return []string{output}
}
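To show how the two new chains are meant to be wired together, here is a minimal usage sketch. It assumes the OpenAI backend from llms/openai and the package-level Run helper; the prompts, variable names, and error handling are illustrative only, not part of this PR.

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/tmc/langchaingo/chains"
	"github.com/tmc/langchaingo/llms/openai"
	"github.com/tmc/langchaingo/prompts"
)

func main() {
	llm, err := openai.New() // reads OPENAI_API_KEY from the environment
	if err != nil {
		log.Fatal(err)
	}

	// Each chain has exactly one input key and one output key, as
	// SimpleSequentialChain requires: the synopsis produced by the first
	// chain becomes the input of the second.
	synopsisChain := chains.NewLLMChain(llm, prompts.NewPromptTemplate(
		"Write a one-paragraph synopsis for a play titled {{.title}}.", []string{"title"}))
	reviewChain := chains.NewLLMChain(llm, prompts.NewPromptTemplate(
		"Write a short review of the following synopsis:\n{{.synopsis}}", []string{"synopsis"}))

	seq, err := chains.NewSimpleSequentialChain([]chains.Chain{synopsisChain, reviewChain})
	if err != nil {
		log.Fatal(err)
	}

	review, err := chains.Run(context.Background(), seq, "Romeo and Juliet")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(review)
}

NewSequentialChain works the same way but lets each chain read and write arbitrary keys, with validateSeqChain rejecting wiring where a chain's input keys are not produced by an earlier chain or the caller.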