symflower · bauersimon · Oct 14, 2024 · Oct 18, 2024 · Oct 17, 2024 · Oct 17, 2024
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -21,9 +21,8 @@
 			"request": "launch",
 			"mode": "auto",
 			"program": "cmd/eval-dev-quality",
-			"args": [
-				"${input:args}",
-			]
+			"args": "${input:args}",
+			"cwd": "${workspaceFolder}"
 		},
 	],
 	"inputs": [
@@ -54,7 +53,7 @@
 			"command": "memento.promptString",
 			"args": {
 				"id": "args",
-				"description": "Arguments? (Make sure to use absolute paths!)",
+				"description": "Arguments?",
 				"default": "",
 			},
 		},

diff --git a/README.md b/README.md
@@ -226,6 +226,45 @@ Each repository can contain a configuration file `repository.json` in its root d
 
 For the evaluation of the repository only the specified tasks are executed. If no `repository.json` file exists, all tasks are executed.
 
+Depending on the task, it can be beneficial to exclude parts of the repository from explicit evaluation. For example, Spring controller tests cannot be executed on their own but need a supporting [`Application` class](https://docs.spring.io/spring-boot/reference/testing/spring-boot-applications.html#testing.spring-boot-applications.using-main). However, [such a file](testdata/java/spring-plain/src/main/java/com/example/Application.java) should never itself be used to prompt models for tests. Therefore, it can be excluded through the `ignore` setting of the `repository.json` configuration:
+
+```json
+{
+  "tasks": ["write-tests"],
+  "ignore": ["src/main/java/com/example/Application.java"]
+}
+```
+
+This `ignore` setting is currently only respected for the test generation task `write-tests`.
+
+It is possible to configure some model prompt parameters through `repository.json`:
+
+```json
+{
+  "tasks": ["write-tests"],
+  "prompt": {
+    "test-framework": "JUnit 5 for Spring Boot"
+  }
+}
+```
+
+This `prompt.test-framework` setting overwrites the default test framework in the prompt. It is currently only respected for the test generation task `write-tests`.
+
+When task results are validated, some repositories might require custom logic. For example, generating tests for a Spring Boot project requires ensuring that the tests use an actual Spring context (i.e. Spring Boot was initialized when the tests were executed). Therefore, the `repository.json` supports adding rudimentary custom validation:
+
+```json
+{
+  "tasks": ["write-tests"],
+  "validation": {
+    "execution": {
+      "stdout": "Initializing Spring"
+    }
+  }
+}
+```
+
+This `validation.execution.stdout` setting ensures that the given string (here "Initializing Spring") is contained in the execution output. It is currently only respected for the test generation task `write-tests`.
+
 ## Tasks
 
 ### Task: Test Generation

diff --git a/cmd/eval-dev-quality/cmd/evaluate.go b/cmd/eval-dev-quality/cmd/evaluate.go
@@ -36,6 +36,7 @@ import (
 	openaiapi "github.com/symflower/eval-dev-quality/provider/openai-api"
 	_ "github.com/symflower/eval-dev-quality/provider/openrouter" // Register provider.
 	_ "github.com/symflower/eval-dev-quality/provider/symflower"  // Register provider.
+	"github.com/symflower/eval-dev-quality/task"
 	"github.com/symflower/eval-dev-quality/tools"
 	"github.com/symflower/eval-dev-quality/util"
 )
@@ -317,7 +318,7 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.
 				command.logger.Panicf("ERROR: %s", err)
 			}
 			for _, r := range repositories {
-				config, err := evaltask.LoadRepositoryConfiguration(filepath.Join(command.TestdataPath, r))
+				config, err := task.LoadRepositoryConfiguration(filepath.Join(command.TestdataPath, r), evaltask.AllIdentifiers)
 				if err != nil {
 					command.logger.Panicf("ERROR: %s", err)
 				}

diff --git a/evaluate/evaluate.go b/evaluate/evaluate.go
@@ -3,7 +3,6 @@ package evaluate
 import (
 	"os"
 	"path/filepath"
-	"strings"
 
 	"github.com/symflower/eval-dev-quality/evaluate/metrics"
 	"github.com/symflower/eval-dev-quality/evaluate/report"
@@ -134,7 +133,7 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore) {
 						r.SetQueryAttempts(ctx.QueryAttempts)
 					}
 
-					for _, taskIdentifier := range temporaryRepository.SupportedTasks() {
+					for _, taskIdentifier := range temporaryRepository.Configuration().Tasks {
 						task, err := evaluatetask.ForIdentifier(taskIdentifier)
 						if err != nil {
 							logger.Fatal(err)
@@ -207,7 +206,7 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore) {
 		}
 		for _, repositoryPath := range relativeRepositoryPaths {
 			// Do not include "plain" repositories in this step of the evaluation, because they have been checked with the common check before.
-			if !repositoriesLookup[repositoryPath] || strings.HasSuffix(repositoryPath, RepositoryPlainName) {
+			if !repositoriesLookup[repositoryPath] || filepath.Base(repositoryPath) == RepositoryPlainName {
 				continue
 			}
 

diff --git a/evaluate/task/repository.go b/evaluate/task/repository.go
@@ -2,7 +2,6 @@ package task
 
 import (
 	"context"
-	"encoding/json"
 	"errors"
 	"fmt"
 	"os"
@@ -18,55 +17,9 @@ import (
 	"github.com/symflower/eval-dev-quality/util"
 )
 
-// RepositoryConfiguration holds the configuration of a repository.
-type RepositoryConfiguration struct {
-	Tasks []task.Identifier
-}
-
-// LoadRepositoryConfiguration loads a repository configuration from the given path.
-func LoadRepositoryConfiguration(path string) (config *RepositoryConfiguration, err error) {
-	if osutil.FileExists(path) != nil { // If we don't get a valid file, assume it is a repository directory and target the default configuration file name.
-		path = filepath.Join(path, RepositoryConfigurationFileName)
-	}
-
-	data, err := os.ReadFile(path)
-	if errors.Is(err, os.ErrNotExist) {
-		// Set default configuration.
-		return &RepositoryConfiguration{
-			Tasks: AllIdentifiers,
-		}, nil
-	} else if err != nil {
-		return nil, pkgerrors.Wrap(err, path)
-	}
-
-	config = &RepositoryConfiguration{}
-	if err := json.Unmarshal(data, &config); err != nil {
-		return nil, pkgerrors.Wrap(err, path)
-	} else if err := config.validate(); err != nil {
-		return nil, err
-	}
-
-	return config, nil
-}
-
-// validate validates the configuration.
-func (rc *RepositoryConfiguration) validate() (err error) {
-	if len(rc.Tasks) == 0 {
-		return pkgerrors.Errorf("empty list of tasks in configuration")
-	}
-
-	for _, taskIdentifier := range rc.Tasks {
-		if !LookupIdentifier[taskIdentifier] {
-			return pkgerrors.Errorf("task identifier %q unknown", taskIdentifier)
-		}
-	}
-
-	return nil
-}
-
 // Repository holds data about a repository.
 type Repository struct {
-	RepositoryConfiguration
+	task.RepositoryConfiguration
 
 	// name holds the name of the repository.
 	name string
@@ -76,14 +29,11 @@ type Repository struct {
 
 var _ task.Repository = (*Repository)(nil)
 
-// RepositoryConfigurationFileName holds the file name for a repository configuration.
-const RepositoryConfigurationFileName = "repository.json"
-
 // loadConfiguration loads the configuration from the dedicated configuration file.
 func (r *Repository) loadConfiguration() (err error) {
-	configurationFilePath := filepath.Join(r.dataPath, RepositoryConfigurationFileName)
+	configurationFilePath := filepath.Join(r.dataPath, task.RepositoryConfigurationFileName)
 
-	configuration, err := LoadRepositoryConfiguration(configurationFilePath)
+	configuration, err := task.LoadRepositoryConfiguration(configurationFilePath, AllIdentifiers)
 	if err != nil {
 		return err
 	}
@@ -103,14 +53,9 @@ func (r *Repository) DataPath() (dataPath string) {
 	return r.dataPath
 }
 
-// SupportedTasks returns the list of task identifiers the repository supports.
-func (r *Repository) SupportedTasks() (tasks []task.Identifier) {
-	return r.Tasks
-}
-
 // Validate checks it the repository is well-formed.
 func (r *Repository) Validate(logger *log.Logger, language language.Language) (err error) {
-	for _, taskIdentifier := range r.SupportedTasks() {
+	for _, taskIdentifier := range r.RepositoryConfiguration.Tasks {
 		switch taskIdentifier {
 		case IdentifierCodeRepair:
 			return validateCodeRepairRepository(logger, r.DataPath(), language)
@@ -163,6 +108,11 @@ func (r *Repository) Reset(logger *log.Logger) (err error) {
 	return nil
 }
 
+// Configuration returns a pointer to the configuration embedded in the repository.
+func (r *Repository) Configuration() (configuration *task.RepositoryConfiguration) {
+	return &r.RepositoryConfiguration
+}
+
 // TemporaryRepository creates a temporary repository and initializes a git repo in it.
 func TemporaryRepository(logger *log.Logger, testDataPath string, repositoryPathRelative string) (repository *Repository, cleanup func(), err error) {
 	repositoryPathAbsolute := filepath.Join(testDataPath, repositoryPathRelative)

diff --git a/evaluate/task/repository_test.go b/evaluate/task/repository_test.go
@@ -218,3 +218,61 @@ func TestRepositoryLoadConfiguration(t *testing.T) {
 		},
 	})
 }
+
+// TestRepositoryConfigurationIsFilePathIgnored checks the result of "IsFilePathIgnored" for file paths against the configured ignore paths.
+func TestRepositoryConfigurationIsFilePathIgnored(t *testing.T) {
+	type testCase struct {
+		// Name holds the name of the sub-test.
+		Name string
+
+		// IgnoredPaths holds the ignore paths of the configuration under test, FilePath holds the file path that is checked against them.
+		IgnoredPaths []string
+		FilePath     string
+
+		// ExpectedBool holds the expected result of "IsFilePathIgnored".
+		ExpectedBool bool
+	}
+
+	// validate runs the test case as a sub-test on a configuration that only has its ignore paths set.
+	validate := func(t *testing.T, tc *testCase) {
+		t.Run(tc.Name, func(t *testing.T) {
+			actualBool := (&task.RepositoryConfiguration{
+				IgnorePaths: tc.IgnoredPaths,
+			}).IsFilePathIgnored(tc.FilePath)
+
+			assert.Equal(t, tc.ExpectedBool, actualBool)
+		})
+	}
+
+	validate(t, &testCase{
+		Name: "Exact Match",
+
+		IgnoredPaths: []string{
+			"foo/bar.txt",
+		},
+		FilePath: "foo/bar.txt",
+
+		ExpectedBool: true,
+	})
+	validate(t, &testCase{
+		Name: "No Match",
+
+		IgnoredPaths: []string{
+			"foo/bar.txt",
+		},
+		FilePath: "foo/baz.txt",
+
+		ExpectedBool: false,
+	})
+	// An ignored folder is expected to also ignore the files it contains.
+	validate(t, &testCase{
+		Name: "Folder",
+
+		IgnoredPaths: []string{
+			"foo",
+		},
+		FilePath: "foo/bar.txt",
+
+		ExpectedBool: true,
+	})
+}
diff --git a/evaluate/task/symflower.go b/evaluate/task/symflower.go
@@ -41,6 +41,7 @@ func symflowerTemplate(logger *log.Logger, repositoryPath string, language langu
 			"--language", language.ID(),
 			"--workspace", repositoryPath,
 			"--test-style", "basic",
+			"--code-disable-fetch-dependencies",
 			filePath,
 		},
 

diff --git a/evaluate/task/task.go b/evaluate/task/task.go
@@ -10,24 +10,22 @@ import (
 	"github.com/symflower/eval-dev-quality/language"
 	"github.com/symflower/eval-dev-quality/log"
 	evaltask "github.com/symflower/eval-dev-quality/task"
+	"github.com/symflower/eval-dev-quality/util"
 )
 
 var (
 	// AllIdentifiers holds all available task identifiers.
 	AllIdentifiers []evaltask.Identifier
-	// LookupIdentifier holds a map of all available task identifiers.
-	LookupIdentifier = map[evaltask.Identifier]bool{}
 )
 
 // registerIdentifier registers the given identifier and makes it available.
 func registerIdentifier(name string) (identifier evaltask.Identifier) {
-	identifier = evaltask.Identifier(name)
-	AllIdentifiers = append(AllIdentifiers, identifier)
-
-	if _, ok := LookupIdentifier[identifier]; ok {
+	if _, ok := util.Set(AllIdentifiers)[identifier]; ok {
 		panic(fmt.Sprintf("task identifier already registered: %s", identifier))
 	}
-	LookupIdentifier[identifier] = true
+
+	identifier = evaltask.Identifier(name)
+	AllIdentifiers = append(AllIdentifiers, identifier)
 
 	return identifier
 }