Skip to content

Commit c852b1d

Browse files
author
Adam Holm
committed
Update namespaces, add YAML eval methods, and docs
Updated namespaces to HillPhelmuth.SemanticKernel.LlmAsJudgeEvals across multiple files. Added new methods in EvalService.cs for handling evaluation functions from YAML. Updated dependencies in PromptFlowEvalsAsPlugins.csproj to version 1.18.2. Added a new Readme.md with detailed documentation.
1 parent 0906c3b commit c852b1d

File tree

14 files changed

+127
-45
lines changed

14 files changed

+127
-45
lines changed

Demo/PromptFlowEvalsAsPlugins.Demo/Components/EvalInputs/EvalDisplay.razor

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
<RadzenDataGrid @ref=_grid Data="EvalResultDisplays" TItem="EvalResultDisplay" AllowSorting=true AllowFiltering="true" FilterCaseSensitivity="FilterCaseSensitivity.CaseInsensitive" Density="Density.Compact" AllowPaging=true PageSize="12">
1+
@using HillPhelmuth.SemanticKernel.LlmAsJudgeEvals
2+
<RadzenDataGrid @ref=_grid Data="EvalResultDisplays" TItem="EvalResultDisplay" AllowSorting=true AllowFiltering="true" FilterCaseSensitivity="FilterCaseSensitivity.CaseInsensitive" Density="Density.Compact" AllowPaging=true PageSize="12">
23
<Template Context="display">
34
<RadzenTabs>
45
<Tabs>

Demo/PromptFlowEvalsAsPlugins.Demo/Components/EvalInputs/QnAGenerator.razor.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
using System;
33
using System.Collections.Generic;
44
using System.Linq;
5+
using HillPhelmuth.SemanticKernel.LlmAsJudgeEvals;
56

67
namespace PromptFlowEvalsAsPlugins.Demo.Components.EvalInputs;
78

Demo/PromptFlowEvalsAsPlugins.Demo/Components/EvalsRag/AddRagContent.razor.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
using System;
33
using System.Collections.Generic;
44
using System.Linq;
5+
using HillPhelmuth.SemanticKernel.LlmAsJudgeEvals;
56

67
namespace PromptFlowEvalsAsPlugins.Demo.Components.EvalsRag;
78

Demo/PromptFlowEvalsAsPlugins.Demo/EvalManager.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System.Text;
22
using System.Text.Json;
3+
using HillPhelmuth.SemanticKernel.LlmAsJudgeEvals;
34
using Microsoft.SemanticKernel;
45
using Microsoft.SemanticKernel.ChatCompletion;
56
using Microsoft.SemanticKernel.Connectors.OpenAI;

PromptFlowEvalsAsPlugins/EvalService.cs

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
1-
using Microsoft.Extensions.DependencyInjection;
1+
using System.Text.Json;
2+
using System.Text.Json.Serialization;
3+
using Microsoft.Extensions.DependencyInjection;
24
using Microsoft.SemanticKernel;
35
using Microsoft.SemanticKernel.ChatCompletion;
46
using Microsoft.SemanticKernel.Connectors.OpenAI;
57
using Microsoft.SemanticKernel.TextGeneration;
6-
using System.Reflection;
7-
using System.Text.Json.Serialization;
8-
using System.Text.Json;
9-
using Azure.AI.OpenAI;
8+
using OpenAI.Chat;
109

11-
namespace PromptFlowEvalsAsPlugins;
10+
namespace HillPhelmuth.SemanticKernel.LlmAsJudgeEvals;
1211

1312
public class EvalService
1413
{
@@ -33,7 +32,19 @@ public void AddEvalFunction(string name, KernelFunction function, bool overrideE
3332
else
3433
EvalFunctions.TryAdd(name, function);
3534
}
36-
public async Task<ResultScore> ExecuteEval(IInputModel inputModel)
35+
public void AddEvalFunctionFromYaml(Stream yamlStream, string name, bool overrideExisting = false)
36+
{
37+
var yamlText = new StreamReader(yamlStream).ReadToEnd();
38+
var function = _kernel.CreateFunctionFromPromptYaml(yamlText);
39+
AddEvalFunction(name, function, overrideExisting);
40+
}
41+
42+
public void AddEvalFunctionFromYaml(string yamlText, string name, bool overrideExisting = false)
43+
{
44+
var function = _kernel.CreateFunctionFromPromptYaml(yamlText);
45+
AddEvalFunction(name, function, overrideExisting);
46+
}
47+
public async Task<ResultScore> ExecuteEval(IInputModel inputModel)
3748
{
3849
var currentKernel = _kernel.Clone();
3950
if (currentKernel.Services.GetService<IChatCompletionService>() is null && currentKernel.Services.GetService<ITextGenerationService>() is null)
@@ -51,11 +62,11 @@ public async Task<ResultScore> ExecuteEval(IInputModel inputModel)
5162
Logprobs = true,
5263
TopLogprobs = 5
5364
};
54-
65+
5566
var kernelArgs = new KernelArguments(inputModel.RequiredInputs, new Dictionary<string, PromptExecutionSettings> { { "default", settings } });
5667
var result = await currentKernel.InvokeAsync(evalPlugin[inputModel.FunctionName], kernelArgs);
57-
var logProbs = result.Metadata?["LogProbabilityInfo"] as ChatChoiceLogProbabilityInfo;
58-
var tokenStrings = logProbs.TokenLogProbabilityResults.AsTokenStrings()[0];
68+
var logProbs = result.Metadata?["ContentTokenLogProbabilities"] as IReadOnlyList<ChatTokenLogProbabilityInfo>;
69+
var tokenStrings = logProbs.AsTokenStrings()[0];
5970
return new ResultScore(inputModel.FunctionName, tokenStrings);
6071
}
6172

@@ -90,8 +101,8 @@ public async Task<ResultScore> ExecuteScorePlusEval(IInputModel inputModel)
90101
};
91102
var finalArgs = new KernelArguments(inputModel.RequiredInputs, new Dictionary<string, PromptExecutionSettings> { { PromptExecutionSettings.DefaultServiceId, settings } });
92103
var result = await kernel.InvokeAsync(evalPlugin[inputModel.FunctionName], finalArgs);
93-
var logProbs = result.Metadata?["LogProbabilityInfo"] as ChatChoiceLogProbabilityInfo;
94-
var tokenStrings = logProbs.TokenLogProbabilityResults.AsTokenStrings();
104+
var logProbs = result.Metadata?["LogProbabilityInfo"] as IReadOnlyList<ChatTokenLogProbabilityInfo>;
105+
var tokenStrings = logProbs?.AsTokenStrings();
95106
var scoreResult = result.GetTypedResult<ScorePlusResponse>();
96107
return new ResultScore(inputModel.FunctionName, scoreResult, tokenStrings);
97108
}

PromptFlowEvalsAsPlugins/EvalType.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
namespace PromptFlowEvalsAsPlugins;
1+
namespace HillPhelmuth.SemanticKernel.LlmAsJudgeEvals;
22

33
/// <summary>
44
/// Represents the evaluation type.

PromptFlowEvalsAsPlugins/Extentions.cs

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,8 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.Linq;
4-
using System.Text;
5-
using System.Text.Json;
6-
using System.Threading.Tasks;
1+
using System.Text.Json;
72
using Microsoft.SemanticKernel;
8-
using static PromptFlowEvalsAsPlugins.Helpers;
3+
using static HillPhelmuth.SemanticKernel.LlmAsJudgeEvals.Helpers;
94

10-
namespace PromptFlowEvalsAsPlugins;
5+
namespace HillPhelmuth.SemanticKernel.LlmAsJudgeEvals;
116

127
public static class Extentions
138
{

PromptFlowEvalsAsPlugins/Helpers.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
using Microsoft.SemanticKernel;
2-
using System.Reflection;
1+
using System.Reflection;
32
using System.Text.Json;
3+
using Microsoft.SemanticKernel;
44

5-
namespace PromptFlowEvalsAsPlugins;
5+
namespace HillPhelmuth.SemanticKernel.LlmAsJudgeEvals;
66

77
public static class Helpers
88
{

PromptFlowEvalsAsPlugins/IInputModel.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
using Microsoft.SemanticKernel;
22

3-
namespace PromptFlowEvalsAsPlugins;
3+
namespace HillPhelmuth.SemanticKernel.LlmAsJudgeEvals;
44

55
public interface IInputModel
66
{

PromptFlowEvalsAsPlugins/InputModels.cs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11
using Microsoft.SemanticKernel;
2-
using System;
3-
using System.Collections.Generic;
4-
using System.Linq;
5-
using System.Text;
6-
using System.Threading.Tasks;
72

8-
namespace PromptFlowEvalsAsPlugins;
3+
namespace HillPhelmuth.SemanticKernel.LlmAsJudgeEvals;
94

105
/// <summary>
116
/// Represents an input model for evaluation.

PromptFlowEvalsAsPlugins/PromptFlowEvalsAsPlugins.csproj

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,21 @@
55
<ImplicitUsings>enable</ImplicitUsings>
66
<Nullable>enable</Nullable>
77
<NoWarn>SKEXP0010</NoWarn>
8-
9-
<Title>PromptFlow Eval Flows or Custom Evals as SemanticKernel Plugins</Title>
8+
<RootNamespace>HillPhelmuth.SemanticKernel.LlmAsJudgeEvals</RootNamespace>
9+
<Title>Run "LLM as Judge" Evals Using Semantic Kernel</Title>
1010
<PackageProjectUrl>https://github.com/HillPhelmuth/PromptFlowEvalsAsPlugins</PackageProjectUrl>
1111
<RepositoryUrl>https://github.com/HillPhelmuth/PromptFlowEvalsAsPlugins</RepositoryUrl>
1212
<PackageOutputPath>C:\Users\adamh\source\LocalPackages</PackageOutputPath>
1313
<GeneratePackageOnBuild>False</GeneratePackageOnBuild>
1414
<PackageIcon>packageIcon.png</PackageIcon>
15+
<Version>0.0.1-beta</Version>
16+
<Authors>HillPhelmuth</Authors>
17+
<Description>Enable seamless execution of LLM (Large Language Model) evaluations using Semantic Kernel. This library provides tools and abstractions for running automated assessments where LLMs serve as judges, offering structured, consistent, and scalable evaluation methods. Ideal for AI-driven projects that require evaluative feedback, scoring, or comparative analysis across various use cases. Easily integrates with Semantic Kernel for smooth, flexible LLM operations in .NET environments.</Description>
1518

1619
</PropertyGroup>
1720
<PropertyGroup Condition="'$(Configuration)' == 'Release'">
1821
<GeneratePackageOnBuild>True</GeneratePackageOnBuild>
19-
<Version>0.0.3-beta</Version>
22+
<Version>0.0.4-beta</Version>
2023
</PropertyGroup>
2124
<ItemGroup>
2225
<None Remove="EvalPluginYaml\Empathy.yaml" />
@@ -49,9 +52,9 @@
4952

5053

5154
<ItemGroup>
52-
<PackageReference Include="Microsoft.SemanticKernel" Version="1.17.1" />
53-
<PackageReference Include="Microsoft.SemanticKernel.Plugins.Memory" Version="1.17.1-alpha" />
54-
<PackageReference Include="Microsoft.SemanticKernel.Yaml" Version="1.17.1" />
55+
<PackageReference Include="Microsoft.SemanticKernel" Version="1.18.2" />
56+
<PackageReference Include="Microsoft.SemanticKernel.Plugins.Memory" Version="1.18.2-alpha" />
57+
<PackageReference Include="Microsoft.SemanticKernel.Yaml" Version="1.18.2" />
5558
</ItemGroup>
5659

5760
<ItemGroup>

PromptFlowEvalsAsPlugins/Readme.md

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# LlmAsJudgeEvals
2+
3+
This library provides a service for evaluating responses from Large Language Models (LLMs) using the LLM itself as a judge. It leverages Semantic Kernel to define and execute evaluation functions based on prompt templates.
4+
5+
## Installation
6+
7+
Install the package via NuGet:
8+
9+
```
10+
Install-Package HillPhelmuth.SemanticKernel.LlmAsJudgeEvals
11+
```
12+
13+
## Usage
14+
15+
### Built-in Evaluation Functions
16+
17+
```csharp
18+
19+
// Initialize the Semantic Kernel
20+
var kernel = Kernel.CreateBuilder().AddOpenAIChatCompletion("openai-model-name", "openai-apiKey").Build();
21+
22+
// Create an instance of the EvalService
23+
var evalService = new EvalService(kernel);
24+
25+
// Create an input model for the built-in evaluation function
26+
var coherenceInput = InputModel.CoherenceModel("This is the answer to evaluate.", "This is the question or prompt that generated the answer");
27+
28+
// Execute the evaluation
29+
var result = await evalService.ExecuteEval(coherenceInput);
30+
31+
32+
Console.WriteLine($"Evaluation score: {result.Score}");
33+
34+
```
35+
36+
### Custom Evaluation Functions
37+
38+
```csharp
39+
40+
// Initialize the Semantic Kernel
41+
var kernel = Kernel.CreateBuilder().AddOpenAIChatCompletion("openai-model-name", "openai-apiKey").Build();
42+
43+
// Create an instance of the EvalService
44+
var evalService = new EvalService(kernel);
45+
46+
// Add a custom evaluation function from a YAML prompt template (optional)
47+
evalService.AddEvalFunctionFromYaml(yamlPromptText, "MyEvalFunction");
48+
49+
// Create an input model for the evaluation function
50+
var inputModel = new InputModel
51+
{
52+
FunctionName = "MyEvalFunction", // Replace with the name of your evaluation function
53+
RequiredInputs = new Dictionary<string, string>
54+
{
55+
{ "input", "This is the text to evaluate." }
56+
}
57+
};
58+
59+
// Execute the evaluation
60+
var result = await evalService.ExecuteEval(inputModel);
61+
62+
63+
Console.WriteLine($"Evaluation score: {result.Score}");
64+
```
65+
66+
## Features
67+
68+
* **Define evaluation functions using prompt templates:** You can define evaluation functions using prompt templates written in YAML.
69+
* **Execute evaluations:** The `EvalService` provides methods for executing evaluations on input data.
70+
* **Aggregate results:** The `EvalService` can aggregate evaluation scores across multiple inputs.
71+
* **Built-in evaluation functions:** The package includes a set of pre-defined evaluation functions based on common evaluation metrics.
72+
73+
74+

PromptFlowEvalsAsPlugins/ResultScore.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
using Microsoft.SemanticKernel;
22

3-
namespace PromptFlowEvalsAsPlugins;
3+
namespace HillPhelmuth.SemanticKernel.LlmAsJudgeEvals;
44

55
public class ResultScore
66
{

PromptFlowEvalsAsPlugins/TokenString.cs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
using Azure.AI.OpenAI;
1+
using OpenAI.Chat;
22

3-
namespace PromptFlowEvalsAsPlugins;
3+
namespace HillPhelmuth.SemanticKernel.LlmAsJudgeEvals;
44

55
public record TokenString
66
{
@@ -35,15 +35,15 @@ public override string ToString()
3535
}
3636
public static class LogProbExts
3737
{
38-
public static List<TokenString> AsTokenStrings(this IEnumerable<ChatTokenLogProbabilityResult> logProbContentItems)
38+
public static List<TokenString> AsTokenStrings(this IReadOnlyList<ChatTokenLogProbabilityInfo> logProbContentItems)
3939
{
4040
var result = new List<TokenString>();
4141
foreach (var logProb in logProbContentItems)
4242
{
4343
var tokenString = new TokenString(logProb.Token, logProb.ToLinearProb());
44-
if (logProb.TopLogProbabilityEntries is { Count: > 0 })
44+
if (logProb.TopLogProbabilities is { Count: > 0 })
4545
{
46-
var innerResult = logProb.TopLogProbabilityEntries.Select(item => new TokenString(item.Token, item.ToLinearProb())).ToList();
46+
var innerResult = logProb.TopLogProbabilities.Select(item => new TokenString(item.Token, item.ToLinearProb())).ToList();
4747
tokenString.TopLogProbs = innerResult;
4848
}
4949
result.Add(tokenString);
@@ -87,7 +87,7 @@ public static IEnumerable<TokenProb> NormalizeValues(this IEnumerable<TokenProb>
8787
}
8888
//return tokenProbs.Select(token => new TokenProb(token.StringValue, token.Probability / sum));
8989
}
90-
public static double ToLinearProb(this ChatTokenLogProbabilityResult logProbabilityResult) => Math.Exp(logProbabilityResult.LogProbability);
90+
public static double ToLinearProb(this ChatTokenTopLogProbabilityInfo logProbabilityResult) => Math.Exp(logProbabilityResult.LogProbability);
9191

9292
public static double ToLinearProb(this ChatTokenLogProbabilityInfo logProbInfo) => Math.Exp(logProbInfo.LogProbability);
9393
}

0 commit comments

Comments
 (0)