Skip to content

Commit cfc8a94

Browse files
committed
Resolve merge conflict
2 parents c7a57aa + 981b11e commit cfc8a94

21 files changed

+1810
-202
lines changed

.github/workflows/ci.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ on:
1010

1111
jobs:
1212
build:
13-
runs-on: ubuntu-latest
13+
runs-on: ubuntu-24.04
1414

1515
strategy:
1616
matrix:
Original file line numberDiff line numberDiff line change
@@ -1 +1,5 @@
1-
OPENAI_API_KEY=xxx
1+
ENV=dev
2+
# LLM_MODE can be explicitly set to local, remote, or mock.
3+
# For testing, you can set ENV=test (defaulting LLM mode to mock) or override LLM_MODE directly.
4+
# LLM_MODE=local
5+
REMOTE_MODEL_URL=https://tai.berkeley.edu/api/chat

evaluation/README.md

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Evaluation Pipeline
2+
3+
TAI is equipped with the ability to measure the reliability and accuracy of its underlying Retrieval-Augmented Generation (RAG) agent. To simplify dataset creation and evaluation, this module provides customized evaluation functionality, ranging from creating evaluation datasets to implementing evaluation algorithms specifically designed for TAI.
4+
5+
## Features
6+
7+
- **Dataset Generation**: Seamlessly generate evaluation datasets tailored to the needs of the TAI system.
8+
- **Analysis Tools**: Analyze generated datasets to uncover biases and visualize relationships using Sankey graphs.
9+
10+
## Setup
11+
12+
1. **Install Requirements**: Install the required dependencies by running the following command:
13+
```sh
14+
pip install -r requirements.txt
15+
```
16+
17+
2. **Set Environment Variables**: Ensure the `OPENAI_API_KEY` is stored as an environment variable. You can add it to your `.bashrc`, `.zshrc`, or `.env` file for persistent configuration:
18+
```sh
19+
export OPENAI_API_KEY="your_api_key_here"
20+
```
21+
22+
3. **Prepare Input Data**: Place your input JSON file in the following directory:
23+
```
24+
/evaluation/dataset_generate/input
25+
```
26+
27+
## Dataset Generation
28+
29+
To generate an evaluation dataset, run the following command:
30+
```sh
31+
python -m evaluation.dataset_generate.generate <input_filename> [--num_pairs] [--quiet]
32+
```
33+
34+
### Arguments:
35+
- `<input_filename>`: Name of the input JSON file located in `/evaluation/dataset_generate/input`.
36+
- `--num_pairs`: (Optional) Specify the number of pairs to generate.
37+
- `--quiet`: (Optional) Suppress output logs for a cleaner console experience.
38+
39+
Example:
40+
```sh
41+
python -m evaluation.dataset_generate.generate sample_input.json --num_pairs 50
42+
```
43+
44+
## Dataset Analysis
45+
46+
To analyze the generated dataset for bias statistics and visualize relationships using a Sankey graph, use the following command:
47+
```sh
48+
python -m evaluation.dataset_analyze.analyze <input_filename> [--graph]
49+
```
50+
51+
### Arguments:
52+
- `<input_filename>`: Name of the input dataset file to analyze; biases are computed from the category labels of the input and output datasets.
53+
- `--graph`: Option to generate the Sankey graph visualization.
54+
55+
Example:
56+
```sh
57+
python -m evaluation.dataset_analyze.analyze generated_dataset.json --graph
58+
```
59+
60+
## Output Files
61+
62+
- **Generated Datasets**: Saved in `/evaluation/dataset_generate/output`.
63+
- **Analysis Results**: Saved in `/evaluation/dataset_analyze/output`.
+162
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
import json
2+
import os
3+
from pprint import pprint
4+
import argparse
5+
import plotly.graph_objects as go
6+
from evaluation.dataset_generate.generate import generate_qa_pairs
7+
from rag.file_conversion_router.conversion.ed_converter import json_kb_filter
8+
9+
def compute_bias(data):
10+
11+
categories = []
12+
category_dict = {}
13+
total_count = 0
14+
15+
for entry in data:
16+
17+
category = entry['category']
18+
if category not in categories and category not in category_dict:
19+
categories.append(category)
20+
category_dict[category] = 1
21+
else:
22+
category_dict[category] += 1
23+
total_count += 1
24+
25+
biases = {key: value / total_count for key, value in category_dict.items()}
26+
27+
return biases
28+
29+
30+
def analyze(input_filename, graph=True):
    """Compare category labels before and after dataset generation.

    Loads the raw input JSON from evaluation/dataset_generate/input and the
    generated dataset from evaluation/dataset_generate/output, computes
    per-category bias statistics for both, records how each
    (question, answer) pair's category changed, writes the combined result
    as JSON, and optionally renders a Sankey diagram of the transitions.

    Args:
        input_filename: Name of the input JSON file (file name only, not a path).
        graph: When True, also write a Sankey diagram PNG of the transitions.
    """
    original_file_path = os.path.join("evaluation", "dataset_generate", "input", input_filename)
    generated_file_path = os.path.join("evaluation", "dataset_generate", "output", f"evaluation_dataset_{input_filename}")

    with open(original_file_path, 'r') as file:
        original_file = json.load(file)

    with open(generated_file_path, 'r') as file:
        generated_dataset = json.load(file)

    # Re-derive the "before" dataset from the raw input so its category
    # labels can be compared against the generated ("after") dataset.
    original_cleaned = json_kb_filter(original_file)
    original_dataset = generate_qa_pairs(original_cleaned)

    biases_stats = {
        "Before": compute_bias(original_dataset),
        "After": compute_bias(generated_dataset)
    }

    # Index generated pairs by (question, answer) for O(1) lookup.
    after_dict = {(entry["question"], entry["answer"]): entry["category"] for entry in generated_dataset}

    change = []
    for entry in original_dataset:
        question, answer = entry["question"], entry["answer"]
        # Pairs absent from the generated dataset are marked "Invalid".
        # Using a .get default merges the original's two duplicated branches
        # and also keeps a falsy-but-present category, which the original
        # truthiness check silently converted to "Invalid".
        change.append({
            "question": question,
            "answer": answer,
            "before_category": entry["category"],
            "after_category": after_dict.get((question, answer), "Invalid")
        })

    result = {
        "biases": biases_stats,
        "results_comparison": change
    }

    pprint(biases_stats)

    output_path = os.path.join("evaluation", "dataset_generate", "output", f"datasets_analysis_{input_filename}")
    with open(output_path, 'w') as file:
        json.dump(result, file, indent=4)

    if graph:
        # Count how many pairs moved from each before-category to each
        # after-category.
        transitions = {}
        for entry in change:
            key = (entry["before_category"], entry["after_category"])
            transitions[key] = transitions.get(key, 0) + 1

        before_labels = sorted({before for before, _ in transitions})
        after_labels = sorted({after for _, after in transitions})
        all_labels = before_labels + after_labels

        # Keep separate index maps for the two columns. The original built a
        # single dict over all_labels, so a category appearing on both sides
        # collapsed onto one node index and its links were drawn wrong.
        before_index = {label: i for i, label in enumerate(before_labels)}
        after_index = {label: len(before_labels) + i for i, label in enumerate(after_labels)}

        # Link colors follow the source (before) category; unknown categories
        # fall back to light grey.
        category_colors = {
            "General": "#aec7e8",       # Light Blue
            "Problem Sets": "#ffbb78",  # Light Orange
            "Assignments": "#98df8a",   # Light Green
            "Lectures": "#ff9896",      # Light Red
            "Sections": "#c5b0d5",      # Light Purple
            "Social": "#c49c94"         # Light Brown
        }

        link_sources = []
        link_targets = []
        link_values = []
        link_colors = []

        for (before, after), count in transitions.items():
            link_sources.append(before_index[before])
            link_targets.append(after_index[after])
            link_values.append(count)
            link_colors.append(category_colors.get(before, "#d3d3d3"))

        fig = go.Figure(go.Sankey(
            node=dict(
                pad=15,
                thickness=20,
                line=dict(color="black", width=0.5),
                label=all_labels,
                color=["#a0c4ff"] * len(before_labels) + ["#ffc09f"] * len(after_labels)
            ),
            link=dict(
                source=link_sources,
                target=link_targets,
                value=link_values,
                color=link_colors
            )
        ))

        output_path = os.path.join("evaluation", "dataset_generate", "output", f"category_flow_diagram_{os.path.splitext(input_filename)[0]}.png")

        fig.update_layout(title_text="Diagram of Category Transitions", font_size=10)
        fig.write_image(output_path)

        print(f"Sankey diagram saved to {output_path}")
152+
153+
if __name__ == "__main__":
    # CLI entry point: analyze a generated dataset against its source input.
    # The description previously read "Generate evaluation dataset" — a
    # copy-paste from the generate script; this is the analyze tool.
    parser = argparse.ArgumentParser(description="Analyze evaluation dataset")
    parser.add_argument("input_filename", type=str, help="The input JSON file")
    parser.add_argument("--graph", action="store_true", help="Generate Graph")

    args = parser.parse_args()
    analyze(args.input_filename, args.graph)
160+
161+
162+

0 commit comments

Comments
 (0)