Merge branch 'dev'

derjogi · derjogi · commit 04e789db6b8a · 2025-03-03T12:12:28.000+13:00
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -0,0 +1,23 @@
+# SimScore API Dev Guide
+
+## Commands
+- **Setup:** `poetry install --no-root && poetry shell`
+- **Local Dev:** `supabase start && fastapi dev`
+- **Tests:** 
+  - All: `pytest tests/`
+  - Single file: `pytest tests/api/v1/routes/test_ideas.py`
+  - Single test: `pytest tests/api/v1/routes/test_ideas.py::test_function_name`
+- **Lint/Format:** `black app/ tests/`
+- **Type Check:** `mypy app/ tests/`
+
+## Code Style
+- **Imports:** stdlib → third-party → project (alphabetically within groups)
+- **Naming:** `snake_case` for variables/functions, `PascalCase` for classes, `UPPER_CASE` for constants
+- **Types:** Use type annotations for all function parameters and return values
+- **Error Handling:** Use FastAPI `HTTPException` with appropriate status codes
+- **Documentation:** Google-style docstrings with triple double-quotes
+- **Architecture:** Follow FastAPI patterns with routes, models, services separation
+- **Formatting:** Project uses Black with default settings
+
+## Environment
+This project uses Poetry for dependency management and FastAPI as the API framework.
diff --git a/app/api/v1/routes/ideas.py b/app/api/v1/routes/ideas.py
@@ -93,8 +93,19 @@ async def rank_ideas(
             ideaRequest, response, user_id, ideas, plot_data, num_ideas, total_bytes
         )
 
-    print('Results calculated successfully!\n', response)
-    return AnalysisResponse(**response)
+    results = AnalysisResponse(**response)
+    
+    print('Results calculated successfully!')
+    if results.ranked_ideas:
+        print('First 5 ranked ideas:', results.ranked_ideas[:5])
+    if results.pairwise_similarity_matrix:
+        print('First 5 similarity scores:', results.pairwise_similarity_matrix[:5])
+    if results.cluster_names:
+        print('First 5 cluster names:', results.cluster_names[:5])
+    if results.relationship_graph:
+        print('First 5 graph nodes & edges:', results.relationship_graph.nodes[:5], results.relationship_graph.edges[:5])    
+    
+    return results
 
 def _generate_edges(ranked_ideas: List[RankedIdea], similarity_matrix: List[List[float]]) -> List[dict]:
     """
@@ -134,12 +145,13 @@ async def build_base_response(ideas: List[str], results: Results, plot_data: Plo
     
     ranked_ideas = [
         RankedIdea(
-            id=str(idea_to_input[idea].id) if idea_to_input[idea].id is not None else str(idx),
+            id=str(idea_to_input[idea].id) if idea_to_input[idea].id is not None else str(index),
             idea=idea,
-            similarity_score=results["similarity"][idx],
-            cluster_id=plot_data["kmeans_data"]["cluster"][idx],
+            author_id=str(idea_to_input[idea].author_id) if idea_to_input[idea].author_id is not None else '',
+            similarity_score=results["similarity"][index],
+            cluster_id=plot_data["kmeans_data"]["cluster"][index],
         )
-        for idx, idea in enumerate(results["ideas"])
+        for index, idea in enumerate(results["ideas"])
     ]
     
     ranked_ideas.sort(key=lambda x: x.similarity_score, reverse=True)
diff --git a/aux_tools/convert_file.py b/aux_tools/convert_file.py
@@ -48,8 +48,9 @@ def convert_harmonica_to_request(
 
 def convert_spreadsheet_to_request(
     file_path: str,
-    id_column: str,
     data_column: str,
+    id_column: str = None,
+    author_column: str = None,
     advanced_features: AdvancedFeatures = {}
 ) -> IdeaRequest:
     """
@@ -60,8 +61,9 @@ def convert_spreadsheet_to_request(
     
     Args:
         file_path: Path to the spreadsheet file (xlsx, csv, etc)
-        id_column: Name of the column containing IDs
         data_column: Name of the column containing idea text
+        id_column: Name of the column containing IDs
+        author_column: Name of the column containing Author names
         advanced_features: Whether to include advanced analysis features
         
     Returns:
@@ -74,18 +76,31 @@ def convert_spreadsheet_to_request(
         df = pd.read_excel(file_path)
     
     # Validate columns exist
-    if id_column not in df.columns or data_column not in df.columns:
-        raise ValueError(f"Columns {id_column} and/or {data_column} not found in spreadsheet")
+    if data_column not in df.columns:
+        raise ValueError(f"Data Column {data_column} not found in spreadsheet")
+    
+    
     
     # Convert rows to IdeaInput objects
-    ideas = [
-        IdeaInput(
-            id=str(row[id_column]),  # Convert to string to handle various ID formats
-            idea=str(row[data_column]).strip(),
-        )
-        for _, row in df.iterrows()
-        if pd.notna(row[data_column])  # Skip rows with empty ideas
-    ]
+    ideas = []
+    for idx, row in df.iterrows():
+        if pd.notna(row[data_column]):
+            idea_input = {
+                'idea': str(row[data_column]).strip(),
+            }
+            
+            # Add ID if column specified and value exists
+            if id_column and id_column in df.columns:
+                idea_input['id'] = str(row[id_column]) if pd.notna(row[id_column]) else str(idx)
+            else:
+                idea_input['id'] = str(idx)
+                
+            # Add author if column specified and value exists    
+            if author_column and author_column in df.columns and pd.notna(row[author_column]):
+                idea_input['author_id'] = str(row[author_column])
+                
+            ideas.append(IdeaInput(**idea_input))
+    
     
     # Create request object
     request = IdeaRequest(
@@ -131,29 +146,35 @@ def convert_request_to_spreadsheet(request_data: dict, output_path: str = "outpu
 if __name__ == "__main__":
     import argparse
     
+    print("Starting conversion")
+    
     parser = argparse.ArgumentParser(description='Convert file to IdeaRequest format')
     parser.add_argument('file_path', help='Path to the input file (xlsx, csv, or json)')
-    parser.add_argument('--id_column', help='Name of the column containing IDs (for spreadsheets)')
     parser.add_argument('--data_column', help='Name of the column containing idea text (for spreadsheets)')
+    parser.add_argument('--id_column', help='Name of the column containing IDs (for spreadsheets)')
+    parser.add_argument('--author_column', help='Name of the column containing Author names (for spreadsheets)')
     parser.add_argument('--save_as_spreadsheet', help='Save the output as spreadsheet (for json input)')
     
     args = parser.parse_args()
     
     try:
         if args.file_path.endswith('.json'):
+            print("Convertin Harmonica JSON to SimScore JSON...")
             request = convert_harmonica_to_request(args.file_path, 
                           AdvancedFeatures(relationship_graph=True, 
                                            pairwise_similarity_matrix=True, 
                                            cluster_names=True)
                       )
             print(f"Successfully converted {len(request.ideas)} paragraphs from chat data.")
         else:
-            if not args.id_column or not args.data_column:
-                raise ValueError("id_column and data_column are required for spreadsheet conversion")
+            print("Converting Spreadsheet to SimScore")
+            if not args.data_column:
+                raise ValueError("data_column is required for spreadsheet conversion")
             request = convert_spreadsheet_to_request(
                 file_path=args.file_path,
+                data_column=args.data_column,
                 id_column=args.id_column,
-                data_column=args.data_column
+                author_column=args.author_column
             )
             print(f"Successfully converted {len(request.ideas)} ideas from spreadsheet.")
         
diff --git a/aux_tools/google/ColumnSelector.html b/aux_tools/google/ColumnSelector.html
@@ -0,0 +1,160 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <base target="_top">
+    <link href="https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&display=swap" rel="stylesheet">
+    <style>
+      body {
+        font-family: 'Google Sans', sans-serif;
+      }
+    </style>
+
+    <style>
+      .form-group {
+        margin: 10px 0;
+      }
+      select {
+        width: 100%;
+        padding: 5px;
+      }
+      .required {
+        color: red;
+      }
+    </style>
+
+    <style>
+      .spinner {
+        width: 40px;
+        height: 40px;
+        border: 4px solid #f3f3f3;
+        border-top: 4px solid #3498db;
+        border-radius: 50%;
+        animation: spin 1s linear infinite;
+        margin: 20px auto;
+      }
+      
+      @keyframes spin {
+        0% { transform: rotate(0deg); }
+        100% { transform: rotate(360deg); }
+      }
+      
+      .loading-message {
+        margin: 10px;
+        font-style: italic;
+        color: #666;
+      }
+    </style>
+
+
+  </head>
+  <body>
+    <form onsubmit="handleSubmit(this); return false;">
+      
+      <div class="form-group">
+        <label>Ideas Column <span class="required">*</span></label>
+        <select name="ideaColumn" required>
+          <option value="">Select column...</option>
+          <? headers.forEach(function(header) { ?>
+            <option value="<?= header ?>"><?= header ?></option>
+          <? }); ?>
+        </select>
+      </div>
+
+      <div class="form-group">
+        <label>ID Column (optional)</label>
+        <select name="idColumn">
+          <option value="">None</option>
+          <? headers.forEach(function(header) { ?>
+            <option value="<?= header ?>"><?= header ?></option>
+          <? }); ?>
+        </select>
+      </div>
+
+      <div class="form-group">
+        <label>Author Column (optional)</label>
+        <select name="authorColumn">
+          <option value="">None</option>
+          <? headers.forEach(function(header) { ?>
+            <option value="<?= header ?>"><?= header ?></option>
+          <? }); ?>
+        </select>
+      </div>
+
+      <div class="form-group">
+        <label>Number of Ranked Results </label>
+        <input type="number" name="resultCount" min="1" max="500" value="10" required>
+      </div>
+      
+      <button type="submit">Analyze</button>
+    </form>
+    
+    <div id="loadingIndicator" style="display: none; text-align: center; margin-top: 20px;">
+      <div class="spinner"></div>
+      <div id="loadingMessage" class="loading-message">Initializing SimScore analysis...</div>
+    </div>
+    
+    <script>
+      // Status messages displayed one at a time in the #loadingMessage element
+      // by updateLoadingMessage() while the analysis request is pending.
+      const loadingMessages = [
+        "Optimizing neural pathways...",
+        "Calibrating relativity biases...",
+        "Measuring cosine distances in vector space...",
+        "Aligning semantic tensors...",
+        "Computing similarity matrices...",
+        "Discovering idea clusters...",
+        "Calculating innovation potential...",
+        "Synchronizing thought vectors..."
+      ];
+    
+      function updateLoadingMessage() {
+        const messageElement = document.getElementById('loadingMessage');
+        let currentIndex = 0;
+        
+        return setInterval(() => {
+          messageElement.textContent = loadingMessages[currentIndex];
+          currentIndex = (currentIndex + 1) % loadingMessages.length;
+        }, 5000);
+      }
+    
+      function handleSubmit(form) {
+        document.querySelector('form').style.display = 'none';
+        document.getElementById('loadingIndicator').style.display = 'block';
+        
+        // Start cycling through messages
+        const messageInterval = updateLoadingMessage();
+        
+        const data = {
+          idColumn: form.idColumn.value,
+          ideaColumn: form.ideaColumn.value,
+          authorColumn: form.authorColumn.value,
+          resultCount: parseInt(form.resultCount.value) || 10
+        };
+        
+        google.script.run
+          .withSuccessHandler(() => {
+            clearInterval(messageInterval);
+            closeDialog();
+          })
+          .withFailureHandler((error) => {
+            clearInterval(messageInterval);
+            handleError(error);
+          })
+          .processSelectedColumns(data);
+      }
+
+      // Closes this dialog through the Apps Script host API; invoked from
+      // handleSubmit's success handler once the server call has completed.
+      function closeDialog() {
+        google.script.host.close();
+      }
+      
+      function handleError(error) {
+        // Show form again if there's an error
+        document.querySelector('form').style.display = 'block';
+        document.getElementById('loadingIndicator').style.display = 'none';
+        showError(error);
+      }
+      
+      function showError(error) {
+        alert('Error: ' + error);
+      }
+    </script>
+  </body>
+</html>
diff --git a/aux_tools/google/code.gs b/aux_tools/google/code.gs
diff --git a/fly.prod.toml b/fly.prod.toml