Merge pull request #24 from Codium-ai/15-feature-request-support-locally-running-llm

Moving to LiteLLM with env var approach. Closes #15.
mrT23 authored May 22, 2024
2 parents 71cf0df + 20c7c5f commit e9654cc
Showing 8 changed files with 1,087 additions and 251 deletions.
33 changes: 31 additions & 2 deletions .github/workflows/ci_pipeline.yml
@@ -58,11 +58,33 @@ jobs:
pythonLocation: /opt/hostedtoolcache/Python/3.12.2/x64
LD_LIBRARY_PATH: /opt/hostedtoolcache/Python/3.12.2/x64/lib

package-test:
needs: test
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.x'
- name: Install Poetry
run: pip install poetry
- name: Install dependencies using Poetry
run: poetry install
- name: Build, Install and Test Package from Different Location
run: |
poetry build
pip install dist/*.whl
cd /tmp
cover-agent --help
build:
needs: test
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
# os: [ubuntu-latest, windows-latest, macos-latest]
os: [ ubuntu-latest, macos-latest ]

runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
@@ -77,6 +99,13 @@ jobs:
- name: Build Executable
run: |
poetry run pyinstaller --add-data "cover_agent/prompt_template.md:." --add-data "cover_agent/version.txt:." --hidden-import=tiktoken_ext.openai_public --hidden-import=tiktoken_ext --onefile --name cover-agent-${{ matrix.os }} cover_agent/main.py
- name: Test Executable
run: |
if [ "${{ matrix.os }}" = "windows-latest" ]; then
./dist/cover-agent-${{ matrix.os }}.exe --help
else
./dist/cover-agent-${{ matrix.os }} --help
fi
- name: Upload Executable
uses: actions/upload-artifact@v2
with:
@@ -95,7 +124,7 @@ jobs:
path: dist
- name: Extract version
run: |
echo "VERSION=$(cat cover_agent/version.txt)" >> $GITHUB_ENV
echo "VERSION=$(cat cover_agent/version.txt)" >> $GITHUB_ENV
- name: Create Release
id: create_release
uses: actions/create-release@v1
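The new `package-test` job above builds a wheel with Poetry, installs it, and runs the CLI from `/tmp`, so the check verifies the installed entry point rather than the source tree. A hedged local equivalent, assuming Poetry and pip are available on the PATH:

```shell
poetry build                    # produce dist/*.whl
pip install dist/*.whl          # install the built wheel
cd /tmp && cover-agent --help   # run the installed CLI outside the repository
```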
18 changes: 17 additions & 1 deletion README.md
@@ -133,7 +133,7 @@ poetry run cover-agent \
--coverage-type "cobertura" \
--desired-coverage 70 \
--max-iterations 1 \
--openai-model "gpt-4o"
--model "gpt-4o"
```

Note: If you are using Poetry, use the `poetry run cover-agent` command instead of the plain `cover-agent` command.
@@ -150,6 +150,22 @@ A few debug files will be outputted locally within the repository (that are part
* `stdout`
* Generated test

### Using other LLMs
This project uses LiteLLM to communicate with OpenAI and other hosted LLMs (supporting 100+ LLMs to date). To use a model other than the OpenAI default, you'll need to:
1. Export any environment variables needed by the supported LLM [following the LiteLLM instructions](https://litellm.vercel.app/docs/proxy/quick_start#supported-llms).
2. Pass the model name via the `--model` option when calling Cover Agent.

For example (as found in the [LiteLLM Quick Start guide](https://litellm.vercel.app/docs/proxy/quick_start#supported-llms)):
```shell
export VERTEX_PROJECT="hardy-project"
export VERTEX_LOCATION="us-west"

cover-agent \
...
--model "vertex_ai/gemini-pro"
```
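
A further hypothetical example for a locally running model served through Ollama: the `ollama/<model>` prefix is LiteLLM's convention for Ollama models, while the `OLLAMA_API_BASE` variable and the default port shown here are assumptions to check against the LiteLLM docs.
```shell
# Assumes an Ollama server is already running on its default local port.
export OLLAMA_API_BASE="http://localhost:11434"

cover-agent \
  ...
  --model "ollama/mistral"
```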


## Development
This section discusses the development of this project.

70 changes: 14 additions & 56 deletions cover_agent/AICaller.py
@@ -1,8 +1,5 @@
import os
import time
import tiktoken
from openai import OpenAI

import litellm

class AICaller:
def __init__(self, model):
@@ -11,74 +8,35 @@ def __init__(self, model):
Parameters:
model (str): The name of the model to be used.
Raises:
ValueError: If the OPENAI_API_KEY environment variable is not found or if there is an error in getting the encoding.
"""
self.api_key = os.getenv("OPENAI_API_KEY")
if not self.api_key:
raise ValueError("OPENAI_API_KEY environment variable not found.")

self.model = model
self.openai_client = OpenAI(api_key=self.api_key)

# Initialize the encoding for the model
try:
self.encoding = tiktoken.get_encoding("cl100k_base")
except Exception as e:
raise ValueError(f"Failed to get encoding: {e}")

def call_model(self, prompt, max_tokens=4096):
"""
Calls the OpenAI API with streaming enabled to get completions for a given prompt
and streams the output to the console in real time, while also accumulating the response to return.
Call the language model with the provided prompt and retrieve the response.
Parameters:
prompt (str): The prompt to send to the model.
max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 4096.
prompt (str): The prompt to provide to the language model.
max_tokens (int, optional): The maximum number of tokens to generate in the response. Defaults to 4096.
Returns:
str: The text generated by the model.
tuple: A tuple containing the response generated by the language model, the number of tokens used from the prompt, and the total number of tokens in the response.
"""
response = self.openai_client.chat.completions.create(
messages=[{"role": "user", "content": prompt}],
model=self.model,
temperature=0,
max_tokens=max_tokens,
stream=True,
)
messages = [{"role": "user", "content": prompt}]
response = litellm.completion(model=self.model, messages=messages, max_tokens=max_tokens, stream=True)

full_text = ""
chunks = []
print("Streaming results from LLM model...")
try:
for chunk in response:
if chunk.choices[0].delta and chunk.choices[0].delta.content:
content = chunk.choices[0].delta.content
print(
content, end="", flush=True
) # Print each part of the text as it arrives
full_text += content # Accumulate the content in full_text
time.sleep(
0.01
) # Optional: Delay to simulate more 'natural' response pacing
print(chunk.choices[0].delta.content or "", end="", flush=True)
chunks.append(chunk)
time.sleep(0.01) # Optional: Delay to simulate more 'natural' response pacing
except Exception as e:
print(f"Error during streaming: {e}")
print("\n")

return full_text.strip()
model_response = litellm.stream_chunk_builder(chunks, messages=messages)

def count_tokens(self, text):
"""
Counts the number of tokens in the given text using the model's encoding.
Parameters:
text (str): The text to encode.
Returns:
int: The number of tokens.
"""
try:
return len(self.encoding.encode(text))
except Exception as e:
raise ValueError(f"Error encoding text: {e}")
# Returns: Response, Prompt token count, and Response token count
return model_response['choices'][0]['message']['content'], int(model_response['usage']['prompt_tokens']), int(model_response['usage']['completion_tokens'])
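
For context, a minimal standalone sketch of the streaming pattern the new `call_model` relies on: stream chunks for live console output, then rebuild the full response with `litellm.stream_chunk_builder` to read the usage counts. The model name and prompt below are placeholders.

```python
import litellm

messages = [{"role": "user", "content": "Say hello."}]
chunks = []

# Stream partial chunks so the output can be printed in real time.
for chunk in litellm.completion(model="gpt-4o", messages=messages, stream=True):
    print(chunk.choices[0].delta.content or "", end="", flush=True)
    chunks.append(chunk)

# Reassemble the streamed chunks into one response object with usage info.
full_response = litellm.stream_chunk_builder(chunks, messages=messages)
content = full_response["choices"][0]["message"]["content"]
prompt_tokens = full_response["usage"]["prompt_tokens"]
completion_tokens = full_response["usage"]["completion_tokens"]
```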
8 changes: 4 additions & 4 deletions cover_agent/UnitTestGenerator.py
@@ -166,16 +166,16 @@ def generate_tests(self, LLM_model="gpt-4o", max_tokens=4096, dry_run=False):

self.prompt = self.build_prompt()

self.logger.info(
f"Token count for LLM model {LLM_model}: {ai_caller.count_tokens(self.prompt)}"
)
if dry_run:
# Provide a canned response. Used for testing.
response = "```def test_something():\n pass```\n```def test_something_else():\n pass```\n```def test_something_different():\n pass```"
else:
# Tests should return with triple backticks in between tests.
# We want to remove them and split up the tests into a list of tests
response = ai_caller.call_model(prompt=self.prompt, max_tokens=max_tokens)
response, prompt_token_count, response_token_count = ai_caller.call_model(prompt=self.prompt, max_tokens=max_tokens)
self.logger.info(
f"Total token used count for LLM model {LLM_model}: {prompt_token_count + response_token_count}"
)

# Split the response into a list of tests and strip off the trailing whitespaces
# (as we sometimes anticipate indentations in the returned code from the LLM)
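
The splitting logic referenced in the comment above is not shown in this hunk; here is a hypothetical sketch based on the triple-backtick format of the dry-run response (`split_tests` is an illustrative name, not taken from the codebase):

```python
FENCE = "`" * 3  # the triple-backtick delimiter placed between generated tests

def split_tests(response: str) -> list[str]:
    # Drop the backtick fences and keep each non-empty snippet,
    # stripped of surrounding whitespace.
    return [part.strip() for part in response.split(FENCE) if part.strip()]

canned = FENCE + "def test_something():\n    pass" + FENCE + "\n" + FENCE + "def test_something_else():\n    pass" + FENCE
print(split_tests(canned))
# ['def test_something():\n    pass', 'def test_something_else():\n    pass']
```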
6 changes: 3 additions & 3 deletions cover_agent/main.py
@@ -66,9 +66,9 @@ def parse_args():
help="Any additional instructions you wish to append at the end of the prompt. Default: %(default)s.",
)
parser.add_argument(
"--openai-model",
"--model",
default="gpt-4o",
help="Which OpenAI LLM model to use. Default: %(default)s.",
help="Which LLM model to use. Default: %(default)s.",
)
parser.add_argument(
"--prompt-only",
@@ -141,7 +141,7 @@ def main():

# Generate tests by making a call to the LLM
generated_tests = test_gen.generate_tests(
LLM_model=args.openai_model, max_tokens=4096
LLM_model=args.model, max_tokens=4096
)

# Write test_gen.prompt to a debug markdown file
