
Commit cf27b78

Shaokai/0.1.3 (#55)
* corrected broken links and broken references
* Update setup.cfg - bump to stable v1 and v0.1.1
* Update pyproject.toml
* Update version.py
* Corrected typo. Added config yamls in setup
* Removed config files that are no longer needed
* changed workflow to pull the repo from git
* Added comments to remind people to pay attention to the data folder in the demo notebooks
* fixed pypi typo
* Fixed a bug in create_project. Changed default use_vlm to False. Updated demo notebooks
* removed WIP 3d keypoints
* Fixed one more
* WIP
* enforcing the use of create_project in demo notebooks and modified the test
* 3D supported. Better tests. More flexible identifier
* black and isort
* added dlc to test requirement
* Made test use stronger gpt. Added dummy video
* easier superanimal test
* Better 3D prompt and fixed self-debug
* preventing infinite loop
* better prompt for 3D
* better prompt for 3D
* better prompt
* updates
* fixed serialization
* extension to support animation. Made self-debugging work with bigger output. Allowing to skip code execution in parse_result
* better interpolation and corrected x,y,z convention
* incorporated suggestions
* add a test plot keypoint label
* Fixed a bug. Changed hardcoded path to relative path in notebooks
* updated vlm prompt to be more robust
* deleted y axis inversion prompt
* Added animation support and added animation in horse demo
* edited readme

---------

Co-authored-by: Mackenzie Mathis <[email protected]>
1 parent d550ef0 · commit cf27b78

File tree: 14 files changed (+105, -37 lines)


README.md

Lines changed: 3 additions & 1 deletion
@@ -73,7 +73,7 @@ You can git clone (or download) this repo to grab a copy and go. We provide exam
 ### Here are a few demos that could fuel your own work, so please check them out!
 
 1) [Draw a region of interest (ROI) and ask, "when is the animal in the ROI?"](https://github.com/AdaptiveMotorControlLab/AmadeusGPT/tree/main/notebooks/EPM_demo.ipynb)
-2) [Use a DeepLabCut SuperAnimal pose model to do video inference](https://github.com/AdaptiveMotorControlLab/AmadeusGPT/tree/main/notebooks/custom_mouse_demo.ipynb) - (make sure you use a GPU if you don't have corresponding DeepLabCut keypoint files already!
+2) [Use your own data](https://github.com/AdaptiveMotorControlLab/AmadeusGPT/tree/main/notebooks/YourData.ipynb) - (make sure you use a GPU to run SuperAnimal if you don't have corresponding DeepLabCut keypoint files already!
 3) [Write you own integration modules and use them](https://github.com/AdaptiveMotorControlLab/AmadeusGPT/tree/main/notebooks/Horse_demo.ipynb). Bonus: [source code](amadeusgpt/integration_modules). Make sure you delete the cached modules_embedding.pickle if you add new modules!
 4) [Multi-Animal social interactions](https://github.com/AdaptiveMotorControlLab/AmadeusGPT/tree/main/notebooks/MABe_demo.ipynb)
 5) [Reuse the task program generated by LLM and run it on different videos](https://github.com/AdaptiveMotorControlLab/AmadeusGPT/tree/main/notebooks/MABe_demo.ipynb)
@@ -126,6 +126,8 @@ the key dependencies that need installed are:
 pip install notebook
 conda install hdf5
 conda install pytables==3.8
+# pip install deeplabcut==3.0.0rc4 if you want to use SuperAnimal on your own videos
+
 pip install amadeusgpt
 ```
 ## Citation

amadeusgpt/analysis_objects/visualization.py

Lines changed: 1 addition & 0 deletions
@@ -143,6 +143,7 @@ def __init__(
         n_individuals: int,
         average_keypoints: Optional[bool] = True,
         events: Optional[List[BaseEvent]] = None,
+        use_3d: Optional[bool] = False,
     ):
         assert len(keypoints.shape) == 3
         super().__init__(axs)
Lines changed: 6 additions & 1 deletion

@@ -1,2 +1,7 @@
-from .cebra import *
+try:
+    import cebra
+    from .cebra import *
+except:
+    print ('not able to import cebra')
 from .umap import *
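
The guard above keeps the embeddings package importable when the optional `cebra` dependency is missing. A minimal sketch of the same pattern with a narrower `except ImportError` (an illustration, not the code this commit ships):

```python
# Optional-dependency guard, narrowed to ImportError so that genuine bugs
# inside the dependency are not silently swallowed (illustrative sketch).
try:
    import cebra  # optional dependency
    HAS_CEBRA = True
except ImportError as err:
    HAS_CEBRA = False
    print(f"not able to import cebra: {err}")
```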

amadeusgpt/managers/visual_manager.py

Lines changed: 15 additions & 2 deletions
@@ -47,8 +47,21 @@ def __init__(
         self.animal_manager = animal_manager
         self.object_manager = object_manager
 
-    def get_scene_image(self):
-        scene_frame_index = self.config["video_info"].get("scene_frame_number", 1)
+    @register_core_api
+    def get_scene_image(self, scene_frame_index: int | None = None) -> np.ndarray:
+        """
+        Returns the frame given the index in the video.
+        Parameter
+        ---------
+        scene_frame_index: int (optional) that specifies the index of the video frame.
+        Returns
+        -------
+        An ndarray image
+
+        For visualizing keypoints or keypoint labels, it's nice to overlay the keypoints on the scene image.
+        """
+        if scene_frame_index is None:
+            scene_frame_index = self.config["video_info"].get("scene_frame_number", 1)
         if os.path.exists(self.video_file_path):
             cap = cv2.VideoCapture(self.video_file_path)
             cap.set(cv2.CAP_PROP_POS_FRAMES, scene_frame_index)
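
With `get_scene_image` now registered as a core API and taking an optional frame index, callers can either rely on the configured `scene_frame_number` or request a specific frame. A hedged usage sketch, with paths and project setup borrowed from the demo notebooks below (details may vary):

```python
import matplotlib.pyplot as plt
from amadeusgpt import AMADEUS, create_project

# Project setup as in the Horse demo notebook (paths are illustrative).
config = create_project(data_folder="../examples/Horse",
                        result_folder="temp_result_folder")
amadeus = AMADEUS(config)

analysis = amadeus.get_behavior_analysis(
    video_file_path="../examples/Horse/BrownHorseinShadow.mp4")

# No argument: falls back to config["video_info"]["scene_frame_number"] (default 1).
scene = analysis.visual_manager.get_scene_image()

# Explicit argument: fetch a particular frame instead.
frame_400 = analysis.visual_manager.get_scene_image(scene_frame_index=400)

plt.imshow(frame_400)
plt.show()
```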

amadeusgpt/project.py

Lines changed: 5 additions & 1 deletion
@@ -14,7 +14,11 @@ def create_project(data_folder, result_folder, **kwargs):
             "result_folder": result_folder,
             "video_suffix": ".mp4",
         },
-        "llm_info": {"max_tokens": 4096, "temperature": 0.0, "keep_last_n_messages": 2},
+        "llm_info": {"max_tokens": 4096,
+                     "temperature": 0.0,
+                     # let's use the best model by default
+                     "gpt_model": "gpt-4o",
+                     "keep_last_n_messages": 2},
        "object_info": {"load_objects_from_disk": False, "use_grid_objects": False},
        "keypoint_info": {
            "use_3d": False,

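Because `gpt-4o` is now the default model, anyone who wants cheaper inference has to override it explicitly. A sketch of one way to do that through `create_project`'s nested kwargs, mirroring the pattern the notebooks below use (paths are illustrative):

```python
from amadeusgpt import create_project

# Override only the model; the other llm_info defaults (max_tokens,
# temperature, keep_last_n_messages) come from create_project itself.
kwargs = {
    "llm_info": {
        "gpt_model": "gpt-4o-mini",  # cheaper inference, somewhat weaker results
    },
}

config = create_project("../examples/Horse", "temp_result_folder", **kwargs)
```
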
amadeusgpt/system_prompts/code_generator.py

Lines changed: 7 additions & 6 deletions
@@ -85,12 +85,13 @@ def get_watching_events(identifier):
 4) Make sure you do not import any libraries in your code. All needed libraries are imported already.
 5) Make sure you disintuigh positional and keyword arguments when you call functions in api docs
 6) If you are writing code that uses matplotlib to plot, make sure you comment shape of the data to be plotted to double-check
-7) if your plotting code plots coordinates of keypoints, make sure you invert y axis (only during plotting) so that the plot is consistent with the image
-8) make sure the xlim and ylim covers the whole image. The image (h,w) is ({image_h},{image_w})
-9) Do not define your own objects (including grid objects). Only use objects that are given to you.
-10) You MUST use the index from get_keypoint_names to access the keypoint data of specific keyponit names. Do not assume the order of the bodypart.
-11) You MUST call functions in api docs on the analysis object.
-12) For api functions that require min_window and max_window, make sure you leave them as default values unless you are asked to change them.
+7) make sure the xlim and ylim covers the whole image. The image (h,w) is ({image_h},{image_w})
+8) Do not define your own objects (including grid objects). Only use objects that are given to you.
+9) You MUST use the index from get_keypoint_names to access the keypoint data of specific keyponit names. Do not assume the order of the bodypart.
+10) You MUST call functions in api docs on the analysis object.
+11) For api functions that require min_window and max_window, make sure you leave them as default values unless you are asked to change them.
+12) When making plots of keypoints of making animation about keypoints, try to overlap the plots with the scene frame if feasible.
+
 
 HOW TO AVOID BUGS:
 You should always comment the shape of the any numpy array you are working with to avoid bugs. YOU MUST DO IT.

amadeusgpt/system_prompts/visual_llm.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ def _get_system_prompt():
 ```
 The "description" has high level description of the image.
 The "individuals" indicates the number of animals in the image
-The "species" indicates the species of the animals in the image. You can only choose from one of "topview_mouse", "sideview_quadruped" or "others".
+The "species" indicates the species of the animals in the image. You can only choose from one of "topview_mouse", "sideview_quadruped" or "others". Note all quadruped animals should be considered as sideview_quadruped.
 The "background_objects" is a list of background objects in the image.
 Explain your answers before you fill the answers. Make sure you only return one json string.
 """

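The prompt expects a single JSON string with four fields. A hypothetical answer that conforms to the schema, written here as a Python dict (every value is made up for illustration):

```python
# Hypothetical VLM answer matching the prompt's schema; values are illustrative.
expected_answer = {
    "description": "A brown horse walking through shadow, filmed from the side.",
    "individuals": 1,
    "species": "sideview_quadruped",  # per the prompt, all quadrupeds map here
    "background_objects": ["fence", "ground", "trees"],
}
```
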
amadeusgpt/utils.py

Lines changed: 11 additions & 4 deletions
@@ -212,7 +212,7 @@ def create_qa_message(query: str, video_file_paths: list[str]) -> QA_Message:
     return QA_Message(query, video_file_paths)
 
 
-from IPython.display import Markdown, Video, display
+from IPython.display import Markdown, Video, display, HTML
 
 
 def parse_result(amadeus, qa_message, use_ipython=True, skip_code_execution=False):
@@ -231,13 +231,20 @@ def parse_result(amadeus, qa_message, use_ipython=True, skip_code_execution=Fals
     )
     if use_ipython:
         if len(qa_message.out_videos) > 0:
-            for video_path, event_videos in qa_message.out_videos.items():
+            for identifier, event_videos in qa_message.out_videos.items():
                 for event_video in event_videos:
                     display(Video(event_video, embed=True))
 
     if use_ipython:
+        from matplotlib.animation import FuncAnimation
         if len(qa_message.function_rets) > 0:
-            for video_file_path in qa_message.function_rets:
-                display(Markdown(str(qa_message.function_rets[video_file_path])))
+            for identifier, rets in qa_message.function_rets.items():
+                if not isinstance(rets, (tuple, list)):
+                    rets = [rets]
+                for ret in rets:
+                    if isinstance(ret, FuncAnimation):
+                        display(HTML(ret.to_jshtml()))
+                    else:
+                        display(Markdown(str(qa_message.function_rets[identifier])))
 
     return qa_message
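
`parse_result` now dispatches on the return type: `FuncAnimation` objects are rendered inline via `to_jshtml()`, everything else is shown as Markdown. A sketch of a typical notebook call (the query string is illustrative; `amadeus` is an AMADEUS instance as in the demos):

```python
from amadeusgpt.utils import parse_result

# Ask a question; any animation the generated code returns is displayed inline.
qa_message = amadeus.step("make an animation of the animal keypoints over time")
qa_message = parse_result(amadeus, qa_message, use_ipython=True)

# Outside IPython (e.g. in tests), skip rich display and, if desired,
# skip executing the generated code altogether.
qa_message = parse_result(amadeus, qa_message,
                          use_ipython=False, skip_code_execution=True)
```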

notebooks/EPM_demo.ipynb

Lines changed: 3 additions & 3 deletions
@@ -84,7 +84,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "behavior_analysis = amadeus.get_behavior_analysis('/Users/shaokaiye/AmadeusGPT-dev/examples/EPM/EPM_11.mp4')\n",
+    "behavior_analysis = amadeus.get_behavior_analysis('../examples/EPM/EPM_11.mp4')\n",
     "behavior_analysis.gui_manager.add_roi_from_video_selection()"
    ]
   },
@@ -174,9 +174,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "amadeusgpt-minimal",
+   "display_name": "amadeusgpt-cpu",
    "language": "python",
-   "name": "python3"
+   "name": "amadeusgpt-cpu"
   },
   "language_info": {
    "codemirror_mode": {

notebooks/Horse_demo.ipynb

Lines changed: 10 additions & 2 deletions
@@ -42,6 +42,10 @@
     "\n",
     "kwargs = { \n",
     "    \"video_info.scene_frame_number\" : scene_frame_number,\n",
+    "    \"llm_info\": {\n",
+    "        \"gpt_model\": \"gpt-4o\",\n",
+    "    }\n",
+    "\n",
     "}\n",
     "\n",
     "config = create_project(data_folder = \"../examples/Horse\",\n",
@@ -61,7 +65,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "behavior_analysis = amadeus.get_behavior_analysis('/Users/shaokaiye/AmadeusGPT-dev/examples/Horse/BrownHorseinShadow.mp4')\n",
+    "behavior_analysis = amadeus.get_behavior_analysis(video_file_path = '../examples/Horse/BrownHorseinShadow.mp4')\n",
     "scene_image = behavior_analysis.visual_manager.get_scene_image()\n",
     "plt.imshow(scene_image)"
    ]
@@ -84,7 +88,11 @@
   "id": "e394c4e0",
   "metadata": {},
   "outputs": [],
-  "source": []
+  "source": [
+   "query = \"\"\" make an animation of the horse keypoints over time. Overlap the image frame on it. Save the animation on the disk. \"\"\"\n",
+   "qa_message = amadeus.step(query)\n",
+   "qa_message = parse_result(amadeus, qa_message)"
+  ]
  }
 ],
 "metadata": {

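The new cell asks the LLM to generate animation code. A rough sketch of what such generated code might look like, assuming keypoints of shape `(n_frames, n_keypoints, 2)` (matching the 3-D shape asserted in `visualization.py` above); the arrays here are random stand-ins, not real pose output:

```python
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

# Stand-in data: a blank scene frame and random keypoints of shape
# (n_frames, n_keypoints, 2), with (x, y) in pixel coordinates.
scene_image = np.zeros((288, 352, 3), dtype=np.uint8)
keypoints = np.random.rand(100, 17, 2) * (352, 288)

fig, ax = plt.subplots()
ax.imshow(scene_image)                     # overlap the scene frame
scatter = ax.scatter([], [], c="red", s=10)

def update(frame):
    scatter.set_offsets(keypoints[frame])  # (n_keypoints, 2) for this frame
    return (scatter,)

anim = FuncAnimation(fig, update, frames=keypoints.shape[0], blit=True)
anim.save("horse_keypoints.mp4")           # saving to disk requires ffmpeg
plt.close(fig)
```
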
notebooks/MABe_demo.ipynb

Lines changed: 2 additions & 1 deletion
@@ -64,7 +64,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "behavior_analysis = amadeus.get_behavior_analysis('/Users/shaokaiye/AmadeusGPT-dev/examples/MABe/EGS8X2MN4SSUGFWAV976.mp4')\n",
+    "behavior_analysis = amadeus.get_behavior_analysis(video_file_path='../examples/MABe/EGS8X2MN4SSUGFWAV976.mp4',\n",
+    "                                                  keypoint_file_path='../examples/MABe/EGS8X2MN4SSUGFWAV976.h5')\n",
     "scene_image = behavior_analysis.visual_manager.get_scene_image()\n",
     "plt.imshow(scene_image)"
    ]

notebooks/MausHaus_demo.ipynb

Lines changed: 3 additions & 1 deletion
@@ -65,7 +65,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "behavior_analysis = amadeus.get_behavior_analysis('/Users/shaokaiye/AmadeusGPT-dev/examples/MausHaus/maushaus_trimmed.mp4')\n",
+    "behavior_analysis = amadeus.get_behavior_analysis(video_file_path='../examples/MausHaus/maushaus_trimmed.mp4',\n",
+    "                                                  keypoint_file_path='../examples/MausHaus/maushaus_trimmed.h5')\n",
+    "\n",
     "behavior_analysis.gui_manager.add_roi_from_video_selection()"
    ]
   },

notebooks/custom_mouse_video.ipynb renamed to notebooks/YourData.ipynb

Lines changed: 37 additions & 13 deletions
@@ -21,12 +21,8 @@
    "outputs": [],
    "source": [
     "from amadeusgpt import AMADEUS\n",
-    "from amadeusgpt.config import Config\n",
     "from amadeusgpt.utils import parse_result\n",
-    "import amadeusgpt\n",
-    "from amadeusgpt import create_project\n",
-    "import matplotlib.pyplot as plt\n",
-    "import cv2"
+    "from amadeusgpt import create_project"
    ]
   },
   {
@@ -35,7 +31,7 @@
    "metadata": {},
    "source": [
     "### Note that unlike other notebooks, we don't have keypoint_file_path here (as it's not provided)\n",
-    "### By default, we use gpt-4o to determine which SuperAnimal models to run and it will run SuperAnimal in the first time the keypoints related queries are asked\n",
+    "### By default, we use gpt-4o to determine which SuperAnimal models to run and it will run SuperAnimal in the first time the keypoints related queries are asked. Note to use superanimal, you will need to install the newest DeepLabCut.\n",
     "### Make sure you use a short video clips if you are not using GPUs in Linux (Mac silicon support to be added)"
    ]
   },
@@ -46,16 +42,44 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "scene_frame_number = 400\n",
-    "\n",
     "# where you store you video and (optionally) keypoint files\n",
-    "data_folder = \"temp_data_folder\"\n",
+    "# If you don't have keypoint files, we would try to run SuperAnimal on your video\n",
+    "# If you have pair of video and keypoint files, make sure they follow the naming convention as following:\n",
+    "\n",
+    "# your_folder\n",
+    "#  - cat.mp4\n",
+    "#  - cat.h5 (DLC output)\n",
+    "#  - dog.mp4\n",
+    "#  - dog.h5 (DLC output)\n",
+    "\n",
+    "data_folder = \"../examples/Horse\"\n",
     "result_folder = \"temp_result_folder\"\n",
     "video_suffix = \".mp4\"\n",
     "\n",
-    "config = create_project(data_folder, result_folder, video_suffix = video_suffix)\n",
+    "# if you want to overwrite the default config, you can do it here\n",
+    "kwargs = {\n",
+    "    \"data_info\": {\n",
+    "        \"data_folder\": data_folder,\n",
+    "        \"result_folder\": result_folder,\n",
+    "        # can only locate videos specified in video_suffix\n",
+    "        \"video_suffix\": \".mp4\",\n",
+    "    },\n",
+    "    \n",
+    "    \"llm_info\": {\"max_tokens\": 4096, \n",
+    "                 \"temperature\": 0.0, \n",
+    "                 # one can swtich this to gpt-4o-mini for cheaper inference with the cost of worse performance.\n",
+    "                 \"gpt_model\": \"gpt-4o\",\n",
+    "                 # We only keep conversation history of 2. You can make it longer with more cost. We are switching to a different form of long-term memory.\n",
+    "                 \"keep_last_n_messages\": 2},\n",
+    "    \"keypoint_info\": {\n",
+    "        # only set True if you work with 3D keypoint \n",
+    "        \"use_3d\": False,\n",
+    "    },\n",
+    "    # this is the frame index for gpt-4o to match the right superanimal model.\n",
+    "    \"video_info\": {\"scene_frame_number\": 1},\n",
+    "}\n",
     "\n",
-    "config[\"scene_frame_number\"] = scene_frame_number\n",
+    "config = create_project(data_folder, result_folder, video_suffix = video_suffix, **kwargs)\n",
     "\n",
     "amadeus = AMADEUS(config, use_vlm = True)\n",
     "video_file_paths = amadeus.get_video_file_paths()\n",
@@ -89,9 +113,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "amadeusgpt-cpu",
+   "display_name": "amadeusgpt-minimal",
    "language": "python",
-   "name": "amadeusgpt-cpu"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {

tests/test_3d.py

Lines changed: 1 addition & 1 deletion
@@ -30,4 +30,4 @@ def test_3d_maushaus():
 
     qa_message = amadeus.step(query)
 
-    parse_result(amadeus, qa_message, use_ipython=False)
\ No newline at end of file
+    parse_result(amadeus, qa_message, use_ipython=False)
