Update end-of-file characters; rewrite the audio2text check script and add a test data README

MSCetin37 · MSCetin37 · commit bba84042027b · 2024-11-08T12:05:35.000-08:00
Signed-off-by: Mustafa <mustafa.cetin@intel.com>
diff --git a/.github/workflows/docker/compose/dataprep-compose-cd.yaml b/.github/workflows/docker/compose/dataprep-compose-cd.yaml
@@ -42,4 +42,4 @@ services:
   dataprep-audio2text:
     build:
       dockerfile: comps/dataprep/multimedia2text/video2audio/Dockerfile
-    image: ${REGISTRY:-opea}/dataprep-audio2text:${TAG:-latest}
+    image: ${REGISTRY:-opea}/dataprep-audio2text:${TAG:-latest}
diff --git a/comps/dataprep/multimedia2text/README.md b/comps/dataprep/multimedia2text/README.md
@@ -127,7 +127,7 @@ python comps/dataprep/multimedia2text/audio2text/check_a2t_server.py
 Expected output:
 
 ```
-{'downstream_black_list': [], 'id': '21b0459477abea6d85d20f4b5ddcb714', 'query': 'who is pat gelsinger'}
+Test passed successfully!
 ```
 
 *Note: The `id` value will be different.*
@@ -217,8 +217,4 @@ To stop and remove the Docker containers and images associated with the multimed
 
    ```bash
    docker image prune -a
-   ```
-
-
-
-
+   ```
diff --git a/comps/dataprep/multimedia2text/audio2text/Dockerfile b/comps/dataprep/multimedia2text/audio2text/Dockerfile
@@ -34,4 +34,4 @@ ENV PYTHONPATH=$PYTHONPATH:/home/user
 WORKDIR /home/user/comps/dataprep/multimedia2text/audio2text
 
 # Define the entry point for the container
-ENTRYPOINT ["python", "audio2text.py"]
+ENTRYPOINT ["python", "audio2text.py"]
diff --git a/comps/dataprep/multimedia2text/audio2text/audio2text.py b/comps/dataprep/multimedia2text/audio2text/audio2text.py
@@ -85,4 +85,4 @@ async def audio_to_text(audio: Base64ByteStrDoc):
         
     except Exception as e:
         logger.error(f"Failed to start the microservice: {e}")
-        raise
+        raise
diff --git a/comps/dataprep/multimedia2text/audio2text/check_a2t_server.py b/comps/dataprep/multimedia2text/audio2text/check_a2t_server.py
@@ -5,29 +5,81 @@
 import json
 import os
 import requests
-import uuid
-import urllib.request
+import argparse
 
+# Get the root folder of the current script
+root_folder = os.path.dirname(os.path.abspath(__file__))
 
-uid = str(uuid.uuid4())
-file_name = uid + ".wav"
+def audio_to_text(path_to_audio):
+    """
+    Convert an audio file to text by sending a request to the server.
+    
+    Args:
+        path_to_audio (str): Path to the audio file.
+    
+    Returns:
+        str: The transcribed text.
+    """
+    file_name = os.path.join(root_folder, path_to_audio)
 
-urllib.request.urlretrieve(
-    "https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav",
-    file_name,
-)
+    # Read the audio file and encode it in base64
+    with open(file_name, "rb") as f:
+        audio_base64_str = base64.b64encode(f.read()).decode("utf-8")
+        
+    endpoint = "http://localhost:9099/v1/audio/transcriptions"
+    inputs = {"byte_str": audio_base64_str}
+      
+    # Send the POST request to the server
+    response = requests.post(url=endpoint, data=json.dumps(inputs), proxies={"http": None})
+    
+    # Check if the request was successful
+    response.raise_for_status()
+    
+    # Return the transcribed text
+    return response.json()['query']
 
-# Read and encode the audio file in base64
-with open(file_name, "rb") as f:
-    test_audio_base64_str = base64.b64encode(f.read()).decode("utf-8")
-os.remove(file_name)
+def check_response(response):
+    """
+    Check the response from the server and print the result.
+    
+    Args:
+        response (str): The transcribed text from the server.
+    """
+    expected_response = "well"
+    assert response == expected_response, f"Expected '{expected_response}', but got '{response}'"
+    print("Test passed successfully!")
 
-# Define the endpoint and the input data
-endpoint = "http://localhost:9099/v1/audio/transcriptions"
-inputs = {"byte_str": test_audio_base64_str}
+def read_config():
+    """
+    Parse the configuration parameters from the command-line arguments.
+    
+    Returns:
+        argparse.Namespace: Parsed arguments.
+    """
+    # Create an argument parser
+    parser = argparse.ArgumentParser(description="Process configuration parameters.")
+    
+    # Add argument for the audio file path
+    parser.add_argument(
+        "--path_to_audio",
+        help="Location of the audio file that will be converted to text.",
+        required=False,
+        default=os.path.join(root_folder, "../data/intel_short.wav")
+    )
+    
+    # Parse the arguments
+    args = parser.parse_args()
+    
+    # Return the parsed arguments
+    return args
 
-# Send the POST request to the endpoint
-response = requests.post(url=endpoint, data=json.dumps(inputs), proxies={"http": None})
-
-# Print the response from the server
-print(response.json())
+if __name__ == "__main__":
+    # Read the configuration parameters
+    args = read_config()
+    
+    # Convert audio to text
+    response = audio_to_text(args.path_to_audio)
+    
+    # Check the response
+    check_response(response)
+    
diff --git a/comps/dataprep/multimedia2text/check_multimedia2text.py b/comps/dataprep/multimedia2text/check_multimedia2text.py
@@ -142,4 +142,4 @@ def test_multimedia2text_data():
         test_multimedia2text_data()
         
     except AssertionError as e:
-        print(f"Test failed: {e}")
+        print(f"Test failed: {e}")
diff --git a/comps/dataprep/multimedia2text/data/README.md b/comps/dataprep/multimedia2text/data/README.md
@@ -0,0 +1,31 @@
+# Test Data for Document Summarization
+
+## Overview
+
+This document provides information about the test data used for the Document Summarization application.
+
+## Source of Test Data
+
+The data used for testing originated from the following video:
+
+[YouTube Video](https://www.youtube.com/watch?v=HUpnCtJRTg4)
+
+## Description of Test Data
+
+1. **Video File**: We extracted a 1-second segment from the above video and saved it as `intel_short.mp4`.
+2. **Audio File**: The audio was extracted from the `intel_short.mp4` video file and saved as `intel_short.wav`.
+
+These files are used to test the functionality of the Document Summarization application, including the conversion of multimedia content to text.
+
+## Files
+
+- `intel_short.mp4`: A 1-second video segment extracted from the YouTube video.
+- `intel_short.wav`: An audio file converted from the `intel_short.mp4` video file.
+
+## Usage
+
+These files can be used to validate the multimedia-to-text services provided by the Document Summarization application. Ensure that the files are placed in the appropriate directory as specified in the application's configuration.
+
+## License
+
+The original video content is subject to the terms and conditions of YouTube and the content creator. The extracted segments are used solely for testing and validation purposes.
diff --git a/comps/dataprep/multimedia2text/multimedia2text.py b/comps/dataprep/multimedia2text/multimedia2text.py
@@ -93,4 +93,4 @@ async def audio_to_text(input: DocSumDoc):
 
     except Exception as e:
         logger.error(f"Failed to start the multimedia2text microservice: {e}")
-        raise
+        raise
diff --git a/comps/dataprep/multimedia2text/video2audio/check_v2a_microserver.py b/comps/dataprep/multimedia2text/video2audio/check_v2a_microserver.py
@@ -87,4 +87,4 @@ def read_config():
         
     print("========= Audio file saved as ======")
     print(args.path_to_audio)
-    print("====================================")
+    print("====================================")
diff --git a/comps/dataprep/multimedia2text/video2audio/video2audio.py b/comps/dataprep/multimedia2text/video2audio/video2audio.py
@@ -87,4 +87,4 @@ def convert_video_to_audio_base64(self, video_file_name):
         # Convert the audio file to a base64 encoded string
         base64_str = self.convert_base64(audio_file_name)
         
-        return base64_str
+        return base64_str
diff --git a/comps/dataprep/multimedia2text/video2audio/video2audio_microservice.py b/comps/dataprep/multimedia2text/video2audio/video2audio_microservice.py
@@ -84,4 +84,4 @@ async def audio_to_text(request: Base64ByteStrDoc):
         
     except Exception as e:
         logger.error(f"Failed to start the microservice: {e}")
-        raise
+        raise