|
19 | 19 | from predict import initModel
|
20 | 20 | from utils.utils import AudioList
|
21 | 21 |
|
| 22 | +from google.cloud import storage |
| 23 | +import io |
| 24 | + |
22 | 25 | import logging
|
23 | 26 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
24 | 27 |
|
@@ -55,16 +58,68 @@ def send_email(subject, body):
|
55 | 58 | logging.error(f"Error sending email: {e}")
|
56 | 59 |
|
57 | 60 |
|
| 61 | +from pydub import AudioSegment |
| 62 | +import io |
| 63 | + |
def convert_mp3_to_wav(mp3_file_object):
    """Decode an MP3 file object and return the same audio re-encoded as WAV.

    Parameters:
        mp3_file_object: A readable binary file object containing MP3 data.

    Returns:
        BytesIO: An in-memory WAV rendition, positioned at byte zero.
    """
    segment = AudioSegment.from_file(mp3_file_object, format="mp3")

    # Re-encode into an in-memory buffer rather than touching disk.
    wav_buffer = io.BytesIO()
    segment.export(wav_buffer, format="wav")
    wav_buffer.seek(0)  # rewind so downstream readers start at the beginning

    return wav_buffer
| 74 | + |
| 75 | + |
def fetch_audio_data(
    bucket_name,
    blob_name,
    credentials_path='/app/cloud_analysis/g_application_credentials.json',
):
    """
    Fetches audio data from Google Cloud Storage and converts it to WAV.

    Parameters:
        bucket_name (str): The name of the GCS bucket.
        blob_name (str): The name of the blob (file) in the GCS bucket.
        credentials_path (str): Path to the service-account JSON key used to
            authenticate the GCS client. Defaults to the in-container path.

    Returns:
        BytesIO: An in-memory file object of the audio data, as WAV.
    """
    # Local import keeps the auth dependency off the module import path.
    from google.oauth2 import service_account

    credentials = service_account.Credentials.from_service_account_file(
        credentials_path
    )
    storage_client = storage.Client(credentials=credentials)

    # Get the GCS bucket and blob
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(blob_name)

    # Download the file into an in-memory file object
    audio_file_object = io.BytesIO()
    blob.download_to_file(audio_file_object)
    audio_file_object.seek(0)  # rewind before handing the buffer to the decoder

    # Stored blobs are MP3; the analysis pipeline expects WAV.
    return convert_mp3_to_wav(audio_file_object)
| 110 | + |
| 111 | + |
58 | 112 | def analyseAudioFile(
|
59 |
| - audio_file_path, batch_size=1, num_workers=4, min_hr = 0.1, min_conf = 0.99 |
| 113 | + audio_file_object, batch_size=1, num_workers=4, min_hr = 0.1, min_conf = 0.99 |
60 | 114 | ):
|
| 115 | + |
61 | 116 | # Initiate model
|
62 | 117 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
63 | 118 | model_path = "/app/audioclip/assets/snowmobile_model.pth"
|
64 | 119 | model = initModel(model_path=model_path, device=device)
|
65 | 120 |
|
66 | 121 | # Run the predictions
|
67 |
| - list_preds = AudioList().get_processed_list(audio_file_path) |
| 122 | + list_preds = AudioList().get_processed_list(audio_file_object) |
68 | 123 | predLoader = DataLoader(list_preds, batch_size=batch_size, num_workers=num_workers, pin_memory=False)
|
69 | 124 | prob_audioclip_array, hr_array = predict(predLoader, model, device)
|
70 | 125 |
|
@@ -93,15 +148,14 @@ def analyseAudioFile(
|
93 | 148 |
|
94 | 149 | return results
|
95 | 150 |
|
96 |
| -def on_process_audio( |
97 |
| - audio_id: str, audio_rec: dict, audio_file_path: str |
98 |
| -): |
| 151 | +def on_process_audio(audio_id: str, audio_rec: dict, bucket_name: str, blob_name: str): |
99 | 152 |
|
100 | 153 | print(f"PROCESSING audioId={audio_id}")
|
101 | 154 | location = audio_rec["location"]
|
102 | 155 |
|
103 | 156 | # A call out to your code here. Optionally we can pass on the recorder coordinates
|
104 |
| - results = analyseAudioFile(audio_file_path) |
| 157 | + audio_file_object = fetch_audio_data(bucket_name, blob_name) |
| 158 | + results = analyseAudioFile(audio_file_object) |
105 | 159 | # The object results is a list containing detections in the form:
|
106 | 160 | # [start, end, confidence, harmonic ratio]
|
107 | 161 |
|
@@ -130,15 +184,17 @@ def on_process_audio(
|
130 | 184 |
|
@app.route('/process-audio', methods=['POST'])
def process_audio_endpoint():
    """Flask endpoint: analyse a GCS-hosted audio file for snowmobile sounds.

    Expects a JSON body with keys 'bucket_name', 'blob_name', 'audio_id'
    and 'audio_rec'. Sends an alert email when detections were made.

    Returns:
        JSON response; 400 when required fields are missing.
    """
    # request.json is None for non-JSON bodies; guard instead of crashing.
    data = request.json or {}

    required = ('bucket_name', 'blob_name', 'audio_id', 'audio_rec')
    missing = [key for key in required if key not in data]
    if missing:
        # Fail fast with a 400 rather than letting KeyError surface as a 500.
        return jsonify({"error": f"Missing required fields: {', '.join(missing)}"}), 400

    detection_count = on_process_audio(
        data['audio_id'], data['audio_rec'], data['bucket_name'], data['blob_name']
    )

    # NOTE(review): assumes on_process_audio returns a numeric detection
    # count (its return statement is outside this view) — confirm.
    if detection_count > 0:
        send_email(
            "Snowmobile Detection Alert",
            f"{detection_count} snowmobile detections were made in the audio file!",
        )

    return jsonify({"message": "Audio processing completed!"})
|
143 | 199 |
|
144 | 200 |
|
|
0 commit comments