Skip to content

Commit

Permalink
[ADD] Reading GCloud bucket from main.py
Browse files Browse the repository at this point in the history
  • Loading branch information
BenCretois committed Oct 3, 2023
1 parent 545959b commit 2d51ac9
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 17 deletions.
22 changes: 22 additions & 0 deletions cloud_analysis/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,28 @@

- If lost, there is a button `console` on the upper right corner that brings us back to the projects

## Create service account with access to the Google Cloud bucket

First create a new service account:

```
IAM and Admin -> Service Account -> Create Service Account
```

Then grant the service account the **Storage Object Viewer** role so that it can read the objects stored in the cloud bucket.

Then create a **Key** that will act as the `GOOGLE_APPLICATION_CREDENTIALS`, an environment variable that authenticates a user for accessing the Google Cloud bucket (see `main.py/fetch_audio_data`).

Copy the contents of the `.json` file created for the key into a file called `g_application_credentials.json`. This file is accessed in `main.py/fetch_audio_data`:

```
credentials = service_account.Credentials.from_service_account_file(
'/app/cloud_analysis/g_application_credentials.json'
)
storage_client = storage.Client(credentials=credentials)
```

## Create the Docker image for Cloud analysis

The Docker image used for the cloud analysis is slightly different:
Expand Down
82 changes: 69 additions & 13 deletions cloud_analysis/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
from predict import initModel
from utils.utils import AudioList

from google.cloud import storage
import io

import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

Expand Down Expand Up @@ -55,16 +58,68 @@ def send_email(subject, body):
logging.error(f"Error sending email: {e}")


from pydub import AudioSegment
import io

def convert_mp3_to_wav(mp3_file_object):
    """
    Re-encodes MP3 audio as WAV, entirely in memory.
    Parameters:
    mp3_file_object: A file object containing MP3 data, handed to
    ``AudioSegment.from_file`` with ``format="mp3"``.
    Returns:
    BytesIO: An in-memory file object holding the WAV export, rewound to
    the start.
    """
    wav_buffer = io.BytesIO()

    # Decode the MP3 stream, then write it back out as WAV into the buffer
    decoded_audio = AudioSegment.from_file(mp3_file_object, format="mp3")
    decoded_audio.export(wav_buffer, format="wav")

    # Rewind so the caller reads from the first byte
    wav_buffer.seek(0)
    return wav_buffer


def fetch_audio_data(bucket_name, blob_name):
    """
    Fetches audio data from Google Cloud Storage.
    Parameters:
    bucket_name (str): The name of the GCS bucket.
    blob_name (str): The name of the blob (file) in the GCS bucket.
    Returns:
    BytesIO: An in-memory file object of the audio data, as returned by
    ``convert_mp3_to_wav`` (the downloaded blob is treated as MP3).
    """
    from google.oauth2 import service_account

    # Authenticate with the service-account key file at this fixed path
    credentials = service_account.Credentials.from_service_account_file(
        '/app/cloud_analysis/g_application_credentials.json'
    )
    storage_client = storage.Client(credentials=credentials)

    # Build the bucket handle locally instead of calling get_bucket():
    # get_bucket() performs a bucket-metadata GET, which needs the
    # storage.buckets.get permission, while the object download below only
    # needs object-level read access.
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)

    # Download the file into an in-memory file object
    audio_file_object = io.BytesIO()
    blob.download_to_file(audio_file_object)
    audio_file_object.seek(0)  # Move file pointer to the start

    # Convert MP3 to WAV
    return convert_mp3_to_wav(audio_file_object)


def analyseAudioFile(
audio_file_path, batch_size=1, num_workers=4, min_hr = 0.1, min_conf = 0.99
audio_file_object, batch_size=1, num_workers=4, min_hr = 0.1, min_conf = 0.99
):

# Initiate model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_path = "/app/audioclip/assets/snowmobile_model.pth"
model = initModel(model_path=model_path, device=device)

# Run the predictions
list_preds = AudioList().get_processed_list(audio_file_path)
list_preds = AudioList().get_processed_list(audio_file_object)
predLoader = DataLoader(list_preds, batch_size=batch_size, num_workers=num_workers, pin_memory=False)
prob_audioclip_array, hr_array = predict(predLoader, model, device)

Expand Down Expand Up @@ -93,15 +148,14 @@ def analyseAudioFile(

return results

def on_process_audio(
audio_id: str, audio_rec: dict, audio_file_path: str
):
def on_process_audio(audio_id: str, audio_rec: dict, bucket_name: str, blob_name: str):

print(f"PROCESSING audioId={audio_id}")
location = audio_rec["location"]

# A call out to your code here. Optionally we can pass on the recorder coordinates
results = analyseAudioFile(audio_file_path)
audio_file_object = fetch_audio_data(bucket_name, blob_name)
results = analyseAudioFile(audio_file_object)
# The object results is a list containing detections in the form:
# [start, end, confidence, harmonic ratio]

Expand Down Expand Up @@ -130,15 +184,17 @@ def on_process_audio(

@app.route('/process-audio', methods=['POST'])
def process_audio_endpoint():
    """
    Handles POST /process-audio.

    Reads ``bucket_name``, ``blob_name``, ``audio_id`` and ``audio_rec`` from
    the JSON request body, passes them to ``on_process_audio`` and sends an
    alert email when the value it returns is greater than zero.
    Returns:
    A JSON response with a completion message.
    """
    data = request.json
    bucket_name = data['bucket_name']
    blob_name = data['blob_name']
    audio_id = data['audio_id']
    audio_rec = data['audio_rec']

    # NOTE(review): on_process_audio is presumed to return the number of
    # detections (the alert text below reports it as such) - confirm.
    detection_count = on_process_audio(audio_id, audio_rec, bucket_name, blob_name)

    # Alert exactly once per request, and only when something was detected
    if detection_count > 0:
        send_email("Snowmobile Detection Alert", f"{detection_count} snowmobile detections were made in the audio file!")

    return jsonify({"message": "Audio processing completed!"})


Expand Down
1 change: 0 additions & 1 deletion cloud_analysis/test_cloud.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,3 @@ curl -X POST \
https://model-4uhtnq5xla-lz.a.run.app/process-audio


# /home/benjamin.cretois/data/snowmobile/example_audio.mp3
7 changes: 4 additions & 3 deletions cloud_analysis/test_local.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/bin/bash

# Smoke test for the service listening on localhost:8080: POST one
# bucket/blob reference, plus an audio id and recorder location, to
# /process-audio.
curl -X POST \
    -H "Content-Type: application/json" \
    -d '{"audio_id": "test-id", "audio_rec": {"location": {"latitude": 0, "longitude": 0}}, "bucket_name": "snoskuter-detector-test", "blob_name": "example_audio.mp3"}' \
    http://localhost:8080/process-audio

0 comments on commit 2d51ac9

Please sign in to comment.