Skip to content

Commit 2d51ac9

Browse files
committed
[ADD] Reading GCloud bucket from main.py
1 parent 545959b commit 2d51ac9

File tree

4 files changed

+95
-17
lines changed

4 files changed

+95
-17
lines changed

cloud_analysis/README.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,28 @@
44

55
- If lost, there is a button `console` on the upper right corner that brings us back to the projects
66

7+
## Create service account with access to the Google Cloud bucket
8+
9+
First create a new service account:
10+
11+
```
12+
IAM and Admin -> Service Account -> Create Service Account
13+
```
14+
15+
Then grant the service account the **Storage Object Viewer** role so that it can access the cloud bucket.
16+
17+
Then create a **Key** that will act as the `GOOGLE_APPLICATION_CREDENTIALS`, an environment variable that authenticates a user for accessing the Google Cloud bucket (see `main.py/fetch_audio_data`).
18+
19+
Copy the contents of the `.json` file created from the key into a file called `g_application_credentials.json`. This file will be accessed in `main.py/fetch_audio_data`:
20+
21+
```
22+
credentials = service_account.Credentials.from_service_account_file(
23+
'/app/cloud_analysis/g_application_credentials.json'
24+
)
25+
26+
storage_client = storage.Client(credentials=credentials)
27+
```
28+
729
## Create the Docker image for Cloud analysis
830

931
The Docker image used for the cloud analysis is slightly different:

cloud_analysis/main.py

Lines changed: 69 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
from predict import initModel
2020
from utils.utils import AudioList
2121

22+
from google.cloud import storage
23+
import io
24+
2225
import logging
2326
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
2427

@@ -55,16 +58,68 @@ def send_email(subject, body):
5558
logging.error(f"Error sending email: {e}")
5659

5760

61+
from pydub import AudioSegment
62+
import io
63+
64+
def convert_mp3_to_wav(mp3_file_object):
    """
    Re-encodes MP3 audio as WAV entirely in memory.

    Parameters:
    mp3_file_object: A file-like object containing MP3 data; it is handed
        to ``AudioSegment.from_file`` with ``format="mp3"``.

    Returns:
    BytesIO: An in-memory file object holding the WAV export, rewound to
        position 0 so it can be read immediately.
    """
    wav_buffer = io.BytesIO()

    # Decode the MP3 stream, then write it back out as WAV into the buffer
    decoded_audio = AudioSegment.from_file(mp3_file_object, format="mp3")
    decoded_audio.export(wav_buffer, format="wav")

    # The export leaves the cursor at the end of the data; rewind for readers
    wav_buffer.seek(0)
    return wav_buffer
74+
75+
76+
def fetch_audio_data(
    bucket_name,
    blob_name,
    credentials_path='/app/cloud_analysis/g_application_credentials.json',
):
    """
    Fetches an MP3 file from Google Cloud Storage and returns it as WAV.

    Parameters:
    bucket_name (str): The name of the GCS bucket.
    blob_name (str): The name of the blob (file) in the GCS bucket.
    credentials_path (str): Path to the service-account key file (.json)
        used to authenticate the storage client. Defaults to the location
        the key is expected at inside the container.

    Returns:
    BytesIO: An in-memory file object of the audio data, converted to WAV
        by ``convert_mp3_to_wav`` and positioned at the start.
    """
    # Imported locally so the block does not depend on a module-level import
    from google.oauth2 import service_account

    # Create a GCS client authenticated with the service-account key
    credentials = service_account.Credentials.from_service_account_file(
        credentials_path
    )
    storage_client = storage.Client(credentials=credentials)

    # Build the bucket handle without an API round-trip: get_bucket() fetches
    # bucket metadata, which needs the storage.buckets.get permission that
    # object-only roles (e.g. Storage Object Viewer) do not grant.
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)

    # Download the file into an in-memory file object
    audio_file_object = io.BytesIO()
    blob.download_to_file(audio_file_object)
    audio_file_object.seek(0)  # Move file pointer to the start

    # Convert MP3 to WAV
    return convert_mp3_to_wav(audio_file_object)
110+
111+
58112
def analyseAudioFile(
59-
audio_file_path, batch_size=1, num_workers=4, min_hr = 0.1, min_conf = 0.99
113+
audio_file_object, batch_size=1, num_workers=4, min_hr = 0.1, min_conf = 0.99
60114
):
115+
61116
# Initiate model
62117
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
63118
model_path = "/app/audioclip/assets/snowmobile_model.pth"
64119
model = initModel(model_path=model_path, device=device)
65120

66121
# Run the predictions
67-
list_preds = AudioList().get_processed_list(audio_file_path)
122+
list_preds = AudioList().get_processed_list(audio_file_object)
68123
predLoader = DataLoader(list_preds, batch_size=batch_size, num_workers=num_workers, pin_memory=False)
69124
prob_audioclip_array, hr_array = predict(predLoader, model, device)
70125

@@ -93,15 +148,14 @@ def analyseAudioFile(
93148

94149
return results
95150

96-
def on_process_audio(
97-
audio_id: str, audio_rec: dict, audio_file_path: str
98-
):
151+
def on_process_audio(audio_id: str, audio_rec: dict, bucket_name: str, blob_name: str):
99152

100153
print(f"PROCESSING audioId={audio_id}")
101154
location = audio_rec["location"]
102155

103156
# A call out to your code here. Optionally we can pass on the recorder coordinates
104-
results = analyseAudioFile(audio_file_path)
157+
audio_file_object = fetch_audio_data(bucket_name, blob_name)
158+
results = analyseAudioFile(audio_file_object)
105159
# The object results is a list containing detections in the form:
106160
# [start, end, confidence, harmonic ratio]
107161

@@ -130,15 +184,17 @@ def on_process_audio(
130184

131185
@app.route('/process-audio', methods=['POST'])
132186
def process_audio_endpoint():
133-
audio_file_path = request.json['audio_file_path']
134-
audio_id = request.json['audio_id']
135-
audio_rec = request.json['audio_rec']
187+
data = request.json
188+
bucket_name = data['bucket_name']
189+
blob_name = data['blob_name']
190+
audio_id = data['audio_id']
191+
audio_rec = data['audio_rec']
136192

137-
detection_count = on_process_audio(audio_id, audio_rec, audio_file_path)
193+
results = on_process_audio(audio_id, audio_rec, bucket_name, blob_name)
194+
195+
if results > 0:
196+
send_email("Snowmobile Detection Alert", f"{results} snowmobile detections were made in the audio file!")
138197

139-
if detection_count > 0:
140-
send_email("Snowmobile Detection Alert", f"{detection_count} snowmobile detections were made in the audio file!")
141-
142198
return jsonify({"message": "Audio processing completed!"})
143199

144200

cloud_analysis/test_cloud.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,3 @@ curl -X POST \
77
https://model-4uhtnq5xla-lz.a.run.app/process-audio
88

99

10-
# /home/benjamin.cretois/data/snowmobile/example_audio.mp3

cloud_analysis/test_local.sh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#!/bin/bash
22

3-
curl -X POST -H "Content-Type: application/json" \
4-
-d '{"audio_file_path": "/app/example/example_audio.mp3", "audio_id": "test-id", "audio_rec": {"location": {"latitude": 0, "longitude": 0}}}' \
5-
http://localhost:8080/process-audio
3+
curl -X POST \
4+
-H "Content-Type: application/json" \
5+
-d '{"audio_id": "test-id", "audio_rec": {"location": {"latitude": 0, "longitude": 0}}, "bucket_name": "snoskuter-detector-test", "blob_name": "example_audio.mp3"}' \
6+
http://localhost:8080/process-audio

0 commit comments

Comments
 (0)