Hey folks,
I have a very basic question here. I've looked through the tutorials and the scripts and haven't found the answer, so apologies if it's already covered elsewhere.
I'm working on marine mammal classifiers using BirdNET embeddings, and I've been able to train a custom model in the GUI that works pretty well. I've also written custom scripts to use the model to predict on audio files so that I can build precision-recall curves and confusion matrices.
Unfortunately, I've not been able to figure out how to do that using the pre-written package functions. I'm reasonably OK at Python, I can write and read objects, but I don't see where I can point to a `.tflite` model and species list, as well as the input parameters I used to train, e.g. limited to 8 kHz. Probably a stupid question, but how do I point to my pre-trained model using `model.load_model()` when it doesn't take an input location? Below is what I've written to process my data with my custom model, but it still doesn't take into account the spectral parameters.
```python
import os

import librosa
import numpy as np
import pandas as pd
import scipy.special
from tensorflow import lite as tflite  # or: import tflite_runtime.interpreter as tflite


class BirdNetPredictor:
    def __init__(self, model_path, label_path, audio_folder,
                 sample_rate=48000, audio_duration=3.0, confidence_thresh=0.5):
        """Processor class for running tflite (e.g. BirdNET) models on audio files in a folder.

        Parameters:
            model_path (str): Path to the TensorFlow Lite model.
            label_path (str): Path to the label file (text file with one label per line).
            audio_folder (str): Path to the folder containing audio files.
            sample_rate (int): Sample rate for the model (default 48000).
            audio_duration (float): Duration in seconds of audio to classify (default 3.0).
            confidence_thresh (float): Minimum confidence to keep a detection (default 0.5).

        Example usage:
            model_path = "C:\\Users\\kaity\\Documents\\GitHub\\Ecotype\\Experiments\\BirdNET\\Bckrnd_mn_srkw_tkw_offshore_TKW_balanced_4k\\CustomClassifier_100_calls_Balanced_calltypes.tflite"
            label_path = "C:\\Users\\kaity\\Documents\\GitHub\\Ecotype\\Experiments\\BirdNET\\Bckrnd_mn_srkw_tkw_offshore_TKW_balanced_4k\\CustomClassifier_100_calls_Balanced_calltypes_Labels.txt"
            audio_folder = 'C:\\TempData\\DCLDE_EVAL\\SMRU\\Audio\\SMRU_test\\'
            processor = BirdNetPredictor(model_path, label_path, audio_folder)

            # Batch process audio files in folder and export to CSV
            output_csv = "predictions_output.csv"
            df_SMRU = processor.batch_process_audio_folder(output_csv)
            processor.export_to_raven(df_SMRU, raven_file="raven_output.txt")
        """
        self.model_path = model_path
        self.label_path = label_path
        self.audio_folder = audio_folder
        self.sample_rate = sample_rate
        self.audio_duration = audio_duration

        # Load model and labels
        self.interpreter = tflite.Interpreter(model_path=model_path)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

        # Confidence threshold
        self.confidence_thresh = confidence_thresh

        # Override the requested duration with what the model input actually expects
        input_shape = self.input_details[0]['shape']
        self.audio_duration = input_shape[1] / self.sample_rate
        print(f"Model expects {self.audio_duration} seconds of audio at {self.sample_rate} Hz")

        # Load labels
        self.labels = self.load_labels(label_path)

    def load_labels(self, label_path):
        """Load class labels from a text file (one label per line)."""
        with open(label_path, "r") as f:
            return [line.strip() for line in f]

    def preprocess_audio(self, audio, sr, target_sr=48000, duration=3.0):
        """Resample, trim/pad, and format audio to match the model input."""
        if sr != target_sr:
            audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)

        # Required length in samples (144,000 for 3 s at 48 kHz); computed
        # rather than hard-coded so it tracks target_sr and duration
        required_length = int(target_sr * duration)

        if len(audio) < required_length:
            # Zero padding if the segment is shorter than required
            padding = required_length - len(audio)
            audio = np.pad(audio, (0, padding), mode='constant', constant_values=0)
        else:
            # Trim the audio to the exact required length if it's longer
            audio = audio[:required_length]

        return np.expand_dims(audio.astype(np.float32), axis=0)

    def predict_segment(self, audio_segment):
        """Run inference on a single preprocessed audio segment."""
        self.interpreter.set_tensor(self.input_details[0]['index'], audio_segment)
        self.interpreter.invoke()
        predictions = self.interpreter.get_tensor(self.output_details[0]['index'])[0]
        return predictions

    def predict_long_audio(self, audio_path):
        """Split a long audio file into fixed-length chunks and classify each segment."""
        y, sr = librosa.load(audio_path, sr=None)  # Load with native sample rate
        segment_length = int(sr * self.audio_duration)  # Samples per segment
        num_segments = int(np.ceil(len(y) / segment_length))  # Number of chunks
        results = []

        for i in range(num_segments):
            start_sample = i * segment_length
            end_sample = min((i + 1) * segment_length, len(y))
            segment = y[start_sample:end_sample]

            # Preprocess segment to the model's sample rate and duration
            processed_segment = self.preprocess_audio(segment, sr=sr,
                                                      target_sr=self.sample_rate,
                                                      duration=self.audio_duration)

            # Predict (logit output)
            predictions = self.predict_segment(processed_segment)

            # Convert logits to BirdNET confidence scores using sigmoid
            confidence_scores = scipy.special.expit(predictions)
            top_idx = np.argmax(predictions)
            top_label = (self.labels[top_idx] if top_idx < len(self.labels)
                         else f"Unknown Class {top_idx}")
            confidence = confidence_scores[top_idx]

            if confidence >= self.confidence_thresh:
                results.append({
                    "Begin Time (S)": round(start_sample / sr, 2),
                    "End Time (S)": round(end_sample / sr, 2),
                    "Class": top_label,
                    "Common name": top_label,  # Replace with common name if needed
                    "Score": round(confidence, 4),
                    "File": os.path.basename(audio_path),
                    "FilePath": audio_path,
                })

            print(f"Segment {i + 1}: {top_label} (Confidence: {confidence:.2f})")

        return pd.DataFrame(results)

    def batch_process_audio_folder(self, output_csv="predictions.csv"):
        """Recursively process all audio files in a folder and save results to CSV."""
        all_results = []

        # Recursively walk through the directory and subdirectories
        for root, _, files in os.walk(self.audio_folder):
            for filename in files:
                if filename.endswith(('.wav', '.mp3', '.flac', '.ogg')):  # Audio files only
                    audio_path = os.path.join(root, filename)
                    print(f"Processing {audio_path}...")
                    all_results.append(self.predict_long_audio(audio_path))

        if not all_results:  # pd.concat raises on an empty list
            print("No audio files found.")
            return pd.DataFrame()

        # Concatenate all DataFrames and save to CSV
        final_df = pd.concat(all_results, ignore_index=True)
        final_df.to_csv(output_csv, index=False)
        print(f"Batch processing complete! Results saved to {output_csv}")
        return final_df

    def export_to_raven(self, df, raven_file="raven_output.txt"):
        """Export prediction results to a Raven selection table format."""
        # N + 1 so every row gets an ID (range(1, N) was one short)
        df['Selection'] = range(1, df.shape[0] + 1)
        df['Channel'] = 1
        df['View'] = 'Spectrogram 1'

        with open(raven_file, 'w') as f:
            # Write header, then one tab-separated row per detection
            f.write("Selection\tView\tChannel\tBegin Time (S)\tEnd Time (S)\tCommon name\tScore\n")
            for _, row in df.iterrows():
                f.write(f"{row['Selection']}\t{row['View']}\t{row['Channel']}\t"
                        f"{row['Begin Time (S)']}\t{row['End Time (S)']}\t"
                        f"{row['Common name']}\t{row['Score']}\n")

        print(f"Raven selection table exported to {raven_file}")
```