From 95038d1ac37099d62bfb6520bad40c7025ef7f9c Mon Sep 17 00:00:00 2001 From: Gregor Richards <hg-yff@gregor.im> Date: Thu, 20 Sep 2018 21:44:49 -0400 Subject: [PATCH 01/15] Modularizing the RNN model --- examples/rnnoise_demo.c | 2 +- include/rnnoise.h | 5 +++-- src/denoise.c | 20 ++++++++++++++------ src/rnn.c | 24 ++++++++++++------------ src/rnn.h | 1 + src/rnn_data.c | 32 ++++++++++++++++++++++++++------ src/rnn_data.h | 35 ++++++++++++++++++----------------- 7 files changed, 75 insertions(+), 44 deletions(-) diff --git a/examples/rnnoise_demo.c b/examples/rnnoise_demo.c index e1e239a2..3e18c215 100644 --- a/examples/rnnoise_demo.c +++ b/examples/rnnoise_demo.c @@ -35,7 +35,7 @@ int main(int argc, char **argv) { float x[FRAME_SIZE]; FILE *f1, *fout; DenoiseState *st; - st = rnnoise_create(); + st = rnnoise_create(NULL); if (argc!=3) { fprintf(stderr, "usage: %s <noisy speech> <output denoised>\n", argv[0]); return 1; diff --git a/include/rnnoise.h b/include/rnnoise.h index 562b34cb..f1804da2 100644 --- a/include/rnnoise.h +++ b/include/rnnoise.h @@ -40,12 +40,13 @@ typedef struct DenoiseState DenoiseState; +typedef struct RNNModel RNNModel; RNNOISE_EXPORT int rnnoise_get_size(); -RNNOISE_EXPORT int rnnoise_init(DenoiseState *st); +RNNOISE_EXPORT int rnnoise_init(DenoiseState *st, RNNModel *model); -RNNOISE_EXPORT DenoiseState *rnnoise_create(); +RNNOISE_EXPORT DenoiseState *rnnoise_create(RNNModel *model); RNNOISE_EXPORT void rnnoise_destroy(DenoiseState *st); diff --git a/src/denoise.c b/src/denoise.c index 128cd999..8e3eb49f 100644 --- a/src/denoise.c +++ b/src/denoise.c @@ -70,6 +70,10 @@ #define TRAINING 0 #endif + +extern const struct RNNModel model_orig; + + static const opus_int16 eband5ms[] = { /*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 20k*/ 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100 @@ -284,15 +288,19 @@ int rnnoise_get_size() { return sizeof(DenoiseState); } -int 
rnnoise_init(DenoiseState *st) { +int rnnoise_init(DenoiseState *st, RNNModel *model) { memset(st, 0, sizeof(*st)); + if (model) + st->rnn.model = model; + else + st->rnn.model = &model_orig; return 0; } -DenoiseState *rnnoise_create() { +DenoiseState *rnnoise_create(RNNModel *model) { DenoiseState *st; st = malloc(rnnoise_get_size()); - rnnoise_init(st); + rnnoise_init(st, model); return st; } @@ -542,9 +550,9 @@ int main(int argc, char **argv) { DenoiseState *st; DenoiseState *noise_state; DenoiseState *noisy; - st = rnnoise_create(); - noise_state = rnnoise_create(); - noisy = rnnoise_create(); + st = rnnoise_create(NULL); + noise_state = rnnoise_create(NULL); + noisy = rnnoise_create(NULL); if (argc!=4) { fprintf(stderr, "usage: %s <speech> <noise> <output denoised>\n", argv[0]); return 1; diff --git a/src/rnn.c b/src/rnn.c index 1daa7b6c..c54958eb 100644 --- a/src/rnn.c +++ b/src/rnn.c @@ -162,17 +162,17 @@ void compute_rnn(RNNState *rnn, float *gains, float *vad, const float *input) { float dense_out[MAX_NEURONS]; float noise_input[MAX_NEURONS*3]; float denoise_input[MAX_NEURONS*3]; - compute_dense(&input_dense, dense_out, input); - compute_gru(&vad_gru, rnn->vad_gru_state, dense_out); - compute_dense(&vad_output, vad, rnn->vad_gru_state); - for (i=0;i<INPUT_DENSE_SIZE;i++) noise_input[i] = dense_out[i]; - for (i=0;i<VAD_GRU_SIZE;i++) noise_input[i+INPUT_DENSE_SIZE] = rnn->vad_gru_state[i]; - for (i=0;i<INPUT_SIZE;i++) noise_input[i+INPUT_DENSE_SIZE+VAD_GRU_SIZE] = input[i]; - compute_gru(&noise_gru, rnn->noise_gru_state, noise_input); + compute_dense(rnn->model->input_dense, dense_out, input); + compute_gru(rnn->model->vad_gru, rnn->vad_gru_state, dense_out); + compute_dense(rnn->model->vad_output, vad, rnn->vad_gru_state); + for (i=0;i<rnn->model->input_dense_size;i++) noise_input[i] = dense_out[i]; + for (i=0;i<rnn->model->vad_gru_size;i++) noise_input[i+rnn->model->input_dense_size] = rnn->vad_gru_state[i]; + for (i=0;i<INPUT_SIZE;i++) 
noise_input[i+rnn->model->input_dense_size+rnn->model->vad_gru_size] = input[i]; + compute_gru(rnn->model->noise_gru, rnn->noise_gru_state, noise_input); - for (i=0;i<VAD_GRU_SIZE;i++) denoise_input[i] = rnn->vad_gru_state[i]; - for (i=0;i<NOISE_GRU_SIZE;i++) denoise_input[i+VAD_GRU_SIZE] = rnn->noise_gru_state[i]; - for (i=0;i<INPUT_SIZE;i++) denoise_input[i+VAD_GRU_SIZE+NOISE_GRU_SIZE] = input[i]; - compute_gru(&denoise_gru, rnn->denoise_gru_state, denoise_input); - compute_dense(&denoise_output, gains, rnn->denoise_gru_state); + for (i=0;i<rnn->model->vad_gru_size;i++) denoise_input[i] = rnn->vad_gru_state[i]; + for (i=0;i<rnn->model->noise_gru_size;i++) denoise_input[i+rnn->model->vad_gru_size] = rnn->noise_gru_state[i]; + for (i=0;i<INPUT_SIZE;i++) denoise_input[i+rnn->model->vad_gru_size+rnn->model->noise_gru_size] = input[i]; + compute_gru(rnn->model->denoise_gru, rnn->denoise_gru_state, denoise_input); + compute_dense(rnn->model->denoise_output, gains, rnn->denoise_gru_state); } diff --git a/src/rnn.h b/src/rnn.h index 9e08b44a..e7638957 100644 --- a/src/rnn.h +++ b/src/rnn.h @@ -56,6 +56,7 @@ typedef struct { int activation; } GRULayer; +typedef struct RNNModel RNNModel; typedef struct RNNState RNNState; void compute_dense(const DenseLayer *layer, float *output, const float *input); diff --git a/src/rnn_data.c b/src/rnn_data.c index 8f6c99bb..82a07f1d 100644 --- a/src/rnn_data.c +++ b/src/rnn_data.c @@ -5,6 +5,7 @@ #endif #include "rnn.h" +#include "rnn_data.h" static const rnn_weight input_dense_weights[1008] = { -10, 0, -3, 1, -8, -6, 3, -13, @@ -141,7 +142,7 @@ static const rnn_weight input_dense_bias[24] = { -126, 28, 127, 125, -30, 127, -89, -20 }; -const DenseLayer input_dense = { +static const DenseLayer input_dense = { input_dense_bias, input_dense_weights, 42, 24, ACTIVATION_TANH @@ -597,7 +598,7 @@ static const rnn_weight vad_gru_bias[72] = { -29, 127, 34, -66, 49, 53, 27, 62 }; -const GRULayer vad_gru = { +static const GRULayer vad_gru = { 
vad_gru_bias, vad_gru_weights, vad_gru_recurrent_weights, @@ -3115,7 +3116,7 @@ static const rnn_weight noise_gru_bias[144] = { -23, -64, 31, 86, -50, 2, -38, 7 }; -const GRULayer noise_gru = { +static const GRULayer noise_gru = { noise_gru_bias, noise_gru_weights, noise_gru_recurrent_weights, @@ -10727,7 +10728,7 @@ static const rnn_weight denoise_gru_bias[288] = { -21, 25, 18, -58, 25, 126, -84, 127 }; -const GRULayer denoise_gru = { +static const GRULayer denoise_gru = { denoise_gru_bias, denoise_gru_weights, denoise_gru_recurrent_weights, @@ -11007,7 +11008,7 @@ static const rnn_weight denoise_output_bias[22] = { -126, -105, -53, -49, -18, -9 }; -const DenseLayer denoise_output = { +static const DenseLayer denoise_output = { denoise_output_bias, denoise_output_weights, 96, 22, ACTIVATION_SIGMOID @@ -11023,9 +11024,28 @@ static const rnn_weight vad_output_bias[1] = { -50 }; -const DenseLayer vad_output = { +static const DenseLayer vad_output = { vad_output_bias, vad_output_weights, 24, 1, ACTIVATION_SIGMOID }; +const struct RNNModel model_orig = { + 24, + &input_dense, + + 24, + &vad_gru, + + 48, + &noise_gru, + + 96, + &denoise_gru, + + 22, + &denoise_output, + + 1, + &vad_output +}; diff --git a/src/rnn_data.h b/src/rnn_data.h index 56109804..2aa41f90 100644 --- a/src/rnn_data.h +++ b/src/rnn_data.h @@ -1,32 +1,33 @@ -/*This file is automatically generated from a Keras model*/ - #ifndef RNN_DATA_H #define RNN_DATA_H #include "rnn.h" -#define INPUT_DENSE_SIZE 24 -extern const DenseLayer input_dense; +struct RNNModel { + int input_dense_size; + DenseLayer *input_dense; -#define VAD_GRU_SIZE 24 -extern const GRULayer vad_gru; + int vad_gru_size; + GRULayer *vad_gru; -#define NOISE_GRU_SIZE 48 -extern const GRULayer noise_gru; + int noise_gru_size; + GRULayer *noise_gru; -#define DENOISE_GRU_SIZE 96 -extern const GRULayer denoise_gru; + int denoise_gru_size; + GRULayer *denoise_gru; -#define DENOISE_OUTPUT_SIZE 22 -extern const DenseLayer denoise_output; + int 
denoise_output_size; + DenseLayer *denoise_output; -#define VAD_OUTPUT_SIZE 1 -extern const DenseLayer vad_output; + int vad_output_size; + DenseLayer *vad_output; +}; struct RNNState { - float vad_gru_state[VAD_GRU_SIZE]; - float noise_gru_state[NOISE_GRU_SIZE]; - float denoise_gru_state[DENOISE_GRU_SIZE]; + const RNNModel *model; + float *vad_gru_state; + float *noise_gru_state; + float *denoise_gru_state; }; From edf049d6b07410186185cdc97d4f2f4fba8c1dc3 Mon Sep 17 00:00:00 2001 From: Gregor Richards <hg-yff@gregor.im> Date: Wed, 24 Apr 2019 16:41:27 -0400 Subject: [PATCH 02/15] Avoid namespace collision In an attempt to avoid namespace collision, rename model_orig as rnnoise_model_orig. (Note that the previous implementation exported things like vad_data, so this at least only has one, prefixed, unnecessary public variable) --- src/denoise.c | 5 +++-- src/rnn_data.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/denoise.c b/src/denoise.c index 8e3eb49f..70d9fde3 100644 --- a/src/denoise.c +++ b/src/denoise.c @@ -71,7 +71,8 @@ #endif -extern const struct RNNModel model_orig; +/* The built-in model, used if no file is given as input */ +extern const struct RNNModel rnnoise_model_orig; static const opus_int16 eband5ms[] = { @@ -293,7 +294,7 @@ int rnnoise_init(DenoiseState *st, RNNModel *model) { if (model) st->rnn.model = model; else - st->rnn.model = &model_orig; + st->rnn.model = &rnnoise_model_orig; return 0; } diff --git a/src/rnn_data.c b/src/rnn_data.c index 82a07f1d..22c53165 100644 --- a/src/rnn_data.c +++ b/src/rnn_data.c @@ -11030,7 +11030,7 @@ static const DenseLayer vad_output = { 24, 1, ACTIVATION_SIGMOID }; -const struct RNNModel model_orig = { +const struct RNNModel rnnoise_model_orig = { 24, &input_dense, From 6b4d22bd15a6ca1a76b6824386ef612807dc042f Mon Sep 17 00:00:00 2001 From: Gregor Richards <hg-yff@gregor.im> Date: Thu, 20 Sep 2018 21:47:08 -0400 Subject: [PATCH 03/15] Fix some warnings --- include/rnnoise.h | 5 +++++ 
src/denoise.c | 33 ++++++--------------------------- src/rnn.h | 3 ++- src/rnn_data.h | 12 ++++++------ 4 files changed, 19 insertions(+), 34 deletions(-) diff --git a/include/rnnoise.h b/include/rnnoise.h index f1804da2..07e932ff 100644 --- a/include/rnnoise.h +++ b/include/rnnoise.h @@ -24,6 +24,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#ifndef RNNOISE_H +#define RNNOISE_H 1 + #ifndef RNNOISE_EXPORT # if defined(WIN32) # if defined(RNNOISE_BUILD) && defined(DLL_EXPORT) @@ -51,3 +54,5 @@ RNNOISE_EXPORT DenoiseState *rnnoise_create(RNNModel *model); RNNOISE_EXPORT void rnnoise_destroy(DenoiseState *st); RNNOISE_EXPORT float rnnoise_process_frame(DenoiseState *st, float *out, const float *in); + +#endif diff --git a/src/denoise.c b/src/denoise.c index 70d9fde3..7334a575 100644 --- a/src/denoise.c +++ b/src/denoise.c @@ -547,7 +547,8 @@ int main(int argc, char **argv) { int vad_cnt=0; int gain_change_count=0; float speech_gain = 1, noise_gain = 1; - FILE *f1, *f2, *fout; + FILE *f1, *f2; + int maxCount; DenoiseState *st; DenoiseState *noise_state; DenoiseState *noisy; @@ -555,12 +556,12 @@ int main(int argc, char **argv) { noise_state = rnnoise_create(NULL); noisy = rnnoise_create(NULL); if (argc!=4) { - fprintf(stderr, "usage: %s <speech> <noise> <output denoised>\n", argv[0]); + fprintf(stderr, "usage: %s <speech> <noise> <count>\n", argv[0]); return 1; } f1 = fopen(argv[1], "r"); f2 = fopen(argv[2], "r"); - fout = fopen(argv[3], "w"); + maxCount = atoi(argv[3]); for(i=0;i<150;i++) { short tmp[FRAME_SIZE]; fread(tmp, sizeof(short), FRAME_SIZE, f2); @@ -572,12 +573,11 @@ int main(int argc, char **argv) { float Ln[NB_BANDS]; float features[NB_FEATURES]; float g[NB_BANDS]; - float gf[FREQ_SIZE]={1}; short tmp[FRAME_SIZE]; float vad=0; - float vad_prob; float E=0; - if (count==50000000) break; + if (count==maxCount) break; + if ((count%1000)==0) fprintf(stderr, "%d\r", count); if (++gain_change_count > 2821) { speech_gain = pow(10., 
(-40+(rand()%60))/20.); noise_gain = pow(10., (-30+(rand()%50))/20.); @@ -652,37 +652,16 @@ int main(int argc, char **argv) { if (vad==0 && noise_gain==0) g[i] = -1; } count++; -#if 0 - for (i=0;i<NB_FEATURES;i++) printf("%f ", features[i]); - for (i=0;i<NB_BANDS;i++) printf("%f ", g[i]); - for (i=0;i<NB_BANDS;i++) printf("%f ", Ln[i]); - printf("%f\n", vad); -#endif #if 1 fwrite(features, sizeof(float), NB_FEATURES, stdout); fwrite(g, sizeof(float), NB_BANDS, stdout); fwrite(Ln, sizeof(float), NB_BANDS, stdout); fwrite(&vad, sizeof(float), 1, stdout); -#endif -#if 0 - compute_rnn(&noisy->rnn, g, &vad_prob, features); - interp_band_gain(gf, g); -#if 1 - for (i=0;i<FREQ_SIZE;i++) { - X[i].r *= gf[i]; - X[i].i *= gf[i]; - } -#endif - frame_synthesis(noisy, xn, X); - - for (i=0;i<FRAME_SIZE;i++) tmp[i] = xn[i]; - fwrite(tmp, sizeof(short), FRAME_SIZE, fout); #endif } fprintf(stderr, "matrix size: %d x %d\n", count, NB_FEATURES + 2*NB_BANDS + 1); fclose(f1); fclose(f2); - fclose(fout); return 0; } diff --git a/src/rnn.h b/src/rnn.h index e7638957..10329f55 100644 --- a/src/rnn.h +++ b/src/rnn.h @@ -27,6 +27,8 @@ #ifndef RNN_H_ #define RNN_H_ +#include "rnnoise.h" + #include "opus_types.h" #define WEIGHTS_SCALE (1.f/256) @@ -56,7 +58,6 @@ typedef struct { int activation; } GRULayer; -typedef struct RNNModel RNNModel; typedef struct RNNState RNNState; void compute_dense(const DenseLayer *layer, float *output, const float *input); diff --git a/src/rnn_data.h b/src/rnn_data.h index 2aa41f90..f2186fe0 100644 --- a/src/rnn_data.h +++ b/src/rnn_data.h @@ -5,22 +5,22 @@ struct RNNModel { int input_dense_size; - DenseLayer *input_dense; + const DenseLayer *input_dense; int vad_gru_size; - GRULayer *vad_gru; + const GRULayer *vad_gru; int noise_gru_size; - GRULayer *noise_gru; + const GRULayer *noise_gru; int denoise_gru_size; - GRULayer *denoise_gru; + const GRULayer *denoise_gru; int denoise_output_size; - DenseLayer *denoise_output; + const DenseLayer *denoise_output; int 
vad_output_size; - DenseLayer *vad_output; + const DenseLayer *vad_output; }; struct RNNState { From d2071d914d1391f4176278822413f53bff9153c4 Mon Sep 17 00:00:00 2001 From: Gregor Richards <hg-yff@gregor.im> Date: Tue, 28 Aug 2018 10:40:28 -0400 Subject: [PATCH 04/15] Made dump_rnn output in nu format. --- training/dump_rnn.py | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/training/dump_rnn.py b/training/dump_rnn.py index 9f267a7e..a9931b76 100755 --- a/training/dump_rnn.py +++ b/training/dump_rnn.py @@ -30,7 +30,7 @@ def printVector(f, vector, name): f.write('\n};\n\n') return; -def printLayer(f, hf, layer): +def printLayer(f, layer): weights = layer.get_weights() printVector(f, weights[0], layer.name + '_weights') if len(weights) > 2: @@ -39,19 +39,24 @@ def printLayer(f, hf, layer): name = layer.name activation = re.search('function (.*) at', str(layer.activation)).group(1).upper() if len(weights) > 2: - f.write('const GRULayer {} = {{\n {}_bias,\n {}_weights,\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n' + f.write('static const GRULayer {} = {{\n {}_bias,\n {}_weights,\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n' .format(name, name, name, name, weights[0].shape[0], weights[0].shape[1]/3, activation)) - hf.write('#define {}_SIZE {}\n'.format(name.upper(), weights[0].shape[1]/3)) - hf.write('extern const GRULayer {};\n\n'.format(name)); else: - f.write('const DenseLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n' + f.write('static const DenseLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n' .format(name, name, name, weights[0].shape[0], weights[0].shape[1], activation)) - hf.write('#define {}_SIZE {}\n'.format(name.upper(), weights[0].shape[1])) - hf.write('extern const DenseLayer {};\n\n'.format(name)); + +def structLayer(f, layer): + weights = layer.get_weights() + name = layer.name + if len(weights) > 2: + f.write(' 
{},\n'.format(weights[0].shape[1]/3)) + else: + f.write(' {},\n'.format(weights[0].shape[1])) + f.write(' &{},\n'.format(name)) def foo(c, name): - return 1 + return None def mean_squared_sqrt_error(y_true, y_pred): return K.mean(K.square(K.sqrt(y_pred) - K.sqrt(y_true)), axis=-1) @@ -62,27 +67,26 @@ def mean_squared_sqrt_error(y_true, y_pred): weights = model.get_weights() f = open(sys.argv[2], 'w') -hf = open(sys.argv[3], 'w') f.write('/*This file is automatically generated from a Keras model*/\n\n') f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "rnn.h"\n\n') -hf.write('/*This file is automatically generated from a Keras model*/\n\n') -hf.write('#ifndef RNN_DATA_H\n#define RNN_DATA_H\n\n#include "rnn.h"\n\n') - layer_list = [] for i, layer in enumerate(model.layers): if len(layer.get_weights()) > 0: - printLayer(f, hf, layer) + printLayer(f, layer) if len(layer.get_weights()) > 2: layer_list.append(layer.name) -hf.write('struct RNNState {\n') -for i, name in enumerate(layer_list): - hf.write(' float {}_state[{}_SIZE];\n'.format(name, name.upper())) -hf.write('};\n') +f.write('const struct RNNModel rnnoise_model_{} = {{\n'.format(sys.argv[3])) +for i, layer in enumerate(model.layers): + if len(layer.get_weights()) > 0: + structLayer(f, layer) +f.write('};\n') -hf.write('\n\n#endif\n') +#hf.write('struct RNNState {\n') +#for i, name in enumerate(layer_list): +# hf.write(' float {}_state[{}_SIZE];\n'.format(name, name.upper())) +#hf.write('};\n') f.close() -hf.close() From 6d49c2d0cb0c5a0852b4ee7f020c1f5527cf5a0c Mon Sep 17 00:00:00 2001 From: Gregor Richards <hg-yff@gregor.im> Date: Thu, 20 Sep 2018 21:51:38 -0400 Subject: [PATCH 05/15] Neural network model files Extending the neural network dumper to dump to a simple text file format, and adding reader functions to read a neural network description from a FILE *. 
--- Makefile.am | 1 + include/rnnoise.h | 8 ++- src/rnn_reader.c | 143 +++++++++++++++++++++++++++++++++++++++++++ training/dump_rnn.py | 23 ++++--- 4 files changed, 167 insertions(+), 8 deletions(-) create mode 100644 src/rnn_reader.c diff --git a/Makefile.am b/Makefile.am index a004dc62..735d17a3 100644 --- a/Makefile.am +++ b/Makefile.am @@ -22,6 +22,7 @@ librnnoise_la_SOURCES = \ src/denoise.c \ src/rnn.c \ src/rnn_data.c \ + src/rnn_reader.c \ src/pitch.c \ src/kiss_fft.c \ src/celt_lpc.c diff --git a/include/rnnoise.h b/include/rnnoise.h index 07e932ff..52ce5856 100644 --- a/include/rnnoise.h +++ b/include/rnnoise.h @@ -27,6 +27,9 @@ #ifndef RNNOISE_H #define RNNOISE_H 1 +#include <stdio.h> + + #ifndef RNNOISE_EXPORT # if defined(WIN32) # if defined(RNNOISE_BUILD) && defined(DLL_EXPORT) @@ -41,7 +44,6 @@ # endif #endif - typedef struct DenoiseState DenoiseState; typedef struct RNNModel RNNModel; @@ -55,4 +57,8 @@ RNNOISE_EXPORT void rnnoise_destroy(DenoiseState *st); RNNOISE_EXPORT float rnnoise_process_frame(DenoiseState *st, float *out, const float *in); +RNNOISE_EXPORT RNNModel *rnnoise_model_from_file(FILE *f); + +RNNOISE_EXPORT void rnnoise_model_free(RNNModel *model); + #endif diff --git a/src/rnn_reader.c b/src/rnn_reader.c new file mode 100644 index 00000000..e3ee4cc1 --- /dev/null +++ b/src/rnn_reader.c @@ -0,0 +1,143 @@ +/* Copyright (c) 2018 Gregor Richards */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> + +#include "rnn.h" +#include "rnn_data.h" +#include "rnnoise.h" + +RNNModel *rnnoise_model_from_file(FILE *f) +{ + int i, in; + + if (fscanf(f, "rnnoise-nu model file version %d\n", &in) != 1 || in != 1) + return NULL; + + RNNModel *ret = calloc(1, sizeof(RNNModel)); + if (!ret) + return NULL; + +#define ALLOC_LAYER(type, name) \ + type *name; \ + name = calloc(1, sizeof(type)); \ + if (!name) { \ + rnnoise_model_free(ret); \ + return NULL; \ + } \ + ret->name = name + + ALLOC_LAYER(DenseLayer, input_dense); + ALLOC_LAYER(GRULayer, vad_gru); + ALLOC_LAYER(GRULayer, noise_gru); + ALLOC_LAYER(GRULayer, denoise_gru); + ALLOC_LAYER(DenseLayer, denoise_output); + ALLOC_LAYER(DenseLayer, vad_output); + +#define INPUT_VAL(name) do { \ + if (fscanf(f, "%d", &in) != 1 || in < 0 || in > 128) { \ + rnnoise_model_free(ret); \ + return NULL; \ + } \ + name = in; \ + } while (0) + +#define INPUT_ARRAY(name, len) do { \ + rnn_weight *values = malloc((len) * sizeof(rnn_weight)); \ + if (!values) { \ + rnnoise_model_free(ret); \ + return NULL; \ + } \ + name = values; \ + for (i = 0; i < (len); 
i++) { \ + if (fscanf(f, "%d", &in) != 1) { \ + rnnoise_model_free(ret); \ + return NULL; \ + } \ + values[i] = in; \ + } \ + } while (0) + +#define INPUT_DENSE(name) do { \ + INPUT_VAL(name->nb_inputs); \ + INPUT_VAL(name->nb_neurons); \ + INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons); \ + INPUT_ARRAY(name->bias, name->nb_neurons); \ + } while (0) + +#define INPUT_GRU(name) do { \ + INPUT_VAL(name->nb_inputs); \ + INPUT_VAL(name->nb_neurons); \ + INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons * 3); \ + INPUT_ARRAY(name->recurrent_weights, name->nb_neurons * name->nb_neurons * 3); \ + INPUT_ARRAY(name->bias, name->nb_neurons * 3); \ + } while (0) + + INPUT_DENSE(input_dense); + INPUT_GRU(vad_gru); + INPUT_GRU(noise_gru); + INPUT_GRU(denoise_gru); + INPUT_DENSE(denoise_output); + INPUT_DENSE(vad_output); + + return ret; +} + +void rnnoise_model_free(RNNModel *model) +{ +#define FREE_MAYBE(ptr) do { if (ptr) free(ptr); } while (0) +#define FREE_DENSE(name) do { \ + if (model->name) { \ + free((void *) model->name->input_weights); \ + free((void *) model->name->bias); \ + free((void *) model->name); \ + } \ + } while (0) +#define FREE_GRU(name) do { \ + if (model->name) { \ + free((void *) model->name->input_weights); \ + free((void *) model->name->recurrent_weights); \ + free((void *) model->name->bias); \ + free((void *) model->name); \ + } \ + } while (0) + + if (!model) + return; + FREE_DENSE(input_dense); + FREE_GRU(vad_gru); + FREE_GRU(noise_gru); + FREE_GRU(denoise_gru); + FREE_DENSE(denoise_output); + FREE_DENSE(vad_output); + free(model); +} diff --git a/training/dump_rnn.py b/training/dump_rnn.py index a9931b76..5ef4ec7c 100755 --- a/training/dump_rnn.py +++ b/training/dump_rnn.py @@ -12,30 +12,37 @@ import re import numpy as np -def printVector(f, vector, name): +def printVector(f, ft, vector, name): v = np.reshape(vector, (-1)); #print('static const float ', name, '[', len(v), '] = \n', file=f) f.write('static 
const rnn_weight {}[{}] = {{\n '.format(name, len(v))) for i in range(0, len(v)): f.write('{}'.format(min(127, int(round(256*v[i]))))) + ft.write('{}'.format(min(127, int(round(256*v[i]))))) if (i!=len(v)-1): f.write(',') else: break; + ft.write(" ") if (i%8==7): f.write("\n ") else: f.write(" ") #print(v, file=f) f.write('\n};\n\n') + ft.write("\n") return; -def printLayer(f, layer): +def printLayer(f, ft, layer): weights = layer.get_weights() - printVector(f, weights[0], layer.name + '_weights') if len(weights) > 2: - printVector(f, weights[1], layer.name + '_recurrent_weights') - printVector(f, weights[-1], layer.name + '_bias') + ft.write('{} {}\n'.format(weights[0].shape[0], weights[0].shape[1]/3)) + else: + ft.write('{} {}\n'.format(weights[0].shape[0], weights[0].shape[1])) + printVector(f, ft, weights[0], layer.name + '_weights') + if len(weights) > 2: + printVector(f, ft, weights[1], layer.name + '_recurrent_weights') + printVector(f, ft, weights[-1], layer.name + '_bias') name = layer.name activation = re.search('function (.*) at', str(layer.activation)).group(1).upper() if len(weights) > 2: @@ -67,18 +74,20 @@ def mean_squared_sqrt_error(y_true, y_pred): weights = model.get_weights() f = open(sys.argv[2], 'w') +ft = open(sys.argv[3], 'w') f.write('/*This file is automatically generated from a Keras model*/\n\n') f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "rnn.h"\n\n') +ft.write('rnnoise-nu model file version 1\n') layer_list = [] for i, layer in enumerate(model.layers): if len(layer.get_weights()) > 0: - printLayer(f, layer) + printLayer(f, ft, layer) if len(layer.get_weights()) > 2: layer_list.append(layer.name) -f.write('const struct RNNModel rnnoise_model_{} = {{\n'.format(sys.argv[3])) +f.write('const struct RNNModel rnnoise_model_{} = {{\n'.format(sys.argv[4])) for i, layer in enumerate(model.layers): if len(layer.get_weights()) > 0: structLayer(f, layer) From 61f587a392f1d9ea8ba0141ec1ff4185d31fffcc Mon Sep 17 00:00:00 
2001 From: Gregor Richards <hg-yff@gregor.im> Date: Thu, 20 Sep 2018 21:55:02 -0400 Subject: [PATCH 06/15] Some fixes to make model files work. --- src/rnn_reader.c | 23 +++++++++++++++++++++++ training/dump_rnn.py | 14 ++++++++++---- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/src/rnn_reader.c b/src/rnn_reader.c index e3ee4cc1..f71ead04 100644 --- a/src/rnn_reader.c +++ b/src/rnn_reader.c @@ -36,6 +36,12 @@ #include "rnn_data.h" #include "rnnoise.h" +/* Although these values are the same as in rnn.h, we make them separate to + * avoid accidentally burning internal values into a file format */ +#define F_ACTIVATION_TANH 0 +#define F_ACTIVATION_SIGMOID 1 +#define F_ACTIVATION_RELU 2 + RNNModel *rnnoise_model_from_file(FILE *f) { int i, in; @@ -71,6 +77,21 @@ RNNModel *rnnoise_model_from_file(FILE *f) name = in; \ } while (0) +#define INPUT_ACTIVATION(name) do { \ + int activation; \ + INPUT_VAL(activation); \ + switch (activation) { \ + case F_ACTIVATION_SIGMOID: \ + name = ACTIVATION_SIGMOID; \ + break; \ + case F_ACTIVATION_RELU: \ + name = ACTIVATION_RELU; \ + break; \ + default: \ + name = ACTIVATION_TANH; \ + } \ + } while (0) + #define INPUT_ARRAY(name, len) do { \ rnn_weight *values = malloc((len) * sizeof(rnn_weight)); \ if (!values) { \ @@ -90,6 +111,7 @@ RNNModel *rnnoise_model_from_file(FILE *f) #define INPUT_DENSE(name) do { \ INPUT_VAL(name->nb_inputs); \ INPUT_VAL(name->nb_neurons); \ + INPUT_ACTIVATION(name->activation); \ INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons); \ INPUT_ARRAY(name->bias, name->nb_neurons); \ } while (0) @@ -97,6 +119,7 @@ RNNModel *rnnoise_model_from_file(FILE *f) #define INPUT_GRU(name) do { \ INPUT_VAL(name->nb_inputs); \ INPUT_VAL(name->nb_neurons); \ + INPUT_ACTIVATION(name->activation); \ INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons * 3); \ INPUT_ARRAY(name->recurrent_weights, name->nb_neurons * name->nb_neurons * 3); \ INPUT_ARRAY(name->bias, 
name->nb_neurons * 3); \ diff --git a/training/dump_rnn.py b/training/dump_rnn.py index 5ef4ec7c..2f04359d 100755 --- a/training/dump_rnn.py +++ b/training/dump_rnn.py @@ -35,16 +35,22 @@ def printVector(f, ft, vector, name): def printLayer(f, ft, layer): weights = layer.get_weights() + activation = re.search('function (.*) at', str(layer.activation)).group(1).upper() if len(weights) > 2: - ft.write('{} {}\n'.format(weights[0].shape[0], weights[0].shape[1]/3)) + ft.write('{} {} '.format(weights[0].shape[0], weights[0].shape[1]/3)) + else: + ft.write('{} {} '.format(weights[0].shape[0], weights[0].shape[1])) + if activation == 'SIGMOID': + ft.write('1\n') + elif activation == 'RELU': + ft.write('2\n') else: - ft.write('{} {}\n'.format(weights[0].shape[0], weights[0].shape[1])) + ft.write('0\n') printVector(f, ft, weights[0], layer.name + '_weights') if len(weights) > 2: printVector(f, ft, weights[1], layer.name + '_recurrent_weights') printVector(f, ft, weights[-1], layer.name + '_bias') name = layer.name - activation = re.search('function (.*) at', str(layer.activation)).group(1).upper() if len(weights) > 2: f.write('static const GRULayer {} = {{\n {}_bias,\n {}_weights,\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n' .format(name, name, name, name, weights[0].shape[0], weights[0].shape[1]/3, activation)) @@ -77,7 +83,7 @@ def mean_squared_sqrt_error(y_true, y_pred): ft = open(sys.argv[3], 'w') f.write('/*This file is automatically generated from a Keras model*/\n\n') -f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "rnn.h"\n\n') +f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "rnn.h"\n#include "rnn_data.h"\n\n') ft.write('rnnoise-nu model file version 1\n') layer_list = [] From a2f46596f995b24da0c29ba13bfcbd0e35c88809 Mon Sep 17 00:00:00 2001 From: Gregor Richards <hg-yff@gregor.im> Date: Wed, 29 Aug 2018 21:05:15 -0400 Subject: [PATCH 07/15] Corrected a buggy lack of allocation. 
--- src/denoise.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/denoise.c b/src/denoise.c index 7334a575..2238698f 100644 --- a/src/denoise.c +++ b/src/denoise.c @@ -295,6 +295,9 @@ int rnnoise_init(DenoiseState *st, RNNModel *model) { st->rnn.model = model; else st->rnn.model = &rnnoise_model_orig; + st->rnn.vad_gru_state = calloc(sizeof(float), st->rnn.model->vad_gru_size); + st->rnn.noise_gru_state = calloc(sizeof(float), st->rnn.model->noise_gru_size); + st->rnn.denoise_gru_state = calloc(sizeof(float), st->rnn.model->denoise_gru_size); return 0; } @@ -306,6 +309,9 @@ DenoiseState *rnnoise_create(RNNModel *model) { } void rnnoise_destroy(DenoiseState *st) { + free(st->rnn.vad_gru_state); + free(st->rnn.noise_gru_state); + free(st->rnn.denoise_gru_state); free(st); } From df246fd24df927c2fa13417e63b9a021fcf0bf1c Mon Sep 17 00:00:00 2001 From: Gregor Richards <hg-yff@gregor.im> Date: Wed, 29 Aug 2018 21:06:20 -0400 Subject: [PATCH 08/15] Added parameterizable maximum attenuation. --- Makefile.am | 2 +- examples/rnnoise_demo.c | 65 +++++++++++++++++++++++++++++------------ include/rnnoise.h | 5 ++++ src/denoise.c | 37 +++++++++++++++++++++++ 4 files changed, 90 insertions(+), 19 deletions(-) diff --git a/Makefile.am b/Makefile.am index 735d17a3..f2185608 100644 --- a/Makefile.am +++ b/Makefile.am @@ -36,7 +36,7 @@ noinst_PROGRAMS = examples/rnnoise_demo endif examples_rnnoise_demo_SOURCES = examples/rnnoise_demo.c -examples_rnnoise_demo_LDADD = librnnoise.la +examples_rnnoise_demo_LDADD = librnnoise.la $(LIBM) pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = rnnoise.pc diff --git a/examples/rnnoise_demo.c b/examples/rnnoise_demo.c index 3e18c215..0f18bcf8 100644 --- a/examples/rnnoise_demo.c +++ b/examples/rnnoise_demo.c @@ -24,36 +24,65 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#include <math.h> #include <stdio.h> #include "rnnoise.h" +#include <stdlib.h> +#include <sys/types.h> +#include "rnnoise.h" #define FRAME_SIZE 480 int main(int argc, char **argv) { - int i; + int i, ci; int first = 1; + int channels; float x[FRAME_SIZE]; - FILE *f1, *fout; - DenoiseState *st; - st = rnnoise_create(NULL); - if (argc!=3) { - fprintf(stderr, "usage: %s <noisy speech> <output denoised>\n", argv[0]); + short *tmp; + RNNModel *model = NULL; + DenoiseState **sts; + float max_attenuation; + if (argc < 3) { + fprintf(stderr, "usage: %s <channels> <max attenuation dB>\n", argv[0]); + return 1; + } + + channels = atoi(argv[1]); + if (channels < 1) channels = 1; + max_attenuation = pow(10, -atof(argv[2])/10); + + sts = malloc(channels * sizeof(DenoiseState *)); + if (!sts) { + perror("malloc"); return 1; } - f1 = fopen(argv[1], "r"); - fout = fopen(argv[2], "w"); + tmp = malloc(channels * FRAME_SIZE * sizeof(short)); + if (!tmp) { + perror("malloc"); + return 1; + } + for (i = 0; i < channels; i++) { + sts[i] = rnnoise_create(model); + rnnoise_set_param(sts[i], RNNOISE_PARAM_MAX_ATTENUATION, max_attenuation); + } + while (1) { - short tmp[FRAME_SIZE]; - fread(tmp, sizeof(short), FRAME_SIZE, f1); - if (feof(f1)) break; - for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i]; - rnnoise_process_frame(st, x, x); - for (i=0;i<FRAME_SIZE;i++) tmp[i] = x[i]; - if (!first) fwrite(tmp, sizeof(short), FRAME_SIZE, fout); + fread(tmp, sizeof(short), channels * FRAME_SIZE, stdin); + if (feof(stdin)) break; + + for (ci = 0; ci < channels; ci++) { + for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i*channels+ci]; + rnnoise_process_frame(sts[ci], x, x); + for (i=0;i<FRAME_SIZE;i++) tmp[i*channels+ci] = x[i]; + } + + if (!first) fwrite(tmp, sizeof(short), channels * FRAME_SIZE, stdout); first = 0; } - rnnoise_destroy(st); - fclose(f1); - fclose(fout); + + for (i = 0; i < channels; i++) + rnnoise_destroy(sts[i]); + free(tmp); + free(sts); return 0; } diff --git a/include/rnnoise.h 
b/include/rnnoise.h index 52ce5856..e493ae75 100644 --- a/include/rnnoise.h +++ b/include/rnnoise.h @@ -61,4 +61,9 @@ RNNOISE_EXPORT RNNModel *rnnoise_model_from_file(FILE *f); RNNOISE_EXPORT void rnnoise_model_free(RNNModel *model); +/* Parameters to a denoise state */ +#define RNNOISE_PARAM_MAX_ATTENUATION 1 + +RNNOISE_EXPORT void rnnoise_set_param(DenoiseState *st, int param, float value); + #endif diff --git a/src/denoise.c b/src/denoise.c index 2238698f..61665e65 100644 --- a/src/denoise.c +++ b/src/denoise.c @@ -65,6 +65,9 @@ #define NB_FEATURES (NB_BANDS+3*NB_DELTA_CEPS+2) +/* We don't allow max attenuation to be more than 60dB */ +#define MIN_MAX_ATTENUATION 0.000001f + #ifndef TRAINING #define TRAINING 0 @@ -100,6 +103,8 @@ struct DenoiseState { float mem_hp_x[2]; float lastg[NB_BANDS]; RNNState rnn; + + float max_attenuation; }; #if SMOOTH_BANDS @@ -508,6 +513,26 @@ float rnnoise_process_frame(DenoiseState *st, float *out, const float *in) { g[i] = MAX16(g[i], alpha*st->lastg[i]); st->lastg[i] = g[i]; } + + /* Apply maximum attenuation (minimum value) */ + if (st->max_attenuation) { + float min = 1, mult; + for (i=0;i<NB_BANDS;i++) { + if (g[i] < min) min = g[i]; + } + if (min < st->max_attenuation) { + if (min < MIN_MAX_ATTENUATION) + min = MIN_MAX_ATTENUATION; + mult = st->max_attenuation / min; + for (i=0;i<NB_BANDS;i++) { + if (g[i] < MIN_MAX_ATTENUATION) g[i] = MIN_MAX_ATTENUATION; + g[i] *= mult; + if (g[i] > 1) g[i] = 1; + st->lastg[i] = g[i]; + } + } + } + interp_band_gain(gf, g); #if 1 for (i=0;i<FREQ_SIZE;i++) { @@ -521,6 +546,18 @@ float rnnoise_process_frame(DenoiseState *st, float *out, const float *in) { return vad_prob; } +void rnnoise_set_param(DenoiseState *st, int param, float value) +{ + switch (param) { + case RNNOISE_PARAM_MAX_ATTENUATION: + if ((value > MIN_MAX_ATTENUATION && value <= 1) || value == 0) + st->max_attenuation = value; + else + st->max_attenuation = MIN_MAX_ATTENUATION; + break; + } +} + #if TRAINING static float 
uni_rand() { From d4ad247a39508bf236130ee49dd6d16b9c644792 Mon Sep 17 00:00:00 2001 From: Gregor Richards <hg-yff@gregor.im> Date: Thu, 20 Sep 2018 21:58:31 -0400 Subject: [PATCH 09/15] Updated README --- README | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README b/README index 27b18fa9..88fc79cd 100644 --- a/README +++ b/README @@ -1,4 +1,4 @@ -RNNoise is a noise suppression library based on a recurrent neural network +RNNoise is a noise suppression library based on a recurrent neural network. To compile, just type: % ./autogen.sh @@ -12,6 +12,6 @@ While it is meant to be used as a library, a simple command-line tool is provided as an example. It operates on RAW 16-bit (machine endian) mono PCM files sampled at 48 kHz. It can be used as: -./examples/rnnoise_demo input.pcm output.pcm +./examples/rnnoise_demo <number of channels> <maximum attenuation> < input.raw > output.raw The output is also a 16-bit raw PCM file. From 04d30082b35c1b3dffb60cecfdd7df10c7e9de76 Mon Sep 17 00:00:00 2001 From: Gregor Richards <hg-yff@gregor.im> Date: Thu, 30 Aug 2018 17:21:52 -0400 Subject: [PATCH 10/15] Adding instructions on training. 
--- TRAINING | 11 +++++++++++ training/rnn_train.py | 4 ++-- 2 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 TRAINING diff --git a/TRAINING b/TRAINING new file mode 100644 index 00000000..86c5a4eb --- /dev/null +++ b/TRAINING @@ -0,0 +1,11 @@ +(1) cd src ; ./compile.sh + +(2) ./denoise_training signal.raw noise.raw count > training.f32 + + (note the matrix size and replace 500000 87 below) + +(3) cd training ; ./bin2hdf5.py ../src/training.f32 500000 87 training.h5 + +(4) ./rnn_train.py + +(5) ./dump_rnn.py weights.hdf5 ../src/rnn_data.c ../src/rnn_data.h diff --git a/training/rnn_train.py b/training/rnn_train.py index bb53f89b..06d7e1a4 100755 --- a/training/rnn_train.py +++ b/training/rnn_train.py @@ -82,7 +82,7 @@ def get_config(self): batch_size = 32 print('Loading data...') -with h5py.File('denoise_data9.h5', 'r') as hf: +with h5py.File('training.h5', 'r') as hf: all_data = hf['data'][:] print('done.') @@ -113,4 +113,4 @@ def get_config(self): batch_size=batch_size, epochs=120, validation_split=0.1) -model.save("newweights9i.hdf5") +model.save("weights.hdf5") From 42d8260c3cc64ffc90b5b8848f534da875ad3263 Mon Sep 17 00:00:00 2001 From: Gregor Richards <hg-yff@gregor.im> Date: Fri, 31 Aug 2018 21:34:57 -0400 Subject: [PATCH 11/15] Adding my own copyright lines would be wise... 
--- examples/rnnoise_demo.c | 3 ++- include/rnnoise.h | 3 ++- src/denoise.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/examples/rnnoise_demo.c b/examples/rnnoise_demo.c index 0f18bcf8..9c398ba5 100644 --- a/examples/rnnoise_demo.c +++ b/examples/rnnoise_demo.c @@ -1,4 +1,5 @@ -/* Copyright (c) 2017 Mozilla */ +/* Copyright (c) 2018 Gregor Richards + * Copyright (c) 2017 Mozilla */ /* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions diff --git a/include/rnnoise.h b/include/rnnoise.h index e493ae75..c0faf03a 100644 --- a/include/rnnoise.h +++ b/include/rnnoise.h @@ -1,4 +1,5 @@ -/* Copyright (c) 2017 Mozilla */ +/* Copyright (c) 2018 Gregor Richards + * Copyright (c) 2017 Mozilla */ /* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions diff --git a/src/denoise.c b/src/denoise.c index 61665e65..36f30868 100644 --- a/src/denoise.c +++ b/src/denoise.c @@ -1,4 +1,5 @@ -/* Copyright (c) 2017 Mozilla */ +/* Copyright (c) 2018 Gregor Richards + * Copyright (c) 2017 Mozilla */ /* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions From 1eb410f0a93bd596861e4b6a5b2b3882f5d6016e Mon Sep 17 00:00:00 2001 From: Gregor Richards <hg-yff@gregor.im> Date: Mon, 3 Sep 2018 16:32:26 -0400 Subject: [PATCH 12/15] Let's scale our maximum attenuation correctly... 
--- src/denoise.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/denoise.c b/src/denoise.c index 36f30868..8e0fee91 100644 --- a/src/denoise.c +++ b/src/denoise.c @@ -524,11 +524,10 @@ float rnnoise_process_frame(DenoiseState *st, float *out, const float *in) { if (min < st->max_attenuation) { if (min < MIN_MAX_ATTENUATION) min = MIN_MAX_ATTENUATION; - mult = st->max_attenuation / min; + mult = (1.0f-st->max_attenuation) / (1.0f-min); for (i=0;i<NB_BANDS;i++) { if (g[i] < MIN_MAX_ATTENUATION) g[i] = MIN_MAX_ATTENUATION; - g[i] *= mult; - if (g[i] > 1) g[i] = 1; + g[i] = 1.0f-((1.0f-g[i]) * mult); st->lastg[i] = g[i]; } } From 42e1ffb2ecfce22545450b10e43f72e75f744854 Mon Sep 17 00:00:00 2001 From: Gregor Richards <hg-yff@gregor.im> Date: Wed, 24 Apr 2019 17:08:49 -0400 Subject: [PATCH 13/15] Removing the long since broken non-SMOOTH_BANDS mode. --- src/denoise.c | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/src/denoise.c b/src/denoise.c index 8e0fee91..b1f03716 100644 --- a/src/denoise.c +++ b/src/denoise.c @@ -53,13 +53,7 @@ #define SQUARE(x) ((x)*(x)) -#define SMOOTH_BANDS 1 - -#if SMOOTH_BANDS #define NB_BANDS 22 -#else -#define NB_BANDS 21 -#endif #define CEPS_MEM 8 #define NB_DELTA_CEPS 6 @@ -108,7 +102,6 @@ struct DenoiseState { float max_attenuation; }; -#if SMOOTH_BANDS void compute_band_energy(float *bandE, const kiss_fft_cpx *X) { int i; float sum[NB_BANDS] = {0}; @@ -173,32 +166,6 @@ void interp_band_gain(float *g, const float *bandE) { } } } -#else -void compute_band_energy(float *bandE, const kiss_fft_cpx *X) { - int i; - for (i=0;i<NB_BANDS;i++) - { - int j; - opus_val32 sum = 0; - for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++) { - sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r); - sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i); - } - bandE[i] = sum; - } -} - -void interp_band_gain(float *g, const float *bandE) { - int i; - memset(g, 0, FREQ_SIZE); - for 
(i=0;i<NB_BANDS;i++) - { - int j; - for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++) - g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = bandE[i]; - } -} -#endif CommonState common; From 3fe3790aa788466d8feb86ad67bbad6eb53a63de Mon Sep 17 00:00:00 2001 From: Gregor Richards <hg-yff@gregor.im> Date: Wed, 24 Apr 2019 20:04:33 -0400 Subject: [PATCH 14/15] Fix to RNN file reader to set _size correctly. --- src/rnn_reader.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/rnn_reader.c b/src/rnn_reader.c index f71ead04..2a031db1 100644 --- a/src/rnn_reader.c +++ b/src/rnn_reader.c @@ -111,6 +111,7 @@ RNNModel *rnnoise_model_from_file(FILE *f) #define INPUT_DENSE(name) do { \ INPUT_VAL(name->nb_inputs); \ INPUT_VAL(name->nb_neurons); \ + ret->name ## _size = name->nb_neurons; \ INPUT_ACTIVATION(name->activation); \ INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons); \ INPUT_ARRAY(name->bias, name->nb_neurons); \ @@ -119,6 +120,7 @@ RNNModel *rnnoise_model_from_file(FILE *f) #define INPUT_GRU(name) do { \ INPUT_VAL(name->nb_inputs); \ INPUT_VAL(name->nb_neurons); \ + ret->name ## _size = name->nb_neurons; \ INPUT_ACTIVATION(name->activation); \ INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons * 3); \ INPUT_ARRAY(name->recurrent_weights, name->nb_neurons * name->nb_neurons * 3); \ From 2ab62533e63cdf4e4be94a21a80296123bdbaa64 Mon Sep 17 00:00:00 2001 From: Gregor Richards <hg-yff@gregor.im> Date: Wed, 24 Apr 2019 20:04:52 -0400 Subject: [PATCH 15/15] Added input of model file to the demo. --- README | 2 +- examples/rnnoise_demo.c | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/README b/README index 88fc79cd..03697801 100644 --- a/README +++ b/README @@ -12,6 +12,6 @@ While it is meant to be used as a library, a simple command-line tool is provided as an example. It operates on RAW 16-bit (machine endian) mono PCM files sampled at 48 kHz. 
It can be used as: -./examples/rnnoise_demo <number of channels> <maximum attenuation> < input.raw > output.raw +./examples/rnnoise_demo <number of channels> <maximum attenuation in dB> [model file] < input.raw > output.raw The output is also a 16-bit raw PCM file. diff --git a/examples/rnnoise_demo.c b/examples/rnnoise_demo.c index 9c398ba5..95c2be45 100644 --- a/examples/rnnoise_demo.c +++ b/examples/rnnoise_demo.c @@ -44,7 +44,7 @@ int main(int argc, char **argv) { DenoiseState **sts; float max_attenuation; if (argc < 3) { - fprintf(stderr, "usage: %s <channels> <max attenuation dB>\n", argv[0]); + fprintf(stderr, "usage: %s <channels> <max attenuation dB> [model file]\n", argv[0]); return 1; } @@ -52,6 +52,21 @@ int main(int argc, char **argv) { if (channels < 1) channels = 1; max_attenuation = pow(10, -atof(argv[2])/10); + if (argc >= 4) { + FILE *model_file = fopen(argv[3], "r"); + if (!model_file) { + perror(argv[3]); + return 1; + } + model = rnnoise_model_from_file(model_file); + /* The stream is only needed while parsing; close it on every path. */ + fclose(model_file); + if (!model) { + fprintf(stderr, "%s: could not load model\n", argv[3]); + return 1; + } + } + sts = malloc(channels * sizeof(DenoiseState *)); if (!sts) { perror("malloc"); @@ -85,5 +100,7 @@ int main(int argc, char **argv) { rnnoise_destroy(sts[i]); free(tmp); free(sts); + if (model) + rnnoise_model_free(model); return 0; }