diff --git a/src/Kconfig.projbuild b/src/Kconfig.projbuild new file mode 100644 index 0000000..fbf6a4f --- /dev/null +++ b/src/Kconfig.projbuild @@ -0,0 +1,15 @@ +menu "OpenAI Board Configuration" + + choice OPENAI_BOARD_TYPE + prompt "openai board type" + default OPENAI_BOARD_ESP32_S3 + + config OPENAI_BOARD_ESP32_S3 + bool "default demo board ESP32-S3" + + config OPENAI_BOARD_M5_ATOMS3R + bool "M5Stack ATOMS3R with EchoBase" + + endchoice + +endmenu diff --git a/src/idf_component.yml b/src/idf_component.yml index a494de4..c3078df 100644 --- a/src/idf_component.yml +++ b/src/idf_component.yml @@ -1,3 +1,4 @@ dependencies: + espressif/es8311: "^1.0.0~1" idf: version: ">=4.1.0" diff --git a/src/media.cpp b/src/media.cpp index 3ca44c0..0050d73 100644 --- a/src/media.cpp +++ b/src/media.cpp @@ -4,9 +4,13 @@ #include "main.h" #define OPUS_OUT_BUFFER_SIZE 1276 // 1276 bytes is recommended by opus_encode + +#if CONFIG_OPENAI_BOARD_ESP32_S3 // Default ESP32-S3 board config #define SAMPLE_RATE 8000 #define BUFFER_SAMPLES 320 +#define I2S_DATA_OUT_PORT I2S_NUM_0 +#define I2S_DATA_IN_PORT I2S_NUM_1 #define MCLK_PIN 0 #define DAC_BCLK_PIN 15 #define DAC_LRCLK_PIN 16 @@ -14,11 +18,32 @@ #define ADC_BCLK_PIN 38 #define ADC_LRCLK_PIN 39 #define ADC_DATA_PIN 40 +#elif CONFIG_OPENAI_BOARD_M5_ATOMS3R // ATOMS3R board with Atomic EchoBase +#include "es8311.h" +#define SAMPLE_RATE 16000 //! 
EchoBase not support 8K sample rate :) +#define BUFFER_SAMPLES (320 * 2) + +#define I2C_PORT I2C_NUM_1 +#define I2C_FREQ_HZ 400000 +#define I2C_SCL_PIN 39 +#define I2C_SDA_PIN 38 + +#define I2S_DATA_OUT_PORT I2S_NUM_1 +#define I2S_DATA_IN_PORT I2S_DATA_OUT_PORT +#define MCLK_PIN -1 +#define DAC_BCLK_PIN 8 +#define DAC_LRCLK_PIN 6 +#define DAC_DATA_PIN 5 +#define ADC_DATA_PIN 7 + +es8311_handle_t es8311_handle = nullptr; +#endif #define OPUS_ENCODER_BITRATE 30000 #define OPUS_ENCODER_COMPLEXITY 0 void oai_init_audio_capture() { +#if CONFIG_OPENAI_BOARD_ESP32_S3 i2s_config_t i2s_config_out = { .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_TX), .sample_rate = SAMPLE_RATE, @@ -31,7 +56,7 @@ void oai_init_audio_capture() { .use_apll = 1, .tx_desc_auto_clear = true, }; - if (i2s_driver_install(I2S_NUM_0, &i2s_config_out, 0, NULL) != ESP_OK) { + if (i2s_driver_install(I2S_DATA_OUT_PORT, &i2s_config_out, 0, NULL) != ESP_OK) { printf("Failed to configure I2S driver for audio output"); return; } @@ -43,11 +68,11 @@ void oai_init_audio_capture() { .data_out_num = DAC_DATA_PIN, .data_in_num = I2S_PIN_NO_CHANGE, }; - if (i2s_set_pin(I2S_NUM_0, &pin_config_out) != ESP_OK) { + if (i2s_set_pin(I2S_DATA_OUT_PORT, &pin_config_out) != ESP_OK) { printf("Failed to set I2S pins for audio output"); return; } - i2s_zero_dma_buffer(I2S_NUM_0); + i2s_zero_dma_buffer(I2S_DATA_OUT_PORT); i2s_config_t i2s_config_in = { .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX), @@ -60,7 +85,7 @@ void oai_init_audio_capture() { .dma_buf_len = BUFFER_SAMPLES, .use_apll = 1, }; - if (i2s_driver_install(I2S_NUM_1, &i2s_config_in, 0, NULL) != ESP_OK) { + if (i2s_driver_install(I2S_DATA_IN_PORT, &i2s_config_in, 0, NULL) != ESP_OK) { printf("Failed to configure I2S driver for audio input"); return; } @@ -72,10 +97,78 @@ void oai_init_audio_capture() { .data_out_num = I2S_PIN_NO_CHANGE, .data_in_num = ADC_DATA_PIN, }; - if (i2s_set_pin(I2S_NUM_1, &pin_config_in) != ESP_OK) { + if 
(i2s_set_pin(I2S_DATA_IN_PORT, &pin_config_in) != ESP_OK) { printf("Failed to set I2S pins for audio input"); return; } +#elif CONFIG_OPENAI_BOARD_M5_ATOMS3R + i2c_config_t conf = { + .mode = I2C_MODE_MASTER, + .sda_io_num = I2C_SDA_PIN, + .scl_io_num = I2C_SCL_PIN, + .sda_pullup_en = GPIO_PULLUP_ENABLE, + .scl_pullup_en = GPIO_PULLUP_ENABLE, + .master = + { + .clk_speed = I2C_FREQ_HZ, + } + }; + + i2c_param_config(I2C_PORT, &conf); + i2c_driver_install(I2C_PORT, conf.mode, 0, 0, 0); + + es8311_handle = es8311_create(I2C_PORT, ES8311_ADDRRES_0); + if (es8311_handle == nullptr) { + printf("Failed to create ES8311 handle"); + return; + } + + es8311_clock_config_t clk_cfg = { + .mclk_inverted = false, + .sclk_inverted = false, + .mclk_from_mclk_pin = false, + .sample_frequency = SAMPLE_RATE, + }; + + if (es8311_init(es8311_handle, &clk_cfg, ES8311_RESOLUTION_32, + ES8311_RESOLUTION_32) != ESP_OK) { + printf("Failed to initialize ES8311"); + return; + } + + es8311_voice_volume_set(es8311_handle, 80, NULL); + es8311_microphone_config(es8311_handle, false); + + i2s_config_t i2s_config = { + .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_TX | I2S_MODE_RX), + .sample_rate = SAMPLE_RATE, + .bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT, + .channel_format = I2S_CHANNEL_FMT_ALL_LEFT, //! 
Important for EchoBase + .communication_format = I2S_COMM_FORMAT_I2S, + .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1, + .dma_buf_count = 8, + .dma_buf_len = BUFFER_SAMPLES, + .use_apll = 1, + .tx_desc_auto_clear = true, + }; + if (i2s_driver_install(I2S_DATA_IN_PORT, &i2s_config, 0, NULL) != ESP_OK) { + printf("Failed to configure I2S driver for audio input/output"); + return; + } + + i2s_pin_config_t pin_config_out = { + .mck_io_num = MCLK_PIN, + .bck_io_num = DAC_BCLK_PIN, + .ws_io_num = DAC_LRCLK_PIN, + .data_out_num = DAC_DATA_PIN, + .data_in_num = ADC_DATA_PIN, + }; + if (i2s_set_pin(I2S_DATA_IN_PORT, &pin_config_out) != ESP_OK) { + printf("Failed to set I2S pins for audio output"); + return; + } + i2s_zero_dma_buffer(I2S_DATA_IN_PORT); +#endif } opus_int16 *output_buffer = NULL; @@ -98,7 +191,7 @@ void oai_audio_decode(uint8_t *data, size_t size) { if (decoded_size > 0) { size_t bytes_written = 0; - i2s_write(I2S_NUM_0, output_buffer, BUFFER_SAMPLES * sizeof(opus_int16), + i2s_write(I2S_DATA_OUT_PORT, output_buffer, BUFFER_SAMPLES * sizeof(opus_int16), &bytes_written, portMAX_DELAY); } } @@ -132,7 +225,7 @@ void oai_init_audio_encoder() { void oai_send_audio(PeerConnection *peer_connection) { size_t bytes_read = 0; - i2s_read(I2S_NUM_1, encoder_input_buffer, BUFFER_SAMPLES, &bytes_read, + i2s_read(I2S_DATA_IN_PORT, encoder_input_buffer, BUFFER_SAMPLES, &bytes_read, portMAX_DELAY); auto encoded_size = diff --git a/src/webrtc.cpp b/src/webrtc.cpp index e41b284..422ecc7 100644 --- a/src/webrtc.cpp +++ b/src/webrtc.cpp @@ -37,10 +37,19 @@ static void oai_onconnectionstatechange_task(PeerConnectionState state, #endif } else if (state == PEER_CONNECTION_CONNECTED) { #ifndef LINUX_BUILD +#if CONFIG_OPENAI_BOARD_ESP32_S3 StackType_t *stack_memory = (StackType_t *)heap_caps_malloc( 20000 * sizeof(StackType_t), MALLOC_CAP_SPIRAM); xTaskCreateStaticPinnedToCore(oai_send_audio_task, "audio_publisher", 20000, NULL, 7, stack_memory, &task_buffer, 0); +#elif 
CONFIG_OPENAI_BOARD_M5_ATOMS3R + // Because the sampling rate is changed to 16K, we need to increase the + // memory size; otherwise it will overflow :) + StackType_t *stack_memory = (StackType_t *)heap_caps_malloc( + 40000 * sizeof(StackType_t), MALLOC_CAP_SPIRAM); + xTaskCreateStaticPinnedToCore(oai_send_audio_task, "audio_publisher", 40000, + NULL, 7, stack_memory, &task_buffer, 0); +#endif #endif } }