|
| 1 | +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. |
| 2 | +
|
| 3 | +Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +you may not use this file except in compliance with the License. |
| 5 | +You may obtain a copy of the License at |
| 6 | +
|
| 7 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +
|
| 9 | +Unless required by applicable law or agreed to in writing, software |
| 10 | +distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +See the License for the specific language governing permissions and |
| 13 | +limitations under the License. |
| 14 | +==============================================================================*/ |
| 15 | + |
| 16 | +#include "audio_provider.h" |
| 17 | + |
| 18 | +#include <cstdlib> |
| 19 | +#include <cstring> |
| 20 | + |
| 21 | +// FreeRTOS.h must be included before some of the following dependencies. |
| 22 | +// Solves b/150260343. |
| 23 | +// clang-format off |
| 24 | +#include "freertos/FreeRTOS.h" |
| 25 | +// clang-format on |
| 26 | + |
| 27 | +#include "driver/i2s.h" |
| 28 | +#include "esp_log.h" |
| 29 | +#include "esp_spi_flash.h" |
| 30 | +#include "esp_system.h" |
| 31 | +#include "esp_timer.h" |
| 32 | +#include "freertos/task.h" |
| 33 | +#include "ringbuf.h" |
| 34 | +#include "micro_model_settings.h" |
| 35 | + |
| 36 | +using namespace std; |
| 37 | + |
// Non-zero when the build target cannot use the I2S driver calls in this file:
// either the target is ESP32-C2, or it is ESP32-C3 built with an ESP-IDF
// version older than 4.4.0.
//
// The whole expansion is parenthesized so that the macro behaves as a single
// operand wherever it is used (e.g. `!NO_I2S_SUPPORT` or
// `FOO && NO_I2S_SUPPORT`), not only in a bare `#if NO_I2S_SUPPORT`.
#define NO_I2S_SUPPORT                \
  (CONFIG_IDF_TARGET_ESP32C2 ||       \
   (CONFIG_IDF_TARGET_ESP32C3 &&      \
    (ESP_IDF_VERSION < ESP_IDF_VERSION_VAL(4, 4, 0))))
| 41 | + |
| 42 | +static const char* TAG = "TF_LITE_AUDIO_PROVIDER"; |
| 43 | +/* ringbuffer to hold the incoming audio data */ |
| 44 | +ringbuf_t* g_audio_capture_buffer; |
| 45 | +volatile int32_t g_latest_audio_timestamp = 0; |
| 46 | +/* model requires 20ms new data from g_audio_capture_buffer and 10ms old data |
| 47 | + * each time , storing old data in the histrory buffer , { |
| 48 | + * history_samples_to_keep = 10 * 16 } */ |
| 49 | +constexpr int32_t history_samples_to_keep = |
| 50 | + ((kFeatureSliceDurationMs - kFeatureSliceStrideMs) * |
| 51 | + (kAudioSampleFrequency / 1000)); |
| 52 | +/* new samples to get each time from ringbuffer, { new_samples_to_get = 20 * 16 |
| 53 | + * } */ |
| 54 | +constexpr int32_t new_samples_to_get = |
| 55 | + (kFeatureSliceStrideMs * (kAudioSampleFrequency / 1000)); |
| 56 | + |
| 57 | +namespace { |
| 58 | +int16_t g_audio_output_buffer[kMaxAudioSampleSize]; |
| 59 | +bool g_is_audio_initialized = false; |
| 60 | +int16_t g_history_buffer[history_samples_to_keep]; |
| 61 | +} // namespace |
| 62 | + |
// Size passed to rb_init() when the capture ring buffer is created.
const int32_t kAudioCaptureBufferSize = 80000;

// Number of bytes requested from the I2S driver per read. At 16 kHz, 16-bit,
// single channel this is 100 ms of audio (1600 samples * 2 bytes).
const int32_t i2s_bytes_to_read = 3200;
| 65 | + |
| 66 | +#if NO_I2S_SUPPORT |
| 67 | + // nothing to be done here |
| 68 | +#else |
| 69 | +static void i2s_init(void) { |
| 70 | + // Start listening for audio: MONO @ 16KHz |
| 71 | + i2s_config_t i2s_config = { |
| 72 | + .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_TX), |
| 73 | + .sample_rate = 16000, |
| 74 | + .bits_per_sample = (i2s_bits_per_sample_t)16, |
| 75 | + .channel_format = I2S_CHANNEL_FMT_ONLY_LEFT, |
| 76 | + .communication_format = I2S_COMM_FORMAT_I2S, |
| 77 | + .intr_alloc_flags = 0, |
| 78 | + .dma_buf_count = 3, |
| 79 | + .dma_buf_len = 300, |
| 80 | + .use_apll = false, |
| 81 | + .tx_desc_auto_clear = false, |
| 82 | + .fixed_mclk = -1, |
| 83 | + }; |
| 84 | + i2s_pin_config_t pin_config = { |
| 85 | + .bck_io_num = 26, // IIS_SCLK |
| 86 | + .ws_io_num = 32, // IIS_LCLK |
| 87 | + .data_out_num = -1, // IIS_DSIN |
| 88 | + .data_in_num = 33, // IIS_DOUT |
| 89 | + }; |
| 90 | + esp_err_t ret = 0; |
| 91 | + ret = i2s_driver_install((i2s_port_t)1, &i2s_config, 0, NULL); |
| 92 | + if (ret != ESP_OK) { |
| 93 | + ESP_LOGE(TAG, "Error in i2s_driver_install"); |
| 94 | + } |
| 95 | + ret = i2s_set_pin((i2s_port_t)1, &pin_config); |
| 96 | + if (ret != ESP_OK) { |
| 97 | + ESP_LOGE(TAG, "Error in i2s_set_pin"); |
| 98 | + } |
| 99 | + |
| 100 | + ret = i2s_zero_dma_buffer((i2s_port_t)1); |
| 101 | + if (ret != ESP_OK) { |
| 102 | + ESP_LOGE(TAG, "Error in initializing dma buffer with 0"); |
| 103 | + } |
| 104 | +} |
| 105 | +#endif |
| 106 | + |
| 107 | +static void CaptureSamples(void* arg) { |
| 108 | +#if NO_I2S_SUPPORT |
| 109 | + ESP_LOGE(TAG, "i2s support not available on C3 chip for IDF < 4.4.0"); |
| 110 | + return; |
| 111 | +#else |
| 112 | + size_t bytes_read = i2s_bytes_to_read; |
| 113 | + uint8_t i2s_read_buffer[i2s_bytes_to_read] = {}; |
| 114 | + i2s_init(); |
| 115 | + while (1) { |
| 116 | + /* read 100ms data at once from i2s */ |
| 117 | + i2s_read((i2s_port_t)1, (void*)i2s_read_buffer, i2s_bytes_to_read, |
| 118 | + &bytes_read, pdMS_TO_TICKS(100)); |
| 119 | + if (bytes_read <= 0) { |
| 120 | + ESP_LOGE(TAG, "Error in I2S read : %d", bytes_read); |
| 121 | + } else { |
| 122 | + if (bytes_read < i2s_bytes_to_read) { |
| 123 | + ESP_LOGW(TAG, "Partial I2S read"); |
| 124 | + } |
| 125 | + /* write bytes read by i2s into ring buffer */ |
| 126 | + int bytes_written = rb_write(g_audio_capture_buffer, |
| 127 | + (uint8_t*)i2s_read_buffer, bytes_read, pdMS_TO_TICKS(100)); |
| 128 | + /* update the timestamp (in ms) to let the model know that new data has |
| 129 | + * arrived */ |
| 130 | + g_latest_audio_timestamp = g_latest_audio_timestamp + |
| 131 | + ((1000 * (bytes_written / 2)) / kAudioSampleFrequency); |
| 132 | + if (bytes_written <= 0) { |
| 133 | + ESP_LOGE(TAG, "Could Not Write in Ring Buffer: %d ", bytes_written); |
| 134 | + } else if (bytes_written < bytes_read) { |
| 135 | + ESP_LOGW(TAG, "Partial Write"); |
| 136 | + } |
| 137 | + } |
| 138 | + } |
| 139 | +#endif |
| 140 | + vTaskDelete(NULL); |
| 141 | +} |
| 142 | + |
| 143 | +TfLiteStatus InitAudioRecording() { |
| 144 | + g_audio_capture_buffer = rb_init("tf_ringbuffer", kAudioCaptureBufferSize); |
| 145 | + if (!g_audio_capture_buffer) { |
| 146 | + ESP_LOGE(TAG, "Error creating ring buffer"); |
| 147 | + return kTfLiteError; |
| 148 | + } |
| 149 | + /* create CaptureSamples Task which will get the i2s_data from mic and fill it |
| 150 | + * in the ring buffer */ |
| 151 | + xTaskCreate(CaptureSamples, "CaptureSamples", 1024 * 32, NULL, 10, NULL); |
| 152 | + while (!g_latest_audio_timestamp) { |
| 153 | + vTaskDelay(1); // one tick delay to avoid watchdog |
| 154 | + } |
| 155 | + ESP_LOGI(TAG, "Audio Recording started"); |
| 156 | + return kTfLiteOk; |
| 157 | +} |
| 158 | + |
| 159 | +TfLiteStatus GetAudioSamples(int start_ms, int duration_ms, |
| 160 | + int* audio_samples_size, int16_t** audio_samples) { |
| 161 | + if (!g_is_audio_initialized) { |
| 162 | + TfLiteStatus init_status = InitAudioRecording(); |
| 163 | + if (init_status != kTfLiteOk) { |
| 164 | + return init_status; |
| 165 | + } |
| 166 | + g_is_audio_initialized = true; |
| 167 | + } |
| 168 | + /* copy 160 samples (320 bytes) into output_buff from history */ |
| 169 | + memcpy((void*)(g_audio_output_buffer), (void*)(g_history_buffer), |
| 170 | + history_samples_to_keep * sizeof(int16_t)); |
| 171 | + |
| 172 | + /* copy 320 samples (640 bytes) from rb at ( int16_t*(g_audio_output_buffer) + |
| 173 | + * 160 ), first 160 samples (320 bytes) will be from history */ |
| 174 | + int bytes_read = |
| 175 | + rb_read(g_audio_capture_buffer, |
| 176 | + ((uint8_t*)(g_audio_output_buffer + history_samples_to_keep)), |
| 177 | + new_samples_to_get * sizeof(int16_t), pdMS_TO_TICKS(100)); |
| 178 | + if (bytes_read < 0) { |
| 179 | + ESP_LOGE(TAG, " Model Could not read data from Ring Buffer"); |
| 180 | + } else if (bytes_read < new_samples_to_get * sizeof(int16_t)) { |
| 181 | + ESP_LOGD(TAG, "RB FILLED RIGHT NOW IS %d", |
| 182 | + rb_filled(g_audio_capture_buffer)); |
| 183 | + ESP_LOGD(TAG, " Partial Read of Data by Model "); |
| 184 | + ESP_LOGV(TAG, " Could only read %d bytes when required %d bytes ", |
| 185 | + bytes_read, (int) (new_samples_to_get * sizeof(int16_t))); |
| 186 | + } |
| 187 | + |
| 188 | + /* copy 320 bytes from output_buff into history */ |
| 189 | + memcpy((void*)(g_history_buffer), |
| 190 | + (void*)(g_audio_output_buffer + new_samples_to_get), |
| 191 | + history_samples_to_keep * sizeof(int16_t)); |
| 192 | + |
| 193 | + *audio_samples_size = kMaxAudioSampleSize; |
| 194 | + *audio_samples = g_audio_output_buffer; |
| 195 | + return kTfLiteOk; |
| 196 | +} |
| 197 | + |
| 198 | +int32_t LatestAudioTimestamp() { return g_latest_audio_timestamp; } |
0 commit comments