diff --git a/main/CMakeLists.txt b/main/CMakeLists.txt index 2157caec..ed0db50a 100644 --- a/main/CMakeLists.txt +++ b/main/CMakeLists.txt @@ -24,6 +24,7 @@ set(SOURCES "audio/audio_codec.cc" "display/lvgl_display/gif/lvgl_gif.cc" "display/lvgl_display/gif/gifdec.c" "display/lvgl_display/jpg/image_to_jpeg.cpp" + "display/lvgl_display/jpg/jpeg_to_image.c" "protocols/protocol.cc" "protocols/mqtt_protocol.cc" "protocols/websocket_protocol.cc" @@ -687,7 +688,8 @@ if(CONFIG_IDF_TARGET_ESP32) "audio/codecs/es8389_audio_codec.cc" "led/gpio_led.cc" "${CMAKE_CURRENT_SOURCE_DIR}/boards/common/esp32_camera.cc" - "display/lvgl_display/jpg/image_to_jpeg.cpp" + "display/lvgl_display/jpg/image_to_jpeg.cpp" + "display/lvgl_display/jpg/jpeg_to_image.c" ) endif() diff --git a/main/Kconfig.projbuild b/main/Kconfig.projbuild index 785412f3..2a1ddff9 100644 --- a/main/Kconfig.projbuild +++ b/main/Kconfig.projbuild @@ -685,6 +685,16 @@ menu "Camera Configuration" depends on !IDF_TARGET_ESP32 comment "Warning: Please read the help text before modifying these settings." + + config XIAOZHI_CAMERA_ALLOW_JPEG_INPUT + bool "Allow JPEG Input" + default n + help + Allow JPEG Input format for the camera. + + This option may need to be enabled when using a USB camera. + + Not currently supported when used simultaneously with XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE. config XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER bool "Enable Hardware JPEG Encoder" @@ -694,6 +704,14 @@ menu "Camera Configuration" Use hardware JPEG encoder on ESP32-P4 to encode image to JPEG. See https://docs.espressif.com/projects/esp-idf/en/stable/esp32p4/api-reference/peripherals/jpeg.html for more details. + config XIAOZHI_ENABLE_HARDWARE_JPEG_DECODER + bool "Enable Hardware JPEG Decoder" + default n + depends on SOC_JPEG_DECODE_SUPPORTED && XIAOZHI_CAMERA_ALLOW_JPEG_INPUT + help + Use hardware JPEG decoder on ESP32-P4 to decode JPEG to image. 
+ See https://docs.espressif.com/projects/esp-idf/en/stable/esp32p4/api-reference/peripherals/jpeg.html for more details. + config XIAOZHI_ENABLE_CAMERA_DEBUG_MODE bool "Enable Camera Debug Mode" default n @@ -702,7 +720,7 @@ menu "Camera Configuration" Only works on boards that support camera. config XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP - bool "Enable software camera buffer endianness swapping (USE WITH CAUTION)" + bool "Enable software camera buffer endianness swapping" default n depends on !CAMERA_SENSOR_SWAP_PIXEL_BYTE_ORDER help @@ -717,12 +735,15 @@ menu "Camera Configuration" menuconfig XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE bool "Enable Camera Image Rotation" default n + depends on !XIAOZHI_CAMERA_ALLOW_JPEG_INPUT help Enable camera image rotation, rotate the camera image to the correct orientation. - On ESP32-P4, rotation is handled by PPA hardware. - On other chips, rotation is done in software with performance cost. - For 180° rotation, use HFlip + VFlip instead of this option. + Not currently supported when used simultaneously with XIAOZHI_CAMERA_ALLOW_JPEG_INPUT. 
+ if XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE choice XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE prompt "Camera Image Rotation Angle (clockwise)" diff --git a/main/boards/common/esp32_camera.cc b/main/boards/common/esp32_camera.cc index f8994983..491b7569 100644 --- a/main/boards/common/esp32_camera.cc +++ b/main/boards/common/esp32_camera.cc @@ -3,24 +3,31 @@ #include #include #include +#include +#include +#include +#include #include "esp_imgfx_color_convert.h" #include "esp_video_device.h" #include "esp_video_init.h" #include "linux/videodev2.h" -#include "esp32_camera.h" #include "board.h" #include "display.h" +#include "esp32_camera.h" +#include "esp_jpeg_common.h" +#include "jpg/image_to_jpeg.h" +#include "jpg/jpeg_to_image.h" #include "lvgl_display.h" #include "mcp_server.h" #include "system_info.h" -#include "jpg/image_to_jpeg.h" #ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE #undef LOG_LOCAL_LEVEL #define LOG_LOCAL_LEVEL MAX(CONFIG_LOG_DEFAULT_LEVEL, ESP_LOG_DEBUG) #endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE +#include // should be after LOCAL_LOG_LEVEL definition #ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE #ifdef CONFIG_IDF_TARGET_ESP32P4 @@ -44,11 +51,6 @@ #endif // target #endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE -#include -#include -#include -#include -#include #define TAG "Esp32Camera" @@ -128,7 +130,7 @@ Esp32Camera::Esp32Camera(const esp_video_init_config_t& config) { #endif #if CONFIG_ESP_VIDEO_ENABLE_USB_UVC_VIDEO_DEVICE else if (config.usb_uvc != nullptr) { - video_device_name = ESP_VIDEO_USB_UVC_DEVICE_NAME(config.usb_uvc->uvc.uvc_dev_num); + video_device_name = ESP_VIDEO_USB_UVC_DEVICE_NAME(0); } #endif @@ -196,7 +198,7 @@ Esp32Camera::Esp32Camera(const esp_video_init_config_t& config) { case V4L2_PIX_FMT_RGB565: return 1; #ifdef CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER -case V4L2_PIX_FMT_YUV420: // 软件 JPEG 编码器不支持 YUV420 格式 + case V4L2_PIX_FMT_YUV420: // 软件 JPEG 编码器不支持 YUV420 格式 return 2; #endif // 
CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER case V4L2_PIX_FMT_GREY: @@ -209,17 +211,21 @@ case V4L2_PIX_FMT_YUV420: // 软件 JPEG 编码器不支持 YUV420 格式 auto get_rank = [](uint32_t fmt) -> int { switch (fmt) { case V4L2_PIX_FMT_YUV422P: - return 0; + return 10; case V4L2_PIX_FMT_RGB565: - return 1; + return 11; case V4L2_PIX_FMT_RGB24: - return 2; + return 12; #ifdef CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER case V4L2_PIX_FMT_YUV420: - return 3; + return 13; #endif // CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER +#ifdef CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT + case V4L2_PIX_FMT_JPEG: + return 5; +#endif // CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT case V4L2_PIX_FMT_GREY: - return 4; + return 20; default: return 1 << 29; // unsupported } @@ -404,7 +410,7 @@ bool Esp32Camera::Capture() { frame_.len = buf.bytesused; frame_.data = (uint8_t*)heap_caps_malloc(frame_.len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); if (!frame_.data) { - ESP_LOGE(TAG, "alloc frame copy failed"); + ESP_LOGE(TAG, "alloc frame copy failed: need allocate %d bytes", buf.bytesused); if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); } @@ -427,6 +433,9 @@ bool Esp32Camera::Capture() { case V4L2_PIX_FMT_YUYV: case V4L2_PIX_FMT_YUV420: case V4L2_PIX_FMT_GREY: +#ifdef CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT + case V4L2_PIX_FMT_JPEG: +#endif // CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT #ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP { auto src16 = (uint16_t*)mmap_buffers_[buf.index].start; @@ -791,6 +800,33 @@ bool Esp32Camera::Capture() { lvgl_image_size = frame_.len; // fallthrough 时兼顾 YUYV 与 RGB565 break; +#ifdef CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT + case V4L2_PIX_FMT_JPEG: { + uint8_t* out_data = nullptr; // out data is allocated by jpeg_to_image + size_t out_len = 0; + size_t out_width = 0; + size_t out_height = 0; + size_t out_stride = 0; + + esp_err_t ret = + jpeg_to_image(frame_.data, frame_.len, &out_data, &out_len, &out_width, &out_height, &out_stride); + if 
(ret != ESP_OK) { + ESP_LOGE(TAG, "Failed to decode JPEG image: %d (%s)", (int)ret, esp_err_to_name(ret)); + if (out_data) { + heap_caps_free(out_data); + out_data = nullptr; + } + return false; + } + + data = out_data; + w = out_width; + h = out_height; + lvgl_image_size = out_len; + stride = out_stride; + break; + } +#endif default: ESP_LOGE(TAG, "unsupported frame format: 0x%08lx", frame_.format); return false; @@ -876,16 +912,31 @@ std::string Esp32Camera::Explain(const std::string& question) { uint16_t w = frame_.width ? frame_.width : 320; uint16_t h = frame_.height ? frame_.height : 240; v4l2_pix_fmt_t enc_fmt = frame_.format; - image_to_jpeg_cb( + bool ok = image_to_jpeg_cb( frame_.data, frame_.len, w, h, enc_fmt, 80, [](void* arg, size_t index, const void* data, size_t len) -> size_t { - auto jpeg_queue = (QueueHandle_t)arg; - JpegChunk chunk = {.data = (uint8_t*)heap_caps_aligned_alloc(16, len, MALLOC_CAP_SPIRAM), .len = len}; - memcpy(chunk.data, data, len); + auto jpeg_queue = static_cast(arg); + JpegChunk chunk = {.data = nullptr, .len = len}; + if (index == 0 && data != nullptr && len > 0) { + chunk.data = (uint8_t*)heap_caps_aligned_alloc(16, len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); + if (chunk.data == nullptr) { + ESP_LOGE(TAG, "Failed to allocate %zu bytes for JPEG chunk", len); + chunk.len = 0; + } else { + memcpy(chunk.data, data, len); + } + } else { + chunk.len = 0; // Sentinel or error + } xQueueSend(jpeg_queue, &chunk, portMAX_DELAY); return len; }, jpeg_queue); + + if (!ok) { + JpegChunk chunk = {.data = nullptr, .len = 0}; + xQueueSend(jpeg_queue, &chunk, portMAX_DELAY); + } }); auto network = Board::GetInstance().GetNetwork(); @@ -938,6 +989,7 @@ std::string Esp32Camera::Explain(const std::string& question) { // 第三块:JPEG数据 size_t total_sent = 0; + bool saw_terminator = false; while (true) { JpegChunk chunk; if (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) != pdPASS) { @@ -945,6 +997,7 @@ std::string Esp32Camera::Explain(const 
std::string& question) { break; } if (chunk.data == nullptr) { + saw_terminator = true; break; // The last chunk } http->Write((const char*)chunk.data, chunk.len); @@ -956,6 +1009,11 @@ std::string Esp32Camera::Explain(const std::string& question) { // 清理队列 vQueueDelete(jpeg_queue); + if (!saw_terminator || total_sent == 0) { + ESP_LOGE(TAG, "JPEG encoder failed or produced empty output"); + throw std::runtime_error("Failed to encode image to JPEG"); + } + { // 第四块:multipart尾部 std::string multipart_footer; diff --git a/main/display/lvgl_display/jpg/image_to_jpeg.cpp b/main/display/lvgl_display/jpg/image_to_jpeg.cpp index bf38eff9..9192e350 100644 --- a/main/display/lvgl_display/jpg/image_to_jpeg.cpp +++ b/main/display/lvgl_display/jpg/image_to_jpeg.cpp @@ -426,6 +426,19 @@ static bool encode_with_esp_new_jpeg(const uint8_t* src, size_t src_len, uint16_ bool image_to_jpeg(uint8_t* src, size_t src_len, uint16_t width, uint16_t height, v4l2_pix_fmt_t format, uint8_t quality, uint8_t** out, size_t* out_len) { +#ifdef CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT + if (format == V4L2_PIX_FMT_JPEG) { + uint8_t * out_data = (uint8_t*)heap_caps_malloc(src_len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); + if (!out_data) { + ESP_LOGE(TAG, "Failed to allocate memory for JPEG output"); + return false; + } + memcpy(out_data, src, src_len); + *out = out_data; + *out_len = src_len; + return true; + } +#endif // CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT #if CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER if (encode_with_hw_jpeg(src, src_len, width, height, format, quality, out, out_len, NULL, NULL)) { return true; @@ -437,6 +450,13 @@ bool image_to_jpeg(uint8_t* src, size_t src_len, uint16_t width, uint16_t height bool image_to_jpeg_cb(uint8_t* src, size_t src_len, uint16_t width, uint16_t height, v4l2_pix_fmt_t format, uint8_t quality, jpg_out_cb cb, void* arg) { +#ifdef CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT + if (format == V4L2_PIX_FMT_JPEG) { + cb(arg, 0, src, src_len); + cb(arg, 1, nullptr, 0); 
// end signal + return true; + } +#endif // CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT #if CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER if (encode_with_hw_jpeg(src, src_len, width, height, format, quality, NULL, NULL, cb, arg)) { return true; diff --git a/main/display/lvgl_display/jpg/jpeg_to_image.c b/main/display/lvgl_display/jpg/jpeg_to_image.c new file mode 100644 index 00000000..da924555 --- /dev/null +++ b/main/display/lvgl_display/jpg/jpeg_to_image.c @@ -0,0 +1,264 @@ +#include +#include +#include +#include + +#include "esp_jpeg_common.h" +#include "esp_jpeg_dec.h" + +#include "jpeg_to_image.h" + +#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE +#undef LOG_LOCAL_LEVEL +#define LOG_LOCAL_LEVEL MAX(CONFIG_LOG_DEFAULT_LEVEL, ESP_LOG_DEBUG) +#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE +#include + +#ifdef CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_DECODER +#include "driver/jpeg_decode.h" +#endif + +#define TAG "jpeg_to_image" + +static esp_err_t decode_with_new_jpeg(const uint8_t* src, size_t src_len, uint8_t** out, size_t* out_len, size_t* width, + size_t* height, size_t* stride) { + ESP_LOGD(TAG, "Decoding JPEG with software decoder"); + esp_err_t ret = ESP_OK; + jpeg_error_t jpeg_ret = JPEG_ERR_OK; + uint8_t* out_buf = NULL; + jpeg_dec_io_t jpeg_io = {0}; + jpeg_dec_header_info_t out_info = {0}; + + jpeg_dec_config_t config = DEFAULT_JPEG_DEC_CONFIG(); + config.output_type = JPEG_PIXEL_FORMAT_RGB565_LE; + config.rotate = JPEG_ROTATE_0D; + + jpeg_dec_handle_t jpeg_dec = NULL; + jpeg_ret = jpeg_dec_open(&config, &jpeg_dec); + if (jpeg_ret != JPEG_ERR_OK) { + ESP_LOGE(TAG, "Failed to open JPEG decoder"); + ret = ESP_FAIL; + goto jpeg_dec_failed; + } + + jpeg_io.inbuf = (uint8_t*)src; + jpeg_io.inbuf_len = (int)src_len; + + jpeg_ret = jpeg_dec_parse_header(jpeg_dec, &jpeg_io, &out_info); + if (jpeg_ret != JPEG_ERR_OK) { + ESP_LOGE(TAG, "Failed to parse JPEG header"); + ret = ESP_ERR_INVALID_ARG; + goto jpeg_dec_failed; + } + + ESP_LOGD(TAG, "JPEG header info: width=%d, 
height=%d", out_info.width, out_info.height); + + out_buf = jpeg_calloc_align(out_info.width * out_info.height * 2, 16); + if (out_buf == NULL) { + ESP_LOGE(TAG, "Failed to allocate memory for JPEG output buffer"); + ret = ESP_ERR_NO_MEM; + goto jpeg_dec_failed; + } + + jpeg_io.outbuf = out_buf; + jpeg_ret = jpeg_dec_process(jpeg_dec, &jpeg_io); + if (jpeg_ret != JPEG_ERR_OK) { + ESP_LOGE(TAG, "Failed to decode JPEG"); + ret = ESP_FAIL; + goto jpeg_dec_failed; + } + + ESP_LOG_BUFFER_HEXDUMP(TAG, out_buf, MIN(out_info.width * out_info.height * 2, 256), ESP_LOG_DEBUG); + + *out = out_buf; + out_buf = NULL; + *out_len = (size_t)(out_info.width * out_info.height * 2); + *width = (size_t)out_info.width; + *height = (size_t)out_info.height; + *stride = (size_t)out_info.width * 2; + jpeg_dec_close(jpeg_dec); + jpeg_dec = NULL; + + return ret; + +jpeg_dec_failed: + if (jpeg_dec) { + jpeg_dec_close(jpeg_dec); + jpeg_dec = NULL; + } + if (out_buf) { + jpeg_free_align(out_buf); + out_buf = NULL; + } + + *out = NULL; + *out_len = 0; + *width = 0; + *height = 0; + *stride = 0; + return ret; +} + +#ifdef CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_DECODER +static esp_err_t decode_with_hardware_jpeg(const uint8_t* src, size_t src_len, uint8_t** out, size_t* out_len, + size_t* width, size_t* height, size_t* stride) { + ESP_LOGD(TAG, "Decoding JPEG with hardware decoder"); + esp_err_t ret = ESP_OK; + + jpeg_decoder_handle_t jpeg_dec = NULL; + uint8_t* bit_stream = NULL; + uint8_t* out_buf = NULL; + size_t out_buf_len = 0; + size_t tx_buffer_size = 0; + size_t rx_buffer_size = 0; + + jpeg_decode_engine_cfg_t eng_cfg = { + .intr_priority = 1, + .timeout_ms = 1000, + }; + + jpeg_decode_cfg_t decode_cfg_rgb = { + .output_format = JPEG_DECODE_OUT_FORMAT_RGB565, + .rgb_order = JPEG_DEC_RGB_ELEMENT_ORDER_BGR, + }; + + ret = jpeg_new_decoder_engine(&eng_cfg, &jpeg_dec); + if (ret != ESP_OK) { + ESP_LOGE(TAG, "Failed to create JPEG decoder engine"); + goto jpeg_hw_dec_failed; + } + + 
jpeg_decode_memory_alloc_cfg_t tx_mem_cfg = { + .buffer_direction = JPEG_DEC_ALLOC_INPUT_BUFFER, + }; + + jpeg_decode_memory_alloc_cfg_t rx_mem_cfg = { + .buffer_direction = JPEG_DEC_ALLOC_OUTPUT_BUFFER, + }; + + bit_stream = (uint8_t*)jpeg_alloc_decoder_mem(src_len, &tx_mem_cfg, &tx_buffer_size); + if (bit_stream == NULL || tx_buffer_size < src_len) { + ESP_LOGE(TAG, "Failed to allocate memory for JPEG bit stream"); + ret = ESP_ERR_NO_MEM; + goto jpeg_hw_dec_failed; + } + + memcpy(bit_stream, src, src_len); + + jpeg_decode_picture_info_t header_info; + ESP_GOTO_ON_ERROR(jpeg_decoder_get_info(bit_stream, src_len, &header_info), jpeg_hw_dec_failed, TAG, + "Failed to get JPEG header info"); + + ESP_LOGD(TAG, "JPEG header info: width=%" PRIu32 ", height=%" PRIu32 ", sample_method=%d", header_info.width, header_info.height, + (int)header_info.sample_method); + + switch (header_info.sample_method) { + case JPEG_DOWN_SAMPLING_GRAY: + case JPEG_DOWN_SAMPLING_YUV444: + out_buf_len = header_info.width * header_info.height * 2; + *stride = header_info.width * 2; + break; + case JPEG_DOWN_SAMPLING_YUV422: + case JPEG_DOWN_SAMPLING_YUV420: + out_buf_len = ((header_info.width + 15) & ~15) * ((header_info.height + 15) & ~15) * 2; + *stride = ((header_info.width + 15) & ~15) * 2; + break; + default: + ESP_LOGE(TAG, "Unsupported JPEG sample method"); + ret = ESP_ERR_NOT_SUPPORTED; + goto jpeg_hw_dec_failed; + } + + out_buf = (uint8_t*)jpeg_alloc_decoder_mem(out_buf_len, &rx_mem_cfg, &rx_buffer_size); + if (out_buf == NULL || rx_buffer_size < out_buf_len) { + ESP_LOGE(TAG, "Failed to allocate memory for JPEG output buffer"); + ret = ESP_ERR_NO_MEM; + goto jpeg_hw_dec_failed; + } + + uint32_t out_size = 0; + + ESP_GOTO_ON_ERROR( + jpeg_decoder_process(jpeg_dec, &decode_cfg_rgb, bit_stream, src_len, out_buf, out_buf_len, &out_size), + jpeg_hw_dec_failed, TAG, "Failed to decode JPEG"); + + ESP_LOGD(TAG, "Expected %zu bytes, got %" PRIu32 " bytes", out_buf_len, out_size); + + if (out_size != out_buf_len) 
{ + ESP_LOGE(TAG, "Decoded image size mismatch: Expected %zu bytes, got %" PRIu32 " bytes", out_buf_len, out_size); + ret = ESP_ERR_INVALID_SIZE; + goto jpeg_hw_dec_failed; + } + + if (header_info.sample_method == JPEG_DOWN_SAMPLING_GRAY) { + // convert GRAY8 to RGB565 + uint32_t i = header_info.width * header_info.height; + do { + --i; + uint8_t r = (out_buf[i] >> 3) & 0x1F; + uint8_t g = (out_buf[i] >> 2) & 0x3F; + // b is same as r + uint16_t rgb565 = (r << 11) | (g << 5) | r; + out_buf[2 * i + 1] = (rgb565 >> 8) & 0xFF; + out_buf[2 * i] = rgb565 & 0xFF; + } while (i != 0); + out_size = header_info.width * header_info.height * 2; + ESP_LOGD(TAG, "Converted GRAY8 to RGB565, new size: %" PRIu32, out_size); + } + + ESP_LOG_BUFFER_HEXDUMP(TAG, out_buf, MIN(out_size, 256), ESP_LOG_DEBUG); + + *out = out_buf; + out_buf = NULL; + *out_len = (size_t)out_size; + jpeg_del_decoder_engine(jpeg_dec); + jpeg_dec = NULL; + heap_caps_free(bit_stream); + bit_stream = NULL; + *width = header_info.width; + *height = header_info.height; + + return ret; + +jpeg_hw_dec_failed: + if (out_buf) { + heap_caps_free(out_buf); + out_buf = NULL; + } + if (bit_stream) { + heap_caps_free(bit_stream); + bit_stream = NULL; + } + if (jpeg_dec) { + jpeg_del_decoder_engine(jpeg_dec); + jpeg_dec = NULL; + } + *out = NULL; + *out_len = 0; + *width = 0; + *height = 0; + *stride = 0; + return ret; +} +#endif // CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_DECODER + +esp_err_t jpeg_to_image(const uint8_t* src, size_t src_len, uint8_t** out, size_t* out_len, size_t* width, + size_t* height, size_t* stride) { +#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE + esp_log_level_set(TAG, ESP_LOG_DEBUG); +#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE + if (src == NULL || src_len == 0 || out == NULL || out_len == NULL || width == NULL || height == NULL || + stride == NULL) { + ESP_LOGE(TAG, "Invalid parameters"); + return ESP_ERR_INVALID_ARG; + } +#ifdef CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_DECODER + esp_err_t ret = 
decode_with_hardware_jpeg(src, src_len, out, out_len, width, height, stride); + if (ret == ESP_OK) { + return ret; + } + ESP_LOGW(TAG, "Failed to decode with hardware JPEG, fallback to software decoder"); + // Fallback to esp_new_jpeg +#endif + return decode_with_new_jpeg(src, src_len, out, out_len, width, height, stride); +} diff --git a/main/display/lvgl_display/jpg/jpeg_to_image.h b/main/display/lvgl_display/jpg/jpeg_to_image.h new file mode 100644 index 00000000..b33dcef3 --- /dev/null +++ b/main/display/lvgl_display/jpg/jpeg_to_image.h @@ -0,0 +1,62 @@ +#include "sdkconfig.h" +#ifndef CONFIG_IDF_TARGET_ESP32 + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Decodes a JPEG image from memory to raw RGB565 pixel data + * + * This function attempts to decode a JPEG image using hardware acceleration first (if enabled), + * falling back to a software decoder if hardware decoding fails or is unavailable. + * + * @param[in] src Pointer to the JPEG bitstream in memory + * @param[in] src_len Length of the JPEG bitstream in bytes + * @param[out] out Pointer to a buffer pointer that will be set to the decoded image data. + * This buffer is allocated internally and MUST be freed by the caller using heap_caps_free(). 
+ * @param[out] out_len Pointer to a variable that will receive the size of the decoded image data in bytes + * @param[out] width Pointer to a variable that will receive the image width in pixels + * @param[out] height Pointer to a variable that will receive the image height in pixels + * @param[out] stride Pointer to a variable that will receive the image stride in bytes + * + * @return ESP_OK on successful decoding + * @return ESP_ERR_INVALID_ARG on invalid parameters or when the JPEG header cannot be parsed + * @return ESP_ERR_NO_MEM on memory allocation failure + * @return ESP_FAIL on failure + * + * @attention Memory Management for `*out`: + * - The function allocates memory for the decoded image internally + * - On success, the caller takes ownership of this memory and MUST free it using heap_caps_free() + * - On failure, `*out` is guaranteed to be NULL and no freeing is required + * - Example usage: + * @code{.c} + * uint8_t *image = NULL; + * size_t len, width, height, stride; + * if (jpeg_to_image(jpeg_data, jpeg_len, &image, &len, &width, &height, &stride) == ESP_OK) { + * // Use image data... + * heap_caps_free(image); // Critical: use heap_caps_free + * } + * @endcode + * + * @note Configuration dependency: + * - When CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_DECODER is enabled, hardware acceleration is attempted first + * - Both hardware and software paths allocate memory that requires heap_caps_free() for deallocation + * - The decoded image format is always RGB565 (2 bytes per pixel) + * + * @note When using hardware decoder, the decoded buffer may be padded to 16-pixel boundaries. + * For YUV420 or YUV422 compressed images, the stride and the number of buffer rows are computed from the width and height rounded up to the nearest multiple of 16, while `*width` and `*height` report the size from the JPEG header. 
+ * See details at + * https://docs.espressif.com/projects/esp-idf/en/stable/esp32p4/api-reference/peripherals/jpeg.html + * + */ +esp_err_t jpeg_to_image(const uint8_t* src, size_t src_len, uint8_t** out, size_t* out_len, size_t* width, + size_t* height, size_t* stride); + +#ifdef __cplusplus +} +#endif + +#endif // CONFIG_IDF_TARGET_ESP32 \ No newline at end of file diff --git a/sdkconfig.defaults.esp32p4 b/sdkconfig.defaults.esp32p4 index f704b373..43a36de8 100644 --- a/sdkconfig.defaults.esp32p4 +++ b/sdkconfig.defaults.esp32p4 @@ -25,6 +25,7 @@ CONFIG_IDF_EXPERIMENTAL_FEATURES=y CONFIG_COMPILER_OPTIMIZATION_PERF=y CONFIG_ESP_VIDEO_ENABLE_ISP_PIPELINE_CONTROLLER=y +CONFIG_USB_HOST_CONTROL_TRANSFER_MAX_SIZE=1024 # LVGL Graphics CONFIG_LV_USE_SNAPSHOT=y