mtmd : add video input support (#24269)

* wip

* ok: lazy bitmap API

* remember to free lazy text

* wip

* add mtmd_helper_video

* support video input on server (base64 input)

* add MTMD_VIDEO config

* add timestamp

* update CLI

* cli: allow auto-completion for video

* add --video arg

* fix build

* update docs

* rename as suggested
This commit is contained in:
Xuan-Son Nguyen
2026-06-08 13:40:12 +02:00
committed by GitHub
parent c2b1518fd4
commit 8f83d6c271
16 changed files with 807 additions and 77 deletions
+7 -2
View File
@@ -235,7 +235,7 @@ struct cli_context {
};
// TODO?: Make this reusable, enums, docs
static const std::array<std::string_view, 7> cmds = {
static const std::array<std::string_view, 8> cmds = {
"/audio ",
"/clear",
"/exit",
@@ -243,6 +243,7 @@ static const std::array<std::string_view, 7> cmds = {
"/image ",
"/read ",
"/regen",
"/video ",
};
static std::vector<std::pair<std::string, size_t>> auto_completion_callback(std::string_view line, size_t cursor_byte_pos) {
@@ -457,6 +458,9 @@ int llama_cli(int argc, char ** argv) {
if (inf.has_inp_audio) {
console::log(" /audio <file> add an audio file\n");
}
if (inf.has_inp_video) {
console::log(" /video <file> add a video file\n");
}
console::log("\n");
// interactive loop
@@ -553,7 +557,8 @@ int llama_cli(int argc, char ** argv) {
continue;
} else if (
(string_starts_with(buffer, "/image ") && inf.has_inp_image) ||
(string_starts_with(buffer, "/audio ") && inf.has_inp_audio)) {
(string_starts_with(buffer, "/audio ") && inf.has_inp_audio) ||
(string_starts_with(buffer, "/video ") && inf.has_inp_video)) {
// just in case (bad copy-paste for example), we strip all trailing/leading spaces
std::string fname = string_strip(buffer.substr(7));
std::string marker = ctx_cli.load_input_file(fname, true);