mtmd : add video input support (#24269)

* wip
* ok: lazy bitmap API
* remember to free lazy text
* wip
* add mtmd_helper_video
* support video input on server (base64 input)
* add MTMD_VIDEO config
* add timestamp
* update CLI
* cli: allow auto-completion for video
* add --video arg
* fix build
* update docs
* rename as suggested
2026-06-26 06:10:19 +00:00 · 2026-06-08 13:40:12 +02:00
parent c2b1518fd4
commit 8f83d6c271
16 changed files with 807 additions and 77 deletions
@@ -235,7 +235,7 @@ struct cli_context {
 };

 // TODO?: Make this reusable, enums, docs
-static const std::array<std::string_view, 7> cmds = {
+static const std::array<std::string_view, 8> cmds = {
    "/audio ",
    "/clear",
    "/exit",
@@ -243,6 +243,7 @@ static const std::array<std::string_view, 7> cmds = {
    "/image ",
    "/read ",
    "/regen",
+    "/video ",
 };

 static std::vector<std::pair<std::string, size_t>> auto_completion_callback(std::string_view line, size_t cursor_byte_pos) {
@@ -457,6 +458,9 @@ int llama_cli(int argc, char ** argv) {
    if (inf.has_inp_audio) {
        console::log("  /audio <file>       add an audio file\n");
    }
+    if (inf.has_inp_video) {
+        console::log("  /video <file>       add a video file\n");
+    }
    console::log("\n");

    // interactive loop
@@ -553,7 +557,8 @@ int llama_cli(int argc, char ** argv) {
            continue;
        } else if (
                (string_starts_with(buffer, "/image ") && inf.has_inp_image) ||
-                (string_starts_with(buffer, "/audio ") && inf.has_inp_audio)) {
+                (string_starts_with(buffer, "/audio ") && inf.has_inp_audio) ||
+                (string_starts_with(buffer, "/video ") && inf.has_inp_video)) {
            // just in case (bad copy-paste for example), we strip all trailing/leading spaces
            std::string fname = string_strip(buffer.substr(7));
            std::string marker = ctx_cli.load_input_file(fname, true);