mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-26 14:20:21 +00:00
mtmd: add batching support for internvl (#24775)
This commit is contained in:
+1
-1
@@ -534,7 +534,7 @@ ggml_tensor * clip_graph::build_vit(
|
||||
ggml_tensor * clip_graph::build_inp() {
|
||||
ggml_tensor * inp_raw = build_inp_raw();
|
||||
ggml_tensor * inp = ggml_conv_2d(ctx0, model.patch_embeddings_0, inp_raw, patch_size, patch_size, 0, 0, 1, 1);
|
||||
inp = ggml_reshape_2d(ctx0, inp, n_patches, n_embd);
|
||||
inp = ggml_reshape_3d(ctx0, inp, n_patches, n_embd, n_batch);
|
||||
inp = ggml_cont(ctx0, ggml_transpose(ctx0, inp));
|
||||
if (model.patch_bias) {
|
||||
inp = ggml_add(ctx0, inp, model.patch_bias);
|
||||
|
||||
@@ -8,7 +8,9 @@ ggml_cgraph * clip_graph_internvl::build() {
|
||||
ggml_tensor * inp = build_inp();
|
||||
|
||||
// add CLS token
|
||||
inp = ggml_concat(ctx0, inp, model.class_embedding, 1);
|
||||
ggml_tensor * cls_repeated = ggml_repeat_4d(ctx0, model.class_embedding,
|
||||
model.class_embedding->ne[0], 1, n_batch, 1);
|
||||
inp = ggml_concat(ctx0, inp, cls_repeated, 1);
|
||||
|
||||
// The larger models use a different ViT, which uses RMS norm instead of layer norm
|
||||
// ref: https://github.com/ggml-org/llama.cpp/pull/13443#issuecomment-2869786188
|
||||
@@ -24,14 +26,15 @@ ggml_cgraph * clip_graph_internvl::build() {
|
||||
nullptr);
|
||||
|
||||
// remove CLS token
|
||||
cur = ggml_view_2d(ctx0, cur,
|
||||
n_embd, n_patches,
|
||||
ggml_row_size(cur->type, n_embd), 0);
|
||||
cur = ggml_view_3d(ctx0, cur,
|
||||
n_embd, n_patches, n_batch,
|
||||
cur->nb[1], cur->nb[2], 0);
|
||||
cur = ggml_cont(ctx0, cur);
|
||||
|
||||
// pixel shuffle
|
||||
{
|
||||
const int scale_factor = model.hparams.n_merge;
|
||||
const int bsz = 1; // batch size, always 1 for now since we don't support batching
|
||||
const int bsz = n_batch;
|
||||
const int height = n_patches_y;
|
||||
const int width = n_patches_x;
|
||||
GGML_ASSERT(scale_factor > 0);
|
||||
@@ -44,9 +47,10 @@ ggml_cgraph * clip_graph_internvl::build() {
|
||||
bsz);
|
||||
cur = ggml_permute(ctx0, cur, 0, 2, 1, 3);
|
||||
// flatten to 2D
|
||||
cur = ggml_cont_2d(ctx0, cur,
|
||||
cur = ggml_cont_3d(ctx0, cur,
|
||||
n_embd * scale_factor * scale_factor,
|
||||
cur->ne[1] * cur->ne[2]);
|
||||
cur->ne[1] * cur->ne[2],
|
||||
cur->ne[3]);
|
||||
}
|
||||
|
||||
// projector (always using GELU activation)
|
||||
|
||||
@@ -80,6 +80,7 @@ struct clip_graph_minicpmv4_6 : clip_graph {
|
||||
// clip_graph specialization for InternVL (per the type name); the graph itself
// is produced by build(), which is only declared here.
struct clip_graph_internvl : clip_graph {
|
||||
// Forwards the context and image unchanged to the clip_graph base constructor.
clip_graph_internvl(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
|
||||
// Overrides the base-class build(); returns the ggml_cgraph for this model.
ggml_cgraph * build() override;
|
||||
// Reports batch support: unconditionally returns true for this graph type.
bool support_batch() const override { return true; }
|
||||
};
|
||||
|
||||
struct clip_graph_nemotron_v2_vl : clip_graph {
|
||||
|
||||
Reference in New Issue
Block a user