Remove debugging printf statements

This commit is contained in:
Xuan Son Nguyen
2026-06-18 16:41:12 +02:00
parent 9158400c42
commit 4ea849efc7
-23
View File
@@ -256,9 +256,6 @@ ggml_cgraph * clip_graph_deepseekocr::build() {
// note: we expect either a batch of rows or a batch of overviews, but not a mix of both
printf("[DSOCR] inp_raw=[%lld,%lld,%lld,%lld] is_overview=%d img.nx=%d img.ny=%d\n",
inp_raw->ne[0], inp_raw->ne[1], inp_raw->ne[2], inp_raw->ne[3], (int)is_overview, img.nx(), img.ny());
if (!is_overview) {
// handle the case where we have a batch of rows
// sanity check
@@ -274,26 +271,20 @@ ggml_cgraph * clip_graph_deepseekocr::build() {
GGML_ASSERT(img.ny() >= img.nx());
GGML_ASSERT(img.ny() % img.nx() == 0);
n_tiles_per_row = img.ny() / img.nx();
printf("[DSOCR] n_tiles_per_row=%d\n", n_tiles_per_row);
// input shape: [tile_size, tile_size * n_tiles_per_row, 3]
// we want to reshape it to [tile_size, tile_size, 3, n_tiles_per_row]
inp_raw = ggml_reshape_4d(ctx0, inp_raw, img.nx(), img.nx(), n_tiles_per_row, 3);
printf("[DSOCR] inp_raw after reshape_4d=[%lld,%lld,%lld,%lld]\n", inp_raw->ne[0], inp_raw->ne[1], inp_raw->ne[2], inp_raw->ne[3]);
inp_raw = ggml_cont(ctx0, ggml_permute(ctx0, inp_raw, 0, 1, 3, 2));
printf("[DSOCR] inp_raw after permute=[%lld,%lld,%lld,%lld]\n", inp_raw->ne[0], inp_raw->ne[1], inp_raw->ne[2], inp_raw->ne[3]);
}
ggml_tensor * sam_out = build_sam(inp_raw);
printf("[DSOCR] sam_out=[%lld,%lld,%lld,%lld]\n", sam_out->ne[0], sam_out->ne[1], sam_out->ne[2], sam_out->ne[3]);
if (!is_overview) {
n_batch = n_tiles_per_row;
}
printf("[DSOCR] n_batch=%d\n", n_batch);
const int clip_n_patches = sam_out->ne[0] * sam_out->ne[1];
printf("[DSOCR] clip_n_patches=%d\n", clip_n_patches);
ggml_tensor * clip_out;
// Building DS-OCR CLIP
@@ -302,11 +293,8 @@ ggml_cgraph * clip_graph_deepseekocr::build() {
// sam_out: [patch_h, patch_w, n_embd, n_batch]
// -> [n_embd, clip_n_patches, n_batch]
printf("[DSOCR] CLIP inp: reshape_3d(%d, %lld, %lld) from sam_out\n", clip_n_patches, sam_out->ne[2], sam_out->ne[3]);
inp = ggml_reshape_3d(ctx0, sam_out, clip_n_patches, sam_out->ne[2], sam_out->ne[3]);
printf("[DSOCR] CLIP inp after reshape_3d=[%lld,%lld,%lld]\n", inp->ne[0], inp->ne[1], inp->ne[2]);
inp = ggml_cont(ctx0, ggml_permute(ctx0, inp, 1, 0, 2, 3));
printf("[DSOCR] CLIP inp after permute=[%lld,%lld,%lld,%lld]\n", inp->ne[0], inp->ne[1], inp->ne[2], inp->ne[3]);
ggml_tensor * new_pos_embd = model.position_embeddings;
@@ -349,10 +337,7 @@ ggml_cgraph * clip_graph_deepseekocr::build() {
// sam_out: [patch_h, patch_w, n_embd, n_batch]
// -> [n_embd, clip_n_patches, n_batch]
printf("[DSOCR] sam_out before permute=[%lld,%lld,%lld,%lld]\n", sam_out->ne[0], sam_out->ne[1], sam_out->ne[2], sam_out->ne[3]);
sam_out = ggml_cont(ctx0, ggml_permute(ctx0, sam_out, 1, 2, 0, 3));
printf("[DSOCR] sam_out after permute=[%lld,%lld,%lld,%lld]\n", sam_out->ne[0], sam_out->ne[1], sam_out->ne[2], sam_out->ne[3]);
printf("[DSOCR] reshape_3d(%lld, %d, %d)\n", sam_out->ne[0], clip_n_patches, n_batch);
sam_out = ggml_reshape_3d(ctx0, sam_out, sam_out->ne[0], clip_n_patches, n_batch);
// clip_out: [n_embd, n_pos, n_batch] where n_pos = clip_n_patches + 1 (CLS)
@@ -362,9 +347,7 @@ ggml_cgraph * clip_graph_deepseekocr::build() {
ggml_tensor * cur;
cur = ggml_concat(ctx0, clip_out, sam_out, 0);
printf("[DSOCR] after concat: cur=[%lld,%lld,%lld,%lld]\n", cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3]);
cur = ggml_mul_mat(ctx0, model.mm_fc_w, cur);
printf("[DSOCR] after mul_mat: cur=[%lld,%lld,%lld,%lld]\n", cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3]);
cur = ggml_add(ctx0, cur, model.mm_fc_b);
if (is_overview) {
@@ -382,31 +365,25 @@ ggml_cgraph * clip_graph_deepseekocr::build() {
const int grid_x = static_cast<int>(std::sqrt(static_cast<float>(clip_n_patches)));
const int grid_y = grid_x;
const auto n_dim = cur->ne[0];
printf("[DSOCR] grid_x=%d grid_y=%d n_dim=%lld n_batch=%d\n", grid_x, grid_y, n_dim, n_batch);
// (n_dim, clip_n_patches, n_batch) -> (n_dim, grid_x, grid_y, n_batch)
cur = ggml_reshape_4d(ctx0, cur, n_dim, grid_x, grid_y, n_batch);
printf("[DSOCR] after reshape_4d: cur=[%lld,%lld,%lld,%lld]\n", cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3]);
// tiles: re-order from A.row0 A.row1 B.row0 B.row1 ...
// to A.row0 B.row0 A.row1 B.row1 ...
// then add nl: A.row0 B.row0 [nl] A.row1 B.row1 [nl] ...
// interleave tiles: (n_dim, grid_x, grid_y, n_batch) -> (n_dim, grid_x, n_batch, grid_y)
cur = ggml_cont(ctx0, ggml_permute(ctx0, cur, 0, 1, 3, 2));
printf("[DSOCR] after permute: cur=[%lld,%lld,%lld,%lld]\n", cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3]);
// merge: (n_dim, grid_x, n_batch, grid_y) -> (n_dim, grid_x*n_batch, grid_y, 1)
cur = ggml_reshape_4d(ctx0, cur, n_dim, grid_x * n_batch, grid_y, 1);
printf("[DSOCR] after merge reshape: cur=[%lld,%lld,%lld,%lld]\n", cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3]);
// append newline per row: (n_dim, grid_x*n_batch+1, grid_y, 1)
ggml_tensor * imgnl = ggml_repeat_4d(ctx0, model.image_newline, n_dim, 1, grid_y, 1);
cur = ggml_concat(ctx0, cur, imgnl, 1);
printf("[DSOCR] after append nl: cur=[%lld,%lld,%lld,%lld]\n", cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3]);
// flatten: (n_dim, (grid_x*n_batch+1)*grid_y)
cur = ggml_reshape_2d(ctx0, cur, n_dim, (grid_x * n_batch + 1) * grid_y);
printf("[DSOCR] after flatten: cur=[%lld,%lld]\n", cur->ne[0], cur->ne[1]);
}
cb(cur, "dsocr_output", -1);