mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-26 06:10:19 +00:00
e7bcf1c3a8
* Deduplicate imatrix loading code * Add back LLAMA_TRACE, early exit on quantize missing metadata
27 lines
765 B
C++
27 lines
765 B
C++
#pragma once
|
|
|
|
#include <cstdint>
|
|
#include <map>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
inline constexpr const char * LLM_KV_IMATRIX_DATASETS = "imatrix.datasets";
|
|
inline constexpr const char * LLM_KV_IMATRIX_CHUNK_COUNT = "imatrix.chunk_count";
|
|
inline constexpr const char * LLM_KV_IMATRIX_CHUNK_SIZE = "imatrix.chunk_size";
|
|
|
|
struct common_imatrix_entry {
|
|
std::vector<float> sums;
|
|
std::vector<int64_t> counts;
|
|
};
|
|
|
|
struct common_imatrix {
|
|
std::map<std::string, common_imatrix_entry> entries;
|
|
std::vector<std::string> datasets;
|
|
int32_t chunk_count = 0;
|
|
int32_t chunk_size = 0;
|
|
bool is_legacy = false;
|
|
bool has_metadata = false;
|
|
};
|
|
|
|
bool common_imatrix_load(const std::string & fname, common_imatrix & imatrix);
|