// Review notes for this patch (a unified diff spanning nine files). The diff
// text that follows these notes is reproduced unchanged.
//
// NOTE(review): this copy of the patch looks extraction-damaged. Hunks are
// folded onto a few long lines, several `#include` directives carry no header
// name, and template/cast argument lists are absent (e.g. `std::vector buf;`,
// `static_cast(n - 1)`). Re-export the patch from the repository before
// trying to apply it.
//
// What the hunks do, as far as the visible text shows:
//  - common/arg.cpp: adds make_utf8_argv(), which obtains the argument list
//    from CommandLineToArgvW(GetCommandLineW(), ...), converts every entry
//    with WideCharToMultiByte(CP_UTF8, ...), frees the wide list with
//    LocalFree(), and builds a pointer vector terminated by nullptr. Under
//    _WIN32, common_params_parse() replaces argc/argv with that result when
//    the pointer vector is non-empty; if CommandLineToArgvW() returns null the
//    helper returns an empty result and the caller's argc/argv stay as passed.
//  - common/common.cpp, common/common.h: add fs_open_ifstream(fname, mode).
//    Under _WIN32 it converts fname with MultiByteToWideChar(CP_UTF8, ...) and
//    opens the stream from the wide buffer; a zero length from the sizing call
//    returns a default-constructed std::ifstream. Elsewhere it returns
//    std::ifstream(fname, mode).
//  - ggml/src/ggml.c: ggml_fopen() now converts `mode` with ggml_mbstowcs()
//    instead of the per-character widening loop, and only calls _wfopen() and
//    frees wmode when that conversion returned non-null.
//  - tools/cli/cli.cpp: load_input_file() opens its file through
//    fs_open_ifstream(fname, std::ios::binary).
//  - tools/mtmd/clip-impl.h, tools/mtmd/clip.cpp: add open_ifstream_binary()
//    and use it in clip_model_loader. The _WIN32 variant throws
//    std::runtime_error when the sizing call returns zero.
//  - tools/mtmd/mtmd-cli.cpp: main() calls console::init(params.simple_io,
//    params.use_color) and registers console::cleanup() through atexit().
//  - tools/mtmd/mtmd-helper.cpp: mtmd_helper_bitmap_init_from_file() converts
//    fname with MultiByteToWideChar() and opens it with _wfopen(..., L"rb")
//    under _WIN32; the `buf` declaration moves below the open check.
//
// Points to confirm before merging:
//  - NOTE(review): in make_utf8_argv() the sizing call passes
//    WC_ERR_INVALID_CHARS while the converting call passes 0 and discards its
//    return value; a failed sizing call stores an empty string for that
//    argument rather than reporting an error. Confirm both are intended.
//  - NOTE(review): the same UTF-8 to UTF-16 filename conversion is written out
//    three times (fs_open_ifstream, open_ifstream_binary,
//    mtmd_helper_bitmap_init_from_file) with three different failure
//    behaviours: empty stream, thrown exception, LOG_ERR plus
//    {nullptr, nullptr}. Consider whether one shared helper is feasible.
//  - NOTE(review): constructing std::ifstream from a wide-character path
//    presumably relies on a Windows-specific library overload; verify it is
//    available on every Windows toolchain the project builds with.
diff --git a/common/arg.cpp b/common/arg.cpp index 6fd366d33b..8f4f7d0763 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -17,6 +17,7 @@ # define NOMINMAX #endif #include +#include #endif #define JSON_ASSERT GGML_ASSERT @@ -893,7 +894,44 @@ bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map buf; + std::vector ptrs; +}; + +static utf8_argv make_utf8_argv() { + utf8_argv out; + int wargc = 0; + LPWSTR* wargv = CommandLineToArgvW(GetCommandLineW(), &wargc); + if (!wargv) return out; + + out.buf.reserve(wargc); + for (int i = 0; i < wargc; ++i) { + int n = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, wargv[i], -1, nullptr, 0, nullptr, nullptr); + if (n <= 0) { out.buf.emplace_back(); continue; } + auto& s = out.buf.emplace_back(); + s.resize(static_cast(n - 1)); + (void)WideCharToMultiByte(CP_UTF8, 0, wargv[i], -1, s.data(), n, nullptr, nullptr); + } + LocalFree(wargv); + + out.ptrs.reserve(out.buf.size() + 1); + for (auto& s : out.buf) out.ptrs.push_back(s.data()); + out.ptrs.push_back(nullptr); + return out; +} +#endif + bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) { +#ifdef _WIN32 + auto utf8 = make_utf8_argv(); + if (!utf8.ptrs.empty()) { + argc = static_cast(utf8.buf.size()); + argv = utf8.ptrs.data(); + } +#endif + auto ctx_arg = common_params_parser_init(params, ex, print_usage); const common_params params_org = ctx_arg.params; // the example can modify the default params diff --git a/common/common.cpp b/common/common.cpp index f3f114f682..a14e7bbed9 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1074,6 +1074,18 @@ std::vector fs_list(const std::string & path, bool include_dir return files; } +std::ifstream fs_open_ifstream(const std::string & fname, std::ios_base::openmode mode) { +#ifdef _WIN32 + int wlen = MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, NULL, 0); + if (!wlen) { return std::ifstream(); } + std::vector wfname(wlen); 
+ (void)MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, wfname.data(), wlen); + return std::ifstream(wfname.data(), mode); +#else + return std::ifstream(fname, mode); +#endif +} + // // TTY utils // diff --git a/common/common.h b/common/common.h index 44c605189c..254454dcb1 100644 --- a/common/common.h +++ b/common/common.h @@ -842,6 +842,9 @@ struct common_file_info { }; std::vector fs_list(const std::string & path, bool include_directories); +// fs open, also handle UTF8 on Windows +std::ifstream fs_open_ifstream(const std::string & fname, std::ios_base::openmode mode); + // // TTY utils // diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index b43016c87d..0f682fd185 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -600,18 +600,15 @@ FILE * ggml_fopen(const char * fname, const char * mode) { // convert fname (UTF-8) wchar_t * wfname = ggml_mbstowcs(fname); if (wfname) { - // convert mode (ANSI) - wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t)); - wchar_t * wmode_p = wmode; - do { - *wmode_p++ = (wchar_t)*mode; - } while (*mode++); - - // open file - file = _wfopen(wfname, wmode); + // convert mode (UTF-8) + wchar_t * wmode = ggml_mbstowcs(mode); + if (wmode) { + // open file + file = _wfopen(wfname, wmode); + GGML_FREE(wmode); + } GGML_FREE(wfname); - GGML_FREE(wmode); } return file; diff --git a/tools/cli/cli.cpp b/tools/cli/cli.cpp index c03894b4b1..8b7b58693f 100644 --- a/tools/cli/cli.cpp +++ b/tools/cli/cli.cpp @@ -202,7 +202,7 @@ struct cli_context { // TODO: support remote files in the future (http, https, etc) std::string load_input_file(const std::string & fname, bool is_media) { - std::ifstream file(fname, std::ios::binary); + std::ifstream file = fs_open_ifstream(fname, std::ios::binary); if (!file) { return ""; } diff --git a/tools/mtmd/clip-impl.h b/tools/mtmd/clip-impl.h index f232b68e5a..e7b5301445 100644 --- a/tools/mtmd/clip-impl.h +++ b/tools/mtmd/clip-impl.h @@ -13,6 +13,14 @@ #include #include #include +#include + 
+#ifdef _WIN32 +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include +#endif // Internal header for clip.cpp @@ -661,6 +669,22 @@ struct clip_image_f32_batch { // common utils // +#ifdef _WIN32 +static std::ifstream open_ifstream_binary(const std::string & fname) { + int wlen = MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, NULL, 0); + if (!wlen) { + throw std::runtime_error("failed to convert filename to UTF-16: " + fname); + } + std::vector wfname(wlen); + (void)MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, wfname.data(), wlen); + return std::ifstream(wfname.data(), std::ios::binary); +} +#else +static std::ifstream open_ifstream_binary(const std::string & fname) { + return std::ifstream(fname, std::ios::binary); +} +#endif + static std::string string_format(const char * fmt, ...) { va_list ap; va_list ap2; diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp index 10840a851f..c713703e01 100644 --- a/tools/mtmd/clip.cpp +++ b/tools/mtmd/clip.cpp @@ -1752,7 +1752,7 @@ struct clip_model_loader { std::map tensor_offset; std::vector tensors_to_load; - auto fin = std::ifstream(fname, std::ios::binary); + auto fin = open_ifstream_binary(fname); if (!fin) { throw std::runtime_error(string_format("%s: failed to open %s\n", __func__, fname.c_str())); } diff --git a/tools/mtmd/mtmd-cli.cpp b/tools/mtmd/mtmd-cli.cpp index 0ad000ef01..8704ea79d7 100644 --- a/tools/mtmd/mtmd-cli.cpp +++ b/tools/mtmd/mtmd-cli.cpp @@ -396,6 +396,9 @@ int main(int argc, char ** argv) { int n_predict = params.n_predict < 0 ? 
INT_MAX : params.n_predict; + console::init(params.simple_io, params.use_color); + atexit([]() { console::cleanup(); }); + // Ctrl+C handling { #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) diff --git a/tools/mtmd/mtmd-helper.cpp b/tools/mtmd/mtmd-helper.cpp index b5c4089232..3c73db4431 100644 --- a/tools/mtmd/mtmd-helper.cpp +++ b/tools/mtmd/mtmd-helper.cpp @@ -582,13 +582,29 @@ mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx, } mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname, bool placeholder) { - std::vector buf; +#ifdef _WIN32 + int wlen = MultiByteToWideChar(CP_UTF8, 0, fname, -1, NULL, 0); + if (!wlen) { + LOG_ERR("Unable to convert filename to UTF-16: %s\n", fname); + return {nullptr, nullptr}; + } + std::vector wfname(wlen); + wlen = MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wlen); + if (!wlen) { + LOG_ERR("Unable to convert filename to UTF-16: %s\n", fname); + return {nullptr, nullptr}; + } + FILE * f = _wfopen(wfname.data(), L"rb"); +#else FILE * f = fopen(fname, "rb"); +#endif if (!f) { LOG_ERR("Unable to open file %s: %s\n", fname, strerror(errno)); return {nullptr, nullptr}; } + std::vector buf; + fseek(f, 0, SEEK_END); long file_size = ftell(f); fseek(f, 0, SEEK_SET);