no-wstring.patch - mozsearch

firefox-main/third_party/llama.cpp/no-wstring.patch

Enable keyboard shortcuts

Source code

File a bug in Core :: Machine Learning

Revision control

Copy as Markdown

Other Tools

diff --git a/src/unicode.cpp b/src/unicode.cpp

index 6b3b2dbe7d..cc7030d1dd 100644

--- a/src/unicode.cpp

+++ b/src/unicode.cpp

@@ -499,6 +499,7 @@

     return bpe_offsets;

+#if 0

 // use std::wregex to split the text

 static std::vector<size_t> unicode_regex_split_stl(const std::wstring & wtext, const std::wstring & regex_expr, const std::vector<size_t> & offsets) {

     std::wregex expr(regex_expr);

@@ -528,6 +529,7 @@

     return bpe_offsets;

+#endif

 // use std::regex to split the text

 static std::vector<size_t> unicode_regex_split_stl(const std::string & text, const std::string & regex_expr, const std::vector<size_t> & offsets) {

@@ -818,20 +820,22 @@

                 //printf("regex_expr_collapsed: %s\n", regex_expr_collapsed.c_str());

                 bpe_offsets = unicode_regex_split_stl(text_collapsed, regex_expr_collapsed, bpe_offsets);

             } else {

-                // no unicode category used, we can use std::wregex directly

-                const std::wstring wregex_expr = unicode_wstring_from_utf8(regex_expr);

+                fprintf(stderr, "Only use utf-8");

+                std::abort();

+                // // no unicode category used, we can use std::wregex directly

+                // const std::wstring wregex_expr = unicode_wstring_from_utf8(regex_expr);

-                // std::wregex \s does not mach non-ASCII whitespaces, using 0x0B as fallback

-                std::wstring wtext(cpts.begin(), cpts.end());

-                for (size_t i = 0; i < wtext.size(); ++i) {

-                    if (wtext[i] > 0x7F && unicode_cpt_flags_from_cpt(wtext[i]).is_whitespace) {

-                        wtext[i] = 0x0B;

-                    }

-                }

+                // // std::wregex \s does not match non-ASCII whitespaces, using 0x0B as fallback

+                // std::wstring wtext(cpts.begin(), cpts.end());

+                // for (size_t i = 0; i < wtext.size(); ++i) {

+                //     if (wtext[i] > 0x7F && unicode_cpt_flags_from_cpt(wtext[i]).is_whitespace) {

+                //         wtext[i] = 0x0B;

+                //     }

+                // }

-                //printf("text: %s\n", text.c_str());

-                //printf("regex_expr: %s\n", regex_expr.c_str());

-                bpe_offsets = unicode_regex_split_stl(wtext, wregex_expr, bpe_offsets);

+                // //printf("text: %s\n", text.c_str());

+                // //printf("regex_expr: %s\n", regex_expr.c_str());

+                // bpe_offsets = unicode_regex_split_stl(wtext, wregex_expr, bpe_offsets);

         } catch (std::regex_error & e) {

             fprintf(stderr, "Failed to process regex: '%s'\n", regex_expr.c_str());