From 2c3c603be294d83038cd5731d6aea503ae9e9095 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Thu, 20 May 2021 14:16:44 +0200 Subject: [PATCH 1/2] Sprinkle some const in components/to_utf8 --- components/to_utf8/gen_iconv.cpp | 2 +- components/to_utf8/tables_gen.hpp | 8 ++++---- components/to_utf8/to_utf8.cpp | 8 ++++---- components/to_utf8/to_utf8.hpp | 10 +++++----- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/components/to_utf8/gen_iconv.cpp b/components/to_utf8/gen_iconv.cpp index f2d9a42f18..75d83fb1a7 100644 --- a/components/to_utf8/gen_iconv.cpp +++ b/components/to_utf8/gen_iconv.cpp @@ -45,7 +45,7 @@ void writeMissing(bool last) int write_table(const std::string &charset, const std::string &tableName) { // Write table header - std::cout << "static signed char " << tableName << "[] =\n{\n"; + std::cout << "const static signed char " << tableName << "[] =\n{\n"; // Open conversion system iconv_t cd = iconv_open ("UTF-8", charset.c_str()); diff --git a/components/to_utf8/tables_gen.hpp b/components/to_utf8/tables_gen.hpp index 14e66eac17..b7659979eb 100644 --- a/components/to_utf8/tables_gen.hpp +++ b/components/to_utf8/tables_gen.hpp @@ -8,7 +8,7 @@ namespace ToUTF8 /// Central European and Eastern European languages that use Latin script, /// such as Polish, Czech, Slovak, Hungarian, Slovene, Bosnian, Croatian, /// Serbian (Latin script), Romanian and Albanian. 
-static signed char windows_1250[] = +const static signed char windows_1250[] = { 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, @@ -270,7 +270,7 @@ static signed char windows_1250[] = /// Cyrillic alphabet such as Russian, Bulgarian, Serbian Cyrillic /// and other languages -static signed char windows_1251[] = +const static signed char windows_1251[] = { 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, @@ -531,7 +531,7 @@ static signed char windows_1251[] = }; /// Latin alphabet used by English and some other Western languages -static signed char windows_1252[] = +const static signed char windows_1252[] = { 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, @@ -790,7 +790,7 @@ static signed char windows_1252[] = 2, -61, -66, 0, 0, 0, 2, -61, -65, 0, 0, 0 }; -static signed char cp437[] = +const static signed char cp437[] = { 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, diff --git a/components/to_utf8/to_utf8.cpp b/components/to_utf8/to_utf8.cpp index bcb174b7be..708f4b41aa 100644 --- a/components/to_utf8/to_utf8.cpp +++ b/components/to_utf8/to_utf8.cpp @@ -182,7 +182,7 @@ void Utf8Encoder::resize(size_t size) is the case, then the ascii parameter is set to true, and the caller can optimize for this case. */ -size_t Utf8Encoder::getLength(const char* input, bool &ascii) +size_t Utf8Encoder::getLength(const char* input, bool &ascii) const { ascii = true; size_t len = 0; @@ -214,7 +214,7 @@ size_t Utf8Encoder::getLength(const char* input, bool &ascii) // Translate one character 'ch' using the translation array 'arr', and // advance the output pointer accordingly. 
-void Utf8Encoder::copyFromArray(unsigned char ch, char* &out) +void Utf8Encoder::copyFromArray(unsigned char ch, char* &out) const { // Optimize for ASCII values if (ch < 128) @@ -229,7 +229,7 @@ void Utf8Encoder::copyFromArray(unsigned char ch, char* &out) *(out++) = *(in++); } -size_t Utf8Encoder::getLength2(const char* input, bool &ascii) +size_t Utf8Encoder::getLength2(const char* input, bool &ascii) const { ascii = true; size_t len = 0; @@ -273,7 +273,7 @@ size_t Utf8Encoder::getLength2(const char* input, bool &ascii) return len; } -void Utf8Encoder::copyFromArray2(const char*& chp, char* &out) +void Utf8Encoder::copyFromArray2(const char*& chp, char* &out) const { unsigned char ch = *(chp++); // Optimize for ASCII values diff --git a/components/to_utf8/to_utf8.hpp b/components/to_utf8/to_utf8.hpp index 3f20a51f86..d8c9f09d5d 100644 --- a/components/to_utf8/to_utf8.hpp +++ b/components/to_utf8/to_utf8.hpp @@ -42,13 +42,13 @@ namespace ToUTF8 private: void resize(size_t size); - size_t getLength(const char* input, bool &ascii); - void copyFromArray(unsigned char chp, char* &out); - size_t getLength2(const char* input, bool &ascii); - void copyFromArray2(const char*& chp, char* &out); + size_t getLength(const char* input, bool &ascii) const; + void copyFromArray(unsigned char chp, char* &out) const; + size_t getLength2(const char* input, bool &ascii) const; + void copyFromArray2(const char*& chp, char* &out) const; std::vector<char> mOutput; - signed char* translationArray; + const signed char* translationArray; }; } From 4dca2c04665679d64fe65d7c3e557389f45bff52 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Thu, 20 May 2021 14:16:58 +0200 Subject: [PATCH 2/2] Replace a handrolled memcpy with an actual call to memcpy --- components/to_utf8/to_utf8.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/to_utf8/to_utf8.cpp b/components/to_utf8/to_utf8.cpp index 708f4b41aa..f7dc33fcbf 100644 --- a/components/to_utf8/to_utf8.cpp +++ 
b/components/to_utf8/to_utf8.cpp @@ -225,8 +225,8 @@ void Utf8Encoder::copyFromArray(unsigned char ch, char* &out) const const signed char *in = translationArray + ch*6; int len = *(in++); - for (int i=0; i