1
0
Fork 0
mirror of https://github.com/OpenMW/openmw.git synced 2025-03-01 08:09:46 +00:00

Merge branch 'save_me_utf8' into 'master'

Handle non-ASCII characters without triggering an assertion

Closes #6396

See merge request OpenMW/openmw!1375
This commit is contained in:
Alexei Kotov 2021-11-14 12:38:36 +00:00
commit ec63546a37
12 changed files with 67 additions and 45 deletions

View file

@ -75,6 +75,7 @@
Bug #6363: Some scripts in Morrowland fail to work Bug #6363: Some scripts in Morrowland fail to work
Bug #6376: Creatures should be able to use torches Bug #6376: Creatures should be able to use torches
Bug #6386: Artifacts in water reflection due to imprecise screen-space coordinate computation Bug #6386: Artifacts in water reflection due to imprecise screen-space coordinate computation
Bug #6396: Inputting certain Unicode characters triggers an assertion
Bug #6416: Morphs are applied to the wrong target Bug #6416: Morphs are applied to the wrong target
Feature #890: OpenMW-CS: Column filtering Feature #890: OpenMW-CS: Column filtering
Feature #2554: Modifying an object triggers the instances table to scroll to the corresponding record Feature #2554: Modifying an object triggers the instances table to scroll to the corresponding record

View file

@ -2,8 +2,8 @@
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
#include <cstdlib>
#include <components/misc/stringops.hpp>
#include <components/nif/niffile.hpp> #include <components/nif/niffile.hpp>
#include <components/files/constrainedfilestream.hpp> #include <components/files/constrainedfilestream.hpp>
#include <components/vfs/manager.hpp> #include <components/vfs/manager.hpp>
@ -18,18 +18,10 @@ namespace bpo = boost::program_options;
namespace bfs = boost::filesystem; namespace bfs = boost::filesystem;
///See if the file has the named extension ///See if the file has the named extension
bool hasExtension(std::string filename, std::string extensionToFind) bool hasExtension(std::string filename, std::string extensionToFind)
{ {
std::string extension = filename.substr(filename.find_last_of('.')+1); std::string extension = filename.substr(filename.find_last_of('.')+1);
return Misc::StringUtils::ciEqual(extension, extensionToFind);
//Convert strings to lower case for comparison
std::transform(extension.begin(), extension.end(), extension.begin(), ::tolower);
std::transform(extensionToFind.begin(), extensionToFind.end(), extensionToFind.begin(), ::tolower);
if(extension == extensionToFind)
return true;
else
return false;
} }
///See if the file has the "nif" extension. ///See if the file has the "nif" extension.

View file

@ -17,6 +17,19 @@
#include "textnode.hpp" #include "textnode.hpp"
#include "valuenode.hpp" #include "valuenode.hpp"
namespace
{
    /// Reports whether @p c is an alphabetic character in the current C locale.
    /// The value is routed through unsigned char before the <cctype> call so that
    /// bytes with the high bit set (e.g. parts of a UTF-8 sequence) are never
    /// handed to std::isalpha as a negative int.
    bool isAlpha(char c)
    {
        const auto byte = static_cast<unsigned char>(c);
        return std::isalpha(byte) != 0;
    }

    /// Reports whether @p c is a decimal digit.
    /// Uses the same unsigned char widening as isAlpha before calling std::isdigit.
    bool isDigit(char c)
    {
        const auto byte = static_cast<unsigned char>(c);
        return std::isdigit(byte) != 0;
    }
}
namespace CSMFilter namespace CSMFilter
{ {
struct Token struct Token
@ -103,7 +116,7 @@ CSMFilter::Token CSMFilter::Parser::getStringToken()
{ {
char c = mInput[mIndex]; char c = mInput[mIndex];
if (std::isalpha (c) || c==':' || c=='_' || (!string.empty() && std::isdigit (c)) || c=='"' || if (isAlpha(c) || c==':' || c=='_' || (!string.empty() && isDigit(c)) || c=='"' ||
(!string.empty() && string[0]=='"')) (!string.empty() && string[0]=='"'))
string += c; string += c;
else else
@ -150,7 +163,7 @@ CSMFilter::Token CSMFilter::Parser::getNumberToken()
{ {
char c = mInput[mIndex]; char c = mInput[mIndex];
if (std::isdigit (c)) if (isDigit(c))
{ {
string += c; string += c;
hasDigit = true; hasDigit = true;
@ -225,10 +238,10 @@ CSMFilter::Token CSMFilter::Parser::getNextToken()
case '!': ++mIndex; return Token (Token::Type_OneShot); case '!': ++mIndex; return Token (Token::Type_OneShot);
} }
if (c=='"' || c=='_' || std::isalpha (c) || c==':') if (c=='"' || c=='_' || isAlpha(c) || c==':')
return getStringToken(); return getStringToken();
if (c=='-' || c=='.' || std::isdigit (c)) if (c=='-' || c=='.' || isDigit(c))
return getNumberToken(); return getNumberToken();
error(); error();

View file

@ -8,6 +8,7 @@
#include <stdexcept> #include <stdexcept>
#include <components/esm/cellid.hpp> #include <components/esm/cellid.hpp>
#include <components/misc/stringops.hpp>
#include "collectionbase.hpp" #include "collectionbase.hpp"
#include "columnbase.hpp" #include "columnbase.hpp"
@ -354,8 +355,7 @@ CSMWorld::LandTextureIdTable::ImportResults CSMWorld::LandTextureIdTable::import
for (int i = 0; i < idCollection()->getSize(); ++i) for (int i = 0; i < idCollection()->getSize(); ++i)
{ {
auto& record = static_cast<const Record<LandTexture>&>(idCollection()->getRecord(i)); auto& record = static_cast<const Record<LandTexture>&>(idCollection()->getRecord(i));
std::string texture = record.get().mTexture; std::string texture = Misc::StringUtils::lowerCase(record.get().mTexture);
std::transform(texture.begin(), texture.end(), texture.begin(), tolower);
if (record.isModified()) if (record.isModified())
reverseLookupMap.emplace(texture, idCollection()->getId(i)); reverseLookupMap.emplace(texture, idCollection()->getId(i));
} }
@ -376,8 +376,7 @@ CSMWorld::LandTextureIdTable::ImportResults CSMWorld::LandTextureIdTable::import
// Look for a pre-existing record // Look for a pre-existing record
auto& record = static_cast<const Record<LandTexture>&>(idCollection()->getRecord(oldRow)); auto& record = static_cast<const Record<LandTexture>&>(idCollection()->getRecord(oldRow));
std::string texture = record.get().mTexture; std::string texture = Misc::StringUtils::lowerCase(record.get().mTexture);
std::transform(texture.begin(), texture.end(), texture.begin(), tolower);
auto searchIt = reverseLookupMap.find(texture); auto searchIt = reverseLookupMap.find(texture);
if (searchIt != reverseLookupMap.end()) if (searchIt != reverseLookupMap.end())
{ {

View file

@ -42,9 +42,9 @@ namespace MWInput
bool consumed = SDL_IsTextInputActive() && // Little trick to check if key is printable bool consumed = SDL_IsTextInputActive() && // Little trick to check if key is printable
(!(SDLK_SCANCODE_MASK & arg.keysym.sym) && (!(SDLK_SCANCODE_MASK & arg.keysym.sym) &&
(std::isprint(arg.keysym.sym) ||
// Don't trust isprint for symbols outside the extended ASCII range // Don't trust isprint for symbols outside the extended ASCII range
(kc == MyGUI::KeyCode::None && arg.keysym.sym > 0xff))); ((kc == MyGUI::KeyCode::None && arg.keysym.sym > 0xff) ||
(arg.keysym.sym >= 0 && arg.keysym.sym <= 255 && std::isprint(arg.keysym.sym))));
if (kc != MyGUI::KeyCode::None && !mBindingsManager->isDetectingBindingState()) if (kc != MyGUI::KeyCode::None && !mBindingsManager->isDetectingBindingState())
{ {
if (MWBase::Environment::get().getWindowManager()->injectKeyPress(kc, 0, arg.repeat)) if (MWBase::Environment::get().getWindowManager()->injectKeyPress(kc, 0, arg.repeat))

View file

@ -8,6 +8,8 @@
#include <components/esm/esmreader.hpp> #include <components/esm/esmreader.hpp>
#include <components/esm/defs.hpp> #include <components/esm/defs.hpp>
#include <components/misc/utf8stream.hpp>
bool MWState::operator< (const Slot& left, const Slot& right) bool MWState::operator< (const Slot& left, const Slot& right)
{ {
return left.mTimeStamp<right.mTimeStamp; return left.mTimeStamp<right.mTimeStamp;
@ -52,12 +54,14 @@ void MWState::Character::addSlot (const ESM::SavedGame& profile)
std::ostringstream stream; std::ostringstream stream;
// The profile description is user-supplied, so we need to escape the path // The profile description is user-supplied, so we need to escape the path
for (std::string::const_iterator it = profile.mDescription.begin(); it != profile.mDescription.end(); ++it) Utf8Stream description(profile.mDescription);
while(!description.eof())
{ {
if (std::isalnum(*it)) // Ignores multibyte characters and non alphanumeric characters auto c = description.consume();
stream << *it; if(c <= 0x7F && std::isalnum(c)) // Ignore multibyte characters and non alphanumeric characters
stream << static_cast<char>(c);
else else
stream << "_"; stream << '_';
} }
const std::string ext = ".omwsave"; const std::string ext = ".omwsave";

View file

@ -5,6 +5,8 @@
#include <boost/filesystem.hpp> #include <boost/filesystem.hpp>
#include <components/misc/utf8stream.hpp>
MWState::CharacterManager::CharacterManager (const boost::filesystem::path& saves, MWState::CharacterManager::CharacterManager (const boost::filesystem::path& saves,
const std::vector<std::string>& contentFiles) const std::vector<std::string>& contentFiles)
: mPath (saves), mCurrent (nullptr), mGame (getFirstGameFile(contentFiles)) : mPath (saves), mCurrent (nullptr), mGame (getFirstGameFile(contentFiles))
@ -57,12 +59,14 @@ MWState::Character* MWState::CharacterManager::createCharacter(const std::string
std::ostringstream stream; std::ostringstream stream;
// The character name is user-supplied, so we need to escape the path // The character name is user-supplied, so we need to escape the path
for (std::string::const_iterator it = name.begin(); it != name.end(); ++it) Utf8Stream nameStream(name);
while(!nameStream.eof())
{ {
if (std::isalnum(*it)) // Ignores multibyte characters and non alphanumeric characters auto c = nameStream.consume();
stream << *it; if(c <= 0x7F && std::isalnum(c)) // Ignore multibyte characters and non alphanumeric characters
stream << static_cast<char>(c);
else else
stream << "_"; stream << '_';
} }
boost::filesystem::path path = mPath / stream.str(); boost::filesystem::path path = mPath / stream.str();

View file

@ -84,7 +84,7 @@ namespace Files
{ {
mNext.push(character); mNext.push(character);
} }
if (!mSeenNonWhitespace && !isspace(character)) if (!mSeenNonWhitespace && !(character >= 0 && character <= 255 && isspace(character)))
mSeenNonWhitespace = true; mSeenNonWhitespace = true;
} }
int retval = mNext.front(); int retval = mNext.front();

View file

@ -172,8 +172,7 @@ namespace Interpreter{
for(unsigned int j = 0; j < globals.size(); j++){ for(unsigned int j = 0; j < globals.size(); j++){
if(globals[j].length() > temp.length()){ // Just in case there's a global with a huuuge name if(globals[j].length() > temp.length()){ // Just in case there's a global with a huuuge name
temp = text.substr(i+1, globals[j].length()); temp = Misc::StringUtils::lowerCase(text.substr(i+1, globals[j].length()));
transform(temp.begin(), temp.end(), temp.begin(), ::tolower);
} }
found = check(temp, globals[j], &i, &start); found = check(temp, globals[j], &i, &start);

View file

@ -30,6 +30,11 @@ namespace LuaUtil
{"POTION", ESM::LuaScriptCfg::sPotion}, {"POTION", ESM::LuaScriptCfg::sPotion},
{"WEAPON", ESM::LuaScriptCfg::sWeapon}, {"WEAPON", ESM::LuaScriptCfg::sWeapon},
}; };
/// Reports whether @p c is a whitespace character in the current C locale.
/// The char is widened through unsigned char first so that high-bit bytes
/// never reach std::isspace as a negative int.
bool isSpace(char c)
{
    const auto byte = static_cast<unsigned char>(c);
    return std::isspace(byte) != 0;
}
} }
const std::vector<int> ScriptsConfiguration::sEmpty; const std::vector<int> ScriptsConfiguration::sEmpty;
@ -101,11 +106,11 @@ namespace LuaUtil
if (!line.empty() && line.back() == '\r') if (!line.empty() && line.back() == '\r')
line = line.substr(0, line.size() - 1); line = line.substr(0, line.size() - 1);
while (!line.empty() && std::isspace(line[0])) while (!line.empty() && isSpace(line[0]))
line = line.substr(1); line = line.substr(1);
if (line.empty() || line[0] == '#') // Skip empty lines and comments if (line.empty() || line[0] == '#') // Skip empty lines and comments
continue; continue;
while (!line.empty() && std::isspace(line.back())) while (!line.empty() && isSpace(line.back()))
line = line.substr(0, line.size() - 1); line = line.substr(0, line.size() - 1);
if (!Misc::StringUtils::ciEndsWith(line, ".lua")) if (!Misc::StringUtils::ciEndsWith(line, ".lua"))
@ -118,7 +123,7 @@ namespace LuaUtil
throw std::runtime_error(Misc::StringUtils::format("No flags found in: %s", std::string(line))); throw std::runtime_error(Misc::StringUtils::format("No flags found in: %s", std::string(line)));
std::string_view flagsStr = line.substr(0, semicolonPos); std::string_view flagsStr = line.substr(0, semicolonPos);
std::string_view scriptPath = line.substr(semicolonPos + 1); std::string_view scriptPath = line.substr(semicolonPos + 1);
while (std::isspace(scriptPath[0])) while (isSpace(scriptPath[0]))
scriptPath = scriptPath.substr(1); scriptPath = scriptPath.substr(1);
// Parse flags // Parse flags
@ -126,10 +131,10 @@ namespace LuaUtil
size_t flagsPos = 0; size_t flagsPos = 0;
while (true) while (true)
{ {
while (flagsPos < flagsStr.size() && (std::isspace(flagsStr[flagsPos]) || flagsStr[flagsPos] == ',')) while (flagsPos < flagsStr.size() && (isSpace(flagsStr[flagsPos]) || flagsStr[flagsPos] == ','))
flagsPos++; flagsPos++;
size_t startPos = flagsPos; size_t startPos = flagsPos;
while (flagsPos < flagsStr.size() && !std::isspace(flagsStr[flagsPos]) && flagsStr[flagsPos] != ',') while (flagsPos < flagsStr.size() && !isSpace(flagsStr[flagsPos]) && flagsStr[flagsPos] != ',')
flagsPos++; flagsPos++;
if (startPos == flagsPos) if (startPos == flagsPos)
break; break;

View file

@ -184,17 +184,16 @@ public:
static inline void trim(std::string &s) static inline void trim(std::string &s)
{ {
// left trim const auto notSpace = [](char ch)
s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int ch)
{ {
return !std::isspace(ch); // TODO Do we care about multibyte whitespace?
})); return !std::isspace(static_cast<unsigned char>(ch));
};
// left trim
s.erase(s.begin(), std::find_if(s.begin(), s.end(), notSpace));
// right trim // right trim
s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) s.erase(std::find_if(s.rbegin(), s.rend(), notSpace).base(), s.end());
{
return !std::isspace(ch);
}).base(), s.end());
} }
template <class Container> template <class Container>

View file

@ -3,6 +3,7 @@
#include <cstring> #include <cstring>
#include <string> #include <string>
#include <string_view>
#include <tuple> #include <tuple>
class Utf8Stream class Utf8Stream
@ -30,6 +31,11 @@ public:
{ {
} }
/// Constructs a stream over the characters viewed by @p str.
/// Delegates to the two-argument constructor, passing pointers to the first
/// byte of the view and to one past its last byte, each reinterpreted as Point.
/// NOTE(review): presumably only the pointers are kept (no copy is made here),
/// so the buffer behind @p str must outlive the stream -- confirm against the
/// two-argument constructor.
Utf8Stream (std::string_view str) :
Utf8Stream (reinterpret_cast<Point>(str.data()), reinterpret_cast<Point>(str.data() + str.size()))
{
}
bool eof () const bool eof () const
{ {
return cur == end; return cur == end;