1
0
Fork 0
mirror of https://github.com/OpenMW/openmw.git synced 2025-03-01 09:09:42 +00:00

Merge branch 'save_me_utf8' into 'master'

Handle non-ASCII characters without triggering an assertion

Closes #6396

See merge request OpenMW/openmw!1375
This commit is contained in:
Alexei Kotov 2021-11-14 12:38:36 +00:00
commit ec63546a37
12 changed files with 67 additions and 45 deletions

View file

@ -75,6 +75,7 @@
Bug #6363: Some scripts in Morrowland fail to work
Bug #6376: Creatures should be able to use torches
Bug #6386: Artifacts in water reflection due to imprecise screen-space coordinate computation
Bug #6396: Inputting certain Unicode characters triggers an assertion
Bug #6416: Morphs are applied to the wrong target
Feature #890: OpenMW-CS: Column filtering
Feature #2554: Modifying an object triggers the instances table to scroll to the corresponding record

View file

@ -2,8 +2,8 @@
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <components/misc/stringops.hpp>
#include <components/nif/niffile.hpp>
#include <components/files/constrainedfilestream.hpp>
#include <components/vfs/manager.hpp>
@ -21,15 +21,7 @@ namespace bfs = boost::filesystem;
/// Check whether a file name carries the given extension, ignoring letter case.
/// \param filename         File name or path to inspect.
/// \param extensionToFind  Extension to look for, without the leading dot (e.g. "nif").
/// \return true when the text following the last '.' in \a filename matches \a extensionToFind.
/// \note If \a filename contains no '.', the whole name is compared against \a extensionToFind.
bool hasExtension(std::string filename, std::string extensionToFind)
{
    // find_last_of yields npos when there is no dot; npos + 1 wraps around to 0,
    // so substr then returns the complete file name.
    std::string extension = filename.substr(filename.find_last_of('.')+1);

    // Compare through the shared string helper rather than lower-casing with ::tolower,
    // which must not be handed negative char values (non-ASCII bytes).
    return Misc::StringUtils::ciEqual(extension, extensionToFind);
}
///See if the file has the "nif" extension.

View file

@ -17,6 +17,19 @@
#include "textnode.hpp"
#include "valuenode.hpp"
namespace
{
    /// Report whether \a c is classified as alphabetic by std::isalpha.
    /// The value is converted to unsigned char before the call, because the
    /// <cctype> functions only accept values representable as unsigned char (or EOF).
    bool isAlpha(char c)
    {
        const auto byte = static_cast<unsigned char>(c);
        return std::isalpha(byte) != 0;
    }

    /// Report whether \a c is classified as a decimal digit by std::isdigit.
    /// Uses the same unsigned char conversion as isAlpha.
    bool isDigit(char c)
    {
        const auto byte = static_cast<unsigned char>(c);
        return std::isdigit(byte) != 0;
    }
}
namespace CSMFilter
{
struct Token
@ -103,7 +116,7 @@ CSMFilter::Token CSMFilter::Parser::getStringToken()
{
char c = mInput[mIndex];
if (std::isalpha (c) || c==':' || c=='_' || (!string.empty() && std::isdigit (c)) || c=='"' ||
if (isAlpha(c) || c==':' || c=='_' || (!string.empty() && isDigit(c)) || c=='"' ||
(!string.empty() && string[0]=='"'))
string += c;
else
@ -150,7 +163,7 @@ CSMFilter::Token CSMFilter::Parser::getNumberToken()
{
char c = mInput[mIndex];
if (std::isdigit (c))
if (isDigit(c))
{
string += c;
hasDigit = true;
@ -225,10 +238,10 @@ CSMFilter::Token CSMFilter::Parser::getNextToken()
case '!': ++mIndex; return Token (Token::Type_OneShot);
}
if (c=='"' || c=='_' || std::isalpha (c) || c==':')
if (c=='"' || c=='_' || isAlpha(c) || c==':')
return getStringToken();
if (c=='-' || c=='.' || std::isdigit (c))
if (c=='-' || c=='.' || isDigit(c))
return getNumberToken();
error();

View file

@ -8,6 +8,7 @@
#include <stdexcept>
#include <components/esm/cellid.hpp>
#include <components/misc/stringops.hpp>
#include "collectionbase.hpp"
#include "columnbase.hpp"
@ -354,8 +355,7 @@ CSMWorld::LandTextureIdTable::ImportResults CSMWorld::LandTextureIdTable::import
for (int i = 0; i < idCollection()->getSize(); ++i)
{
auto& record = static_cast<const Record<LandTexture>&>(idCollection()->getRecord(i));
std::string texture = record.get().mTexture;
std::transform(texture.begin(), texture.end(), texture.begin(), tolower);
std::string texture = Misc::StringUtils::lowerCase(record.get().mTexture);
if (record.isModified())
reverseLookupMap.emplace(texture, idCollection()->getId(i));
}
@ -376,8 +376,7 @@ CSMWorld::LandTextureIdTable::ImportResults CSMWorld::LandTextureIdTable::import
// Look for a pre-existing record
auto& record = static_cast<const Record<LandTexture>&>(idCollection()->getRecord(oldRow));
std::string texture = record.get().mTexture;
std::transform(texture.begin(), texture.end(), texture.begin(), tolower);
std::string texture = Misc::StringUtils::lowerCase(record.get().mTexture);
auto searchIt = reverseLookupMap.find(texture);
if (searchIt != reverseLookupMap.end())
{

View file

@ -42,9 +42,9 @@ namespace MWInput
bool consumed = SDL_IsTextInputActive() && // Little trick to check if key is printable
(!(SDLK_SCANCODE_MASK & arg.keysym.sym) &&
(std::isprint(arg.keysym.sym) ||
// Don't trust isprint for symbols outside the extended ASCII range
(kc == MyGUI::KeyCode::None && arg.keysym.sym > 0xff)));
((kc == MyGUI::KeyCode::None && arg.keysym.sym > 0xff) ||
(arg.keysym.sym >= 0 && arg.keysym.sym <= 255 && std::isprint(arg.keysym.sym))));
if (kc != MyGUI::KeyCode::None && !mBindingsManager->isDetectingBindingState())
{
if (MWBase::Environment::get().getWindowManager()->injectKeyPress(kc, 0, arg.repeat))

View file

@ -8,6 +8,8 @@
#include <components/esm/esmreader.hpp>
#include <components/esm/defs.hpp>
#include <components/misc/utf8stream.hpp>
bool MWState::operator< (const Slot& left, const Slot& right)
{
return left.mTimeStamp<right.mTimeStamp;
@ -52,12 +54,14 @@ void MWState::Character::addSlot (const ESM::SavedGame& profile)
std::ostringstream stream;
// The profile description is user-supplied, so we need to escape the path
for (std::string::const_iterator it = profile.mDescription.begin(); it != profile.mDescription.end(); ++it)
Utf8Stream description(profile.mDescription);
while(!description.eof())
{
if (std::isalnum(*it)) // Ignores multibyte characters and non alphanumeric characters
stream << *it;
auto c = description.consume();
if(c <= 0x7F && std::isalnum(c)) // Ignore multibyte characters and non alphanumeric characters
stream << static_cast<char>(c);
else
stream << "_";
stream << '_';
}
const std::string ext = ".omwsave";

View file

@ -5,6 +5,8 @@
#include <boost/filesystem.hpp>
#include <components/misc/utf8stream.hpp>
MWState::CharacterManager::CharacterManager (const boost::filesystem::path& saves,
const std::vector<std::string>& contentFiles)
: mPath (saves), mCurrent (nullptr), mGame (getFirstGameFile(contentFiles))
@ -57,12 +59,14 @@ MWState::Character* MWState::CharacterManager::createCharacter(const std::string
std::ostringstream stream;
// The character name is user-supplied, so we need to escape the path
for (std::string::const_iterator it = name.begin(); it != name.end(); ++it)
Utf8Stream nameStream(name);
while(!nameStream.eof())
{
if (std::isalnum(*it)) // Ignores multibyte characters and non alphanumeric characters
stream << *it;
auto c = nameStream.consume();
if(c <= 0x7F && std::isalnum(c)) // Ignore multibyte characters and non alphanumeric characters
stream << static_cast<char>(c);
else
stream << "_";
stream << '_';
}
boost::filesystem::path path = mPath / stream.str();

View file

@ -84,7 +84,7 @@ namespace Files
{
mNext.push(character);
}
if (!mSeenNonWhitespace && !isspace(character))
if (!mSeenNonWhitespace && !(character >= 0 && character <= 255 && isspace(character)))
mSeenNonWhitespace = true;
}
int retval = mNext.front();

View file

@ -172,8 +172,7 @@ namespace Interpreter{
for(unsigned int j = 0; j < globals.size(); j++){
if(globals[j].length() > temp.length()){ // Just in case there's a global with a huuuge name
temp = text.substr(i+1, globals[j].length());
transform(temp.begin(), temp.end(), temp.begin(), ::tolower);
temp = Misc::StringUtils::lowerCase(text.substr(i+1, globals[j].length()));
}
found = check(temp, globals[j], &i, &start);

View file

@ -30,6 +30,11 @@ namespace LuaUtil
{"POTION", ESM::LuaScriptCfg::sPotion},
{"WEAPON", ESM::LuaScriptCfg::sWeapon},
};
/// Report whether \a c is classified as whitespace by std::isspace.
/// The value is converted to unsigned char before the call, because the
/// <cctype> functions only accept values representable as unsigned char (or EOF).
bool isSpace(char c)
{
    const auto byte = static_cast<unsigned char>(c);
    return std::isspace(byte) != 0;
}
}
const std::vector<int> ScriptsConfiguration::sEmpty;
@ -101,11 +106,11 @@ namespace LuaUtil
if (!line.empty() && line.back() == '\r')
line = line.substr(0, line.size() - 1);
while (!line.empty() && std::isspace(line[0]))
while (!line.empty() && isSpace(line[0]))
line = line.substr(1);
if (line.empty() || line[0] == '#') // Skip empty lines and comments
continue;
while (!line.empty() && std::isspace(line.back()))
while (!line.empty() && isSpace(line.back()))
line = line.substr(0, line.size() - 1);
if (!Misc::StringUtils::ciEndsWith(line, ".lua"))
@ -118,7 +123,7 @@ namespace LuaUtil
throw std::runtime_error(Misc::StringUtils::format("No flags found in: %s", std::string(line)));
std::string_view flagsStr = line.substr(0, semicolonPos);
std::string_view scriptPath = line.substr(semicolonPos + 1);
while (std::isspace(scriptPath[0]))
while (isSpace(scriptPath[0]))
scriptPath = scriptPath.substr(1);
// Parse flags
@ -126,10 +131,10 @@ namespace LuaUtil
size_t flagsPos = 0;
while (true)
{
while (flagsPos < flagsStr.size() && (std::isspace(flagsStr[flagsPos]) || flagsStr[flagsPos] == ','))
while (flagsPos < flagsStr.size() && (isSpace(flagsStr[flagsPos]) || flagsStr[flagsPos] == ','))
flagsPos++;
size_t startPos = flagsPos;
while (flagsPos < flagsStr.size() && !std::isspace(flagsStr[flagsPos]) && flagsStr[flagsPos] != ',')
while (flagsPos < flagsStr.size() && !isSpace(flagsStr[flagsPos]) && flagsStr[flagsPos] != ',')
flagsPos++;
if (startPos == flagsPos)
break;

View file

@ -184,17 +184,16 @@ public:
/// Strip leading and trailing whitespace from \a s in place.
/// A byte counts as whitespace when std::isspace says so; multibyte
/// (e.g. UTF-8 encoded) whitespace characters are not recognised.
/// \param s  String to trim; left empty if it consists solely of whitespace.
static inline void trim(std::string &s)
{
    // std::isspace must only be given values representable as unsigned char (or EOF),
    // so each char is converted first; bytes >= 0x80 would otherwise be negative
    // on platforms where char is signed.
    const auto notSpace = [](char ch)
    {
        // TODO Do we care about multibyte whitespace?
        return !std::isspace(static_cast<unsigned char>(ch));
    };

    // left trim
    s.erase(s.begin(), std::find_if(s.begin(), s.end(), notSpace));

    // right trim
    s.erase(std::find_if(s.rbegin(), s.rend(), notSpace).base(), s.end());
}
template <class Container>

View file

@ -3,6 +3,7 @@
#include <cstring>
#include <string>
#include <string_view>
#include <tuple>
class Utf8Stream
@ -30,6 +31,11 @@ public:
{
}
/// Construct a stream over the bytes viewed by \a str.
/// Delegates to the two-argument constructor, passing the address of the first byte
/// and the address one past the last byte, each reinterpreted as Point.
/// NOTE(review): presumably the stream only keeps these pointers rather than copying,
/// so the data behind \a str would have to outlive the stream - confirm.
Utf8Stream (std::string_view str) :
Utf8Stream (reinterpret_cast<Point>(str.data()), reinterpret_cast<Point>(str.data() + str.size()))
{
}
bool eof () const
{
return cur == end;