196 lines
5.2 KiB
C++
196 lines
5.2 KiB
C++
#include "StdAfx.h"
|
|
#include "SmartStrStr.h"
|
|
|
|
// Populates the single-character substitution table (substituions) and then
// registers the two-character expansions via InitTwoCharMappings().
SmartStrStr::SmartStrStr() {
	// Pass 1: for every code point in [128, 0x10000), remember the value
	// Transform() turns it into, whenever that value differs from the input.
	for (uint32_t code = 128; code < 0x10000; ++code) {
		const uint32_t mapped = Transform(code);
		if (mapped == code) continue;
		substituions[code].insert(mapped);
	}

	// Pass 2: for every code point in [32, 0x10000) whose ToLower() result
	// differs, accept the lowercase form plus everything already accepted
	// for that lowercase form.
	for (uint32_t code = 32; code < 0x10000; ++code) {
		const auto lowered = ToLower(code);
		if (lowered == code) continue;

		auto & accepted = substituions[code];
		accepted.insert(lowered);

		const auto lowerEntry = substituions.find(lowered);
		if (lowerEntry != substituions.end()) {
			accepted.insert(lowerEntry->second.begin(), lowerEntry->second.end());
		}
	}

	InitTwoCharMappings();
}
|
|
|
|
|
|
const wchar_t * SmartStrStr::matchHereW(const wchar_t * pString, const wchar_t * pUserString) const {
|
|
auto walkData = pString;
|
|
auto walkUser = pUserString;
|
|
for (;; ) {
|
|
if (*walkUser == 0) return walkData;
|
|
|
|
uint32_t cData, cUser;
|
|
size_t dData = pfc::utf16_decode_char(walkData, &cData);
|
|
size_t dUser = pfc::utf16_decode_char(walkUser, &cUser);
|
|
if (dData == 0 || dUser == 0) return nullptr;
|
|
|
|
if (cData != cUser) {
|
|
bool gotMulti = false;
|
|
{
|
|
auto multi = twoCharMappings.find(cData);
|
|
if (multi != twoCharMappings.end()) {
|
|
const char * cDataSubst = multi->second;
|
|
PFC_ASSERT(strlen(cDataSubst) == 2);
|
|
if (matchOneChar(cUser, (uint32_t)cDataSubst[0])) {
|
|
auto walkUser2 = walkUser + dUser;
|
|
uint32_t cUser2;
|
|
auto dUser2 = pfc::utf16_decode_char(walkUser2, &cUser2);
|
|
if (matchOneChar(cUser2, (uint32_t)cDataSubst[1])) {
|
|
gotMulti = true;
|
|
dUser += dUser2;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (!gotMulti) {
|
|
if (!matchOneChar(cUser, cData)) return nullptr;
|
|
}
|
|
}
|
|
|
|
walkData += dData;
|
|
walkUser += dUser;
|
|
}
|
|
}
|
|
|
|
bool SmartStrStr::equals(const char * pString, const char * pUserString) const {
|
|
auto p = matchHere(pString, pUserString);
|
|
if ( p == nullptr ) return false;
|
|
return *p == 0;
|
|
}
|
|
|
|
const char * SmartStrStr::matchHere(const char * pString, const char * pUserString) const {
|
|
const char * walkData = pString;
|
|
const char * walkUser = pUserString;
|
|
for (;; ) {
|
|
if (*walkUser == 0) return walkData;
|
|
|
|
uint32_t cData, cUser;
|
|
size_t dData = pfc::utf8_decode_char(walkData, cData);
|
|
size_t dUser = pfc::utf8_decode_char(walkUser, cUser);
|
|
if (dData == 0 || dUser == 0) return nullptr;
|
|
|
|
if (cData != cUser) {
|
|
bool gotMulti = false;
|
|
{
|
|
auto multi = twoCharMappings.find(cData);
|
|
if (multi != twoCharMappings.end()) {
|
|
const char * cDataSubst = multi->second;
|
|
PFC_ASSERT(strlen(cDataSubst) == 2);
|
|
if (matchOneChar(cUser, (uint32_t)cDataSubst[0])) {
|
|
auto walkUser2 = walkUser + dUser;
|
|
uint32_t cUser2;
|
|
auto dUser2 = pfc::utf8_decode_char(walkUser2, cUser2);
|
|
if (matchOneChar(cUser2, (uint32_t)cDataSubst[1])) {
|
|
gotMulti = true;
|
|
dUser += dUser2;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (!gotMulti) {
|
|
if (!matchOneChar(cUser, cData)) return nullptr;
|
|
}
|
|
}
|
|
|
|
walkData += dData;
|
|
walkUser += dUser;
|
|
}
|
|
}
|
|
|
|
const char * SmartStrStr::strStrEnd(const char * pString, const char * pSubString, size_t * outFoundAt) const {
|
|
size_t walk = 0;
|
|
for (;; ) {
|
|
if (pString[walk] == 0) return nullptr;
|
|
auto end = matchHere(pString+walk, pSubString);
|
|
if (end != nullptr) {
|
|
if ( outFoundAt != nullptr ) * outFoundAt = walk;
|
|
return end;
|
|
}
|
|
|
|
size_t delta = pfc::utf8_char_len( pString + walk );
|
|
if ( delta == 0 ) return nullptr;
|
|
walk += delta;
|
|
}
|
|
}
|
|
|
|
const wchar_t * SmartStrStr::strStrEndW(const wchar_t * pString, const wchar_t * pSubString, size_t * outFoundAt) const {
|
|
size_t walk = 0;
|
|
for (;; ) {
|
|
if (pString[walk] == 0) return nullptr;
|
|
auto end = matchHereW(pString + walk, pSubString);
|
|
if (end != nullptr) {
|
|
if (outFoundAt != nullptr) * outFoundAt = walk;
|
|
return end;
|
|
}
|
|
|
|
uint32_t dontcare;
|
|
size_t delta = pfc::utf16_decode_char(pString + walk, & dontcare);
|
|
if (delta == 0) return nullptr;
|
|
walk += delta;
|
|
}
|
|
}
|
|
|
|
bool SmartStrStr::matchOneChar(uint32_t cInput, uint32_t cData) const {
|
|
if (cInput == cData) return true;
|
|
auto iter = substituions.find(cData);
|
|
if (iter == substituions.end()) return false;
|
|
auto & s = iter->second;
|
|
return s.find(cInput) != s.end();
|
|
}
|
|
|
|
uint32_t SmartStrStr::Transform(uint32_t c) {
|
|
wchar_t wide[2] = {}; char out[4] = {};
|
|
pfc::utf16_encode_char(c, wide);
|
|
BOOL fail = FALSE;
|
|
if (WideCharToMultiByte(pfc::stringcvt::codepage_ascii, 0, wide, 2, out, 4, "?", &fail) > 0) {
|
|
if (!fail) {
|
|
if (out[0] > 0 && out[1] == 0) {
|
|
c = out[0];
|
|
}
|
|
}
|
|
}
|
|
return c;
|
|
}
|
|
|
|
uint32_t SmartStrStr::ToLower(uint32_t c) {
|
|
return pfc::charLower(c);
|
|
}
|
|
|
|
void SmartStrStr::ImportTwoCharMappings(const wchar_t * list, const char * replacement) {
|
|
PFC_ASSERT(strlen(replacement) == 2);
|
|
for (const wchar_t* ptr = list; ; ) {
|
|
wchar_t c = *ptr++;
|
|
if (c == 0) break;
|
|
twoCharMappings[(uint32_t)c] = replacement;
|
|
}
|
|
}
|
|
|
|
// Registers the built-in table of single characters that may be matched by a
// two-character sequence. Each row lists the characters sharing one
// expansion; rows are imported in the order given.
void SmartStrStr::InitTwoCharMappings() {
	static const struct {
		const wchar_t * ligatures; // characters that expand to `expansion`
		const char * expansion;    // the two-character form
	} kTable[] = {
		{ L"ÆǢǼ", "AE" },
		{ L"æǣǽ", "ae" },
		{ L"Œ", "OE" },
		{ L"œɶ", "oe" },
		{ L"DŽDZ", "DZ" },
		{ L"dždzʣʥ", "dz" },
		{ L"ß", "ss" },
		{ L"LJ", "LJ" },
		{ L"Lj", "Lj" },
		{ L"lj", "lj" },
		{ L"NJ", "NJ" },
		{ L"Nj", "Nj" },
		{ L"nj", "nj" },
		{ L"IJ", "IJ" },
		{ L"ij", "ij" },
	};

	for (const auto & row : kTable) {
		ImportTwoCharMappings(row.ligatures, row.expansion);
	}
}
|