From d6783f8e2c94bd76a3f9153d1a474bd608547792 Mon Sep 17 00:00:00 2001
From: tildearrow
Date: Sun, 9 Jun 2024 03:31:16 -0500
Subject: [PATCH] Momo: string hashing using SipHash

---
 src/main.cpp           |  17 +++--
 src/momo/halfsiphash.c | 142 +++++++++++++++++++++++++++++++++++++++++
 src/momo/momo.c        |  11 ++-
 3 files changed, 165 insertions(+), 5 deletions(-)
 create mode 100644 src/momo/halfsiphash.c

diff --git a/src/main.cpp b/src/main.cpp
index 13df48038..4348901af 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -49,11 +49,9 @@ struct sigaction termsa;
 #endif
 
 #ifdef HAVE_MOMO
-#define TA_SETLOCALE momo_setlocale
 #define TA_BINDTEXTDOMAIN momo_bindtextdomain
 #define TA_TEXTDOMAIN momo_textdomain
 #else
-#define TA_SETLOCALE setlocale
 #define TA_BINDTEXTDOMAIN bindtextdomain
 #define TA_TEXTDOMAIN textdomain
 #endif
@@ -512,16 +510,27 @@ int main(int argc, char** argv) {
 #ifdef HAVE_LOCALE
   String reqLocale=e.getConfString("locale","");
   const char* localeRet=NULL;
-  if ((localeRet=TA_SETLOCALE(LC_CTYPE,reqLocale.c_str()))==NULL) {
+  if ((localeRet=setlocale(LC_CTYPE,reqLocale.c_str()))==NULL) {
     logE("could not set locale (CTYPE)!");
   } else {
     logV("locale: %s",localeRet);
   }
-  if ((localeRet=TA_SETLOCALE(LC_MESSAGES,reqLocale.c_str()))==NULL) {
+  if ((localeRet=setlocale(LC_MESSAGES,reqLocale.c_str()))==NULL) {
     logE("could not set locale (MESSAGES)!");
+#ifdef HAVE_MOMO
+    if (momo_setlocale(LC_MESSAGES,reqLocale.c_str())==NULL) {
+      logV("Momo: could not set locale!");
+    }
+#endif
   } else {
     logV("locale: %s",localeRet);
+#ifdef HAVE_MOMO
+    if (momo_setlocale(LC_MESSAGES,localeRet)==NULL) {
+      logV("Momo: could not set locale!");
+    }
+#endif
   }
+
   if ((localeRet=TA_BINDTEXTDOMAIN("furnace","locale"))==NULL) {
     if ((localeRet=TA_BINDTEXTDOMAIN("furnace","../po/locale"))==NULL) {
       logE("could not bind text domain!");
diff --git a/src/momo/halfsiphash.c b/src/momo/halfsiphash.c
new file mode 100644
index 000000000..202bdfd93
--- /dev/null
+++ b/src/momo/halfsiphash.c
@@ -0,0 +1,142 @@
+
+/*
+   SipHash reference C implementation
+
+   Copyright (c) 2016 Jean-Philippe Aumasson
+
+   To the extent possible under law, the author(s) have dedicated all copyright
+   and related and neighboring rights to this software to the public domain
+   worldwide. This software is distributed without any warranty.
+
+   You should have received a copy of the CC0 Public Domain Dedication along
+   with
+   this software. If not, see
+   <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+// modified for simplicity
+#include <stddef.h>
+#include <stdint.h>
+
+/* default: SipHash-2-4 */
+#ifndef cROUNDS
+#define cROUNDS 2
+#endif
+#ifndef dROUNDS
+#define dROUNDS 4
+#endif
+
+#define ROTL(x, b) (uint32_t)(((x) << (b)) | ((x) >> (32 - (b))))
+
+#define U32TO8_LE(p, v)                                                        \
+    (p)[0] = (uint8_t)((v));                                                   \
+    (p)[1] = (uint8_t)((v) >> 8);                                              \
+    (p)[2] = (uint8_t)((v) >> 16);                                             \
+    (p)[3] = (uint8_t)((v) >> 24);
+
+#define U8TO32_LE(p)                                                           \
+    (((uint32_t)((p)[0])) | ((uint32_t)((p)[1]) << 8) |                        \
+     ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24))
+
+#define SIPROUND                                                               \
+    do {                                                                       \
+        v0 += v1;                                                              \
+        v1 = ROTL(v1, 5);                                                      \
+        v1 ^= v0;                                                              \
+        v0 = ROTL(v0, 16);                                                     \
+        v2 += v3;                                                              \
+        v3 = ROTL(v3, 8);                                                      \
+        v3 ^= v2;                                                              \
+        v0 += v3;                                                              \
+        v3 = ROTL(v3, 7);                                                      \
+        v3 ^= v0;                                                              \
+        v2 += v1;                                                              \
+        v1 = ROTL(v1, 13);                                                     \
+        v1 ^= v2;                                                              \
+        v2 = ROTL(v2, 16);                                                     \
+    } while (0)
+
+
+#ifdef DEBUG_SIPHASH
+#include <stdio.h>
+
+#define TRACE                                                                  \
+    do {                                                                       \
+        printf("(%3zu) v0 %08" PRIx32 "\n", inlen, v0);                        \
+        printf("(%3zu) v1 %08" PRIx32 "\n", inlen, v1);                        \
+        printf("(%3zu) v2 %08" PRIx32 "\n", inlen, v2);                        \
+        printf("(%3zu) v3 %08" PRIx32 "\n", inlen, v3);                        \
+    } while (0)
+#else
+#define TRACE
+#endif
+
+/*
+    Computes a 32-bit HalfSipHash value
+    *in: pointer to input data (read-only)
+    inlen: input data length in bytes (any size_t value)
+    k: 64-bit key; the low 32 bits become k0 and the high 32 bits become k1
+    returns: the 32-bit hash value (v1 ^ v3 after the finalization rounds)
+    note: unlike the reference, there is no out/outlen; output is always 4 bytes
+*/
+static uint32_t halfsiphash(const void *in, const size_t inlen, uint64_t k) {
+
+    const unsigned char *ni = (const unsigned char *)in;
+
+    uint32_t v0 = 0;
+    uint32_t v1 = 0;
+    uint32_t v2 = UINT32_C(0x6c796765);
+    uint32_t v3 = UINT32_C(0x74656462);
+    uint32_t k0 = k;
+    uint32_t k1 = k >> 32;
+    uint32_t m;
+    int i;
+    const unsigned char *end = ni + inlen - (inlen % sizeof(uint32_t));
+    const int left = inlen & 3;
+    uint32_t b = ((uint32_t)inlen) << 24;
+    v3 ^= k1;
+    v2 ^= k0;
+    v1 ^= k1;
+    v0 ^= k0;
+
+    for (; ni != end; ni += 4) {
+        m = U8TO32_LE(ni);
+        v3 ^= m;
+
+        TRACE;
+        for (i = 0; i < cROUNDS; ++i)
+            SIPROUND;
+
+        v0 ^= m;
+    }
+
+    switch (left) {
+    case 3:
+        b |= ((uint32_t)ni[2]) << 16;
+        /* FALLTHRU */
+    case 2:
+        b |= ((uint32_t)ni[1]) << 8;
+        /* FALLTHRU */
+    case 1:
+        b |= ((uint32_t)ni[0]);
+        break;
+    case 0:
+        break;
+    }
+
+    v3 ^= b;
+
+    TRACE;
+    for (i = 0; i < cROUNDS; ++i)
+        SIPROUND;
+
+    v0 ^= b;
+
+    v2 ^= 0xff;
+
+    TRACE;
+    for (i = 0; i < dROUNDS; ++i)
+        SIPROUND;
+
+    b = v1 ^ v3;
+    return b;
+}
diff --git a/src/momo/momo.c b/src/momo/momo.c
index 3e607f24b..c6c29a1b1 100644
--- a/src/momo/momo.c
+++ b/src/momo/momo.c
@@ -26,6 +26,8 @@
 #include
 #include "momo.h"
 
+#include "halfsiphash.c"
+
 #ifdef ANDROID
 #include <SDL.h>
 #define MO_FREE SDL_free
@@ -78,8 +80,10 @@ struct LocaleDomain {
   size_t moLen;
   const char** stringPtr;
   const char** transPtr;
+  unsigned int* hashes;
   size_t stringCount;
   size_t firstString[256];
+  size_t lastString[256];
   struct StackData pluralProgram[256];
 };
 
@@ -744,6 +748,7 @@ const char* momo_bindtextdomain(const char* domainName, const char* dirName) {
   if (newDomain->stringCount) {
     newDomain->stringPtr=malloc(newDomain->stringCount*sizeof(const char*));
     newDomain->transPtr=malloc(newDomain->stringCount*sizeof(const char*));
+    newDomain->hashes=malloc(newDomain->stringCount*sizeof(unsigned int));
   }
 
   unsigned int* strTable=(unsigned int*)(&newDomain->mo[header->stringPtr]);
@@ -754,6 +759,8 @@ const char* momo_bindtextdomain(const char* domainName, const char* dirName) {
     newDomain->stringPtr[i]=(const char*)(&newDomain->mo[strTable[1+(i<<1)]]);
     newDomain->transPtr[i]=(const char*)(&newDomain->mo[transTable[1+(i<<1)]]);
 
+    newDomain->hashes[i]=halfsiphash(newDomain->stringPtr[i],strlen(newDomain->stringPtr[i]),0);
+
     while (curChar<=(unsigned char)newDomain->stringPtr[i][0]) {
       newDomain->firstString[curChar]=i;
       curChar++;
@@ -835,8 +842,10 @@ const char* momo_gettext(const char* str) {
   }
   if (str==NULL) return NULL;
   // TODO: optimize
+  unsigned int hash=halfsiphash(str,strlen(str),0);
   for (size_t i=curDomain->firstString[(unsigned char)(str[0])]; i<curDomain->stringCount; i++) {
-    if (strcmp(curDomain->stringPtr[i],str)==0) {
+    // the hash is only a cheap pre-filter: distinct strings can collide, so confirm with strcmp
+    if (hash==curDomain->hashes[i] && strcmp(curDomain->stringPtr[i],str)==0) {
       return curDomain->transPtr[i];
     }
   }