Momo: string hashing

using SipHash
This commit is contained in:
tildearrow 2024-06-09 03:31:16 -05:00
parent e55c944ccc
commit d6783f8e2c
3 changed files with 164 additions and 5 deletions

View file

@ -49,11 +49,9 @@ struct sigaction termsa;
#endif
#ifdef HAVE_MOMO
#define TA_SETLOCALE momo_setlocale
#define TA_BINDTEXTDOMAIN momo_bindtextdomain
#define TA_TEXTDOMAIN momo_textdomain
#else
#define TA_SETLOCALE setlocale
#define TA_BINDTEXTDOMAIN bindtextdomain
#define TA_TEXTDOMAIN textdomain
#endif
@ -512,16 +510,27 @@ int main(int argc, char** argv) {
#ifdef HAVE_LOCALE
String reqLocale=e.getConfString("locale","");
const char* localeRet=NULL;
if ((localeRet=TA_SETLOCALE(LC_CTYPE,reqLocale.c_str()))==NULL) {
if ((localeRet=setlocale(LC_CTYPE,reqLocale.c_str()))==NULL) {
logE("could not set locale (CTYPE)!");
} else {
logV("locale: %s",localeRet);
}
if ((localeRet=TA_SETLOCALE(LC_MESSAGES,reqLocale.c_str()))==NULL) {
if ((localeRet=setlocale(LC_MESSAGES,reqLocale.c_str()))==NULL) {
logE("could not set locale (MESSAGES)!");
#ifdef HAVE_MOMO
if (momo_setlocale(LC_MESSAGES,reqLocale.c_str())==NULL) {
logV("Momo: could not set locale!");
}
#endif
} else {
logV("locale: %s",localeRet);
#ifdef HAVE_MOMO
if (momo_setlocale(LC_MESSAGES,localeRet)==NULL) {
logV("Momo: could not set locale!");
}
#endif
}
if ((localeRet=TA_BINDTEXTDOMAIN("furnace","locale"))==NULL) {
if ((localeRet=TA_BINDTEXTDOMAIN("furnace","../po/locale"))==NULL) {
logE("could not bind text domain!");

142
src/momo/halfsiphash.c Normal file
View file

@ -0,0 +1,142 @@
/*
SipHash reference C implementation
Copyright (c) 2016 Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along
with
this software. If not, see
<http://creativecommons.org/publicdomain/zero/1.0/>.
*/
// modified for simplicity
#include <stddef.h>
#include <stdint.h>
/* default: HalfSipHash-2-4 (2 compression rounds, 4 finalization rounds) */
#ifndef cROUNDS
#define cROUNDS 2
#endif
#ifndef dROUNDS
#define dROUNDS 4
#endif

/* Rotate the 32-bit value x left by b bits (0 < b < 32). */
#define ROTL(x, b) (uint32_t)(((x) << (b)) | ((x) >> (32 - (b))))

/* Store the 32-bit value v into p[0..3], least significant byte first.
   Wrapped in do/while(0) so it behaves as a single statement. */
#define U32TO8_LE(p, v)                                                        \
  do {                                                                         \
    (p)[0] = (uint8_t)((v));                                                   \
    (p)[1] = (uint8_t)((v) >> 8);                                              \
    (p)[2] = (uint8_t)((v) >> 16);                                             \
    (p)[3] = (uint8_t)((v) >> 24);                                             \
  } while (0)

/* Load a 32-bit value from p[0..3], least significant byte first. */
#define U8TO32_LE(p)                                                           \
  (((uint32_t)((p)[0])) | ((uint32_t)((p)[1]) << 8) |                          \
   ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24))

/* One mixing round; reads and writes the locals v0..v3 of the enclosing
   function. */
#define SIPROUND                                                               \
  do {                                                                         \
    v0 += v1;                                                                  \
    v1 = ROTL(v1, 5);                                                          \
    v1 ^= v0;                                                                  \
    v0 = ROTL(v0, 16);                                                         \
    v2 += v3;                                                                  \
    v3 = ROTL(v3, 8);                                                          \
    v3 ^= v2;                                                                  \
    v0 += v3;                                                                  \
    v3 = ROTL(v3, 7);                                                          \
    v3 ^= v0;                                                                  \
    v2 += v1;                                                                  \
    v1 = ROTL(v1, 13);                                                         \
    v1 ^= v2;                                                                  \
    v2 = ROTL(v2, 16);                                                         \
  } while (0)

#ifdef DEBUG_SIPHASH
/* PRIx32 lives in <inttypes.h>; without it the debug build does not compile. */
#include <inttypes.h>
#include <stdio.h>
/* Dump the state words; relies on the locals inlen and v0..v3. */
#define TRACE                                                                  \
  do {                                                                         \
    printf("(%3zu) v0 %08" PRIx32 "\n", inlen, v0);                            \
    printf("(%3zu) v1 %08" PRIx32 "\n", inlen, v1);                            \
    printf("(%3zu) v2 %08" PRIx32 "\n", inlen, v2);                            \
    printf("(%3zu) v3 %08" PRIx32 "\n", inlen, v3);                            \
  } while (0)
#else
#define TRACE
#endif

/*
   Computes a 32-bit HalfSipHash value.
   *in:    pointer to input data (read-only); no bytes are read when inlen is 0
   inlen:  input data length in bytes (any size_t value; only its low 8 bits
           are mixed into the final block)
   k:      64-bit key; the low 32 bits are used as k0, the high 32 bits as k1
   returns the 32-bit hash of the input under key k
*/
static uint32_t halfsiphash(const void *in, const size_t inlen, uint64_t k) {
  const unsigned char *ni = (const unsigned char *)in;
  /* number of trailing bytes that do not fill a whole 32-bit word */
  const int left = (int)(inlen & 3);
  /* one past the last whole 32-bit word of the input */
  const unsigned char *end = ni + (inlen - (size_t)left);
  const uint32_t k0 = (uint32_t)k;
  const uint32_t k1 = (uint32_t)(k >> 32);
  uint32_t v0 = k0;
  uint32_t v1 = k1;
  uint32_t v2 = UINT32_C(0x6c796765) ^ k0;
  uint32_t v3 = UINT32_C(0x74656462) ^ k1;
  /* final block: length in the top byte, leftover bytes below it */
  uint32_t b = ((uint32_t)inlen) << 24;
  uint32_t m;
  int i;

  /* absorb the input one little-endian 32-bit word at a time */
  for (; ni != end; ni += 4) {
    m = U8TO32_LE(ni);
    v3 ^= m;
    TRACE;
    for (i = 0; i < cROUNDS; ++i) {
      SIPROUND;
    }
    v0 ^= m;
  }

  /* fold the 0..3 leftover bytes into the final block */
  switch (left) {
  case 3:
    b |= ((uint32_t)ni[2]) << 16;
    /* FALLTHRU */
  case 2:
    b |= ((uint32_t)ni[1]) << 8;
    /* FALLTHRU */
  case 1:
    b |= ((uint32_t)ni[0]);
    break;
  default:
    /* no leftover bytes */
    break;
  }

  v3 ^= b;
  TRACE;
  for (i = 0; i < cROUNDS; ++i) {
    SIPROUND;
  }
  v0 ^= b;

  /* finalization */
  v2 ^= 0xff;
  TRACE;
  for (i = 0; i < dROUNDS; ++i) {
    SIPROUND;
  }
  return v1 ^ v3;
}

View file

@ -26,6 +26,8 @@
#include <errno.h>
#include "momo.h"
#include "halfsiphash.c"
#ifdef ANDROID
#include <SDL_rwops.h>
#define MO_FREE SDL_free
@ -78,8 +80,10 @@ struct LocaleDomain {
size_t moLen;
const char** stringPtr;
const char** transPtr;
unsigned int* hashes;
size_t stringCount;
size_t firstString[256];
size_t lastString[256];
struct StackData pluralProgram[256];
};
@ -744,6 +748,7 @@ const char* momo_bindtextdomain(const char* domainName, const char* dirName) {
if (newDomain->stringCount) {
newDomain->stringPtr=malloc(newDomain->stringCount*sizeof(const char*));
newDomain->transPtr=malloc(newDomain->stringCount*sizeof(const char*));
newDomain->hashes=malloc(newDomain->stringCount*sizeof(unsigned int));
}
unsigned int* strTable=(unsigned int*)(&newDomain->mo[header->stringPtr]);
@ -754,6 +759,8 @@ const char* momo_bindtextdomain(const char* domainName, const char* dirName) {
newDomain->stringPtr[i]=(const char*)(&newDomain->mo[strTable[1+(i<<1)]]);
newDomain->transPtr[i]=(const char*)(&newDomain->mo[transTable[1+(i<<1)]]);
newDomain->hashes[i]=halfsiphash(newDomain->stringPtr[i],strlen(newDomain->stringPtr[i]),0);
while (curChar<=(unsigned char)newDomain->stringPtr[i][0]) {
newDomain->firstString[curChar]=i;
curChar++;
@ -835,8 +842,9 @@ const char* momo_gettext(const char* str) {
}
if (str==NULL) return NULL;
// TODO: optimize
unsigned int hash=halfsiphash(str,strlen(str),0);
for (size_t i=curDomain->firstString[(unsigned char)(str[0])]; i<curDomain->stringCount; i++) {
if (strcmp(curDomain->stringPtr[i],str)==0) {
if (hash==curDomain->hashes[i]) {
return curDomain->transPtr[i];
}
}