#include "lj_arch.h"
#if LJ_HAS_OPTIMISED_HASH == 1 || defined(SMOKETEST)
#include <stdint.h>
#include <sys/types.h>
#include <time.h>
#include <nmmintrin.h>
#if defined(_MSC_VER)
#include <process.h>
#define getpid _getpid
#else
#include <unistd.h>
#endif
#include "lj_def.h"
#include "lj_str.h"
#include "lj_jit.h"
#if defined(_MSC_VER)
#if defined(__clang__) && !defined(__SSE4_2__)
#error "This file must be built with /arch:AVX1 or higher"
#endif
#else
#if !defined(__SSE4_2__)
#error "This file must be built with -msse4.2"
#endif
#endif
#define lj_crc32_u32 _mm_crc32_u32
#define lj_crc32_u64 _mm_crc32_u64
#undef LJ_AINLINE
#define LJ_AINLINE
#if defined(__MINGW32__) || defined(_MSC_VER)
#define random() ((long) rand())
#define srandom(seed) srand(seed)
#endif
static const uint64_t* cast_uint64p(const char* str)
{
return (const uint64_t*)(void*)str;
}
static const uint32_t* cast_uint32p(const char* str)
{
return (const uint32_t*)(void*)str;
}
static LJ_AINLINE uint32_t lj_str_hash_1_4(const char* str, MSize len)
{
#if 0#else
uint32_t a, b, h = len;
a = *(const uint8_t *)str;
h ^= *(const uint8_t *)(str+len-1);
b = *(const uint8_t *)(str+(len>>1));
h ^= b; h -= lj_rol(b, 14);
a ^= h; a -= lj_rol(h, 11);
b ^= a; b -= lj_rol(a, 25);
h ^= b; h -= lj_rol(b, 16);
return h;
#endif
}
static LJ_AINLINE uint32_t lj_str_hash_4_16(const char* str, MSize len)
{
uint64_t v1, v2, h;
if (len >= 8) {
v1 = *cast_uint64p(str);
v2 = *cast_uint64p(str + len - 8);
} else {
v1 = *cast_uint32p(str);
v2 = *cast_uint32p(str + len - 4);
}
h = lj_crc32_u32(0, len);
h = lj_crc32_u64(h, v1);
h = lj_crc32_u64(h, v2);
return (uint32_t)h;
}
static uint32_t lj_str_hash_16_128(const char* str, MSize len)
{
uint64_t h1, h2;
uint32_t i;
h1 = lj_crc32_u32(0, len);
h2 = 0;
for (i = 0; i < len - 16; i += 16) {
h1 += lj_crc32_u64(h1, *cast_uint64p(str + i));
h2 += lj_crc32_u64(h2, *cast_uint64p(str + i + 8));
};
h1 = lj_crc32_u64(h1, *cast_uint64p(str + len - 16));
h2 = lj_crc32_u64(h2, *cast_uint64p(str + len - 8));
return lj_crc32_u32((uint32_t)h1, (uint32_t)h2);
}
static uint32_t random_pos[32][2];
static const int8_t log2_tab[128] = { -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 };
static LJ_AINLINE uint32_t log2_floor(uint32_t n)
{
if (n <= 127) {
return log2_tab[n];
}
if ((n >> 8) <= 127) {
return log2_tab[n >> 8] + 8;
}
if ((n >> 16) <= 127) {
return log2_tab[n >> 16] + 16;
}
if ((n >> 24) <= 127) {
return log2_tab[n >> 24] + 24;
}
return 31;
}
#define POW2_MASK(n) ((1L << (n)) - 1)
static void str_hash_init_random(void)
{
int i, seed, rml;
rml = log2_floor(RAND_MAX);
if (RAND_MAX & (RAND_MAX - 1)) {
rml += 1;
}
seed = lj_crc32_u32(0, getpid());
seed = lj_crc32_u32(seed, (uint32_t)time(NULL));
srandom(seed);
for (i = 0; i < 3; i++) {
random_pos[i][0] = random_pos[i][1] = 0;
}
for (; i < rml; i++) {
random_pos[i][0] = random() & POW2_MASK(i+1);
random_pos[i][1] = random() & POW2_MASK(i+1);
}
for (; i < 31; i++) {
int j;
for (j = 0; j < 2; j++) {
uint32_t v, scale;
scale = random_pos[i - rml][0];
if (scale == 0) {
scale = 1;
}
v = (random() * scale) & POW2_MASK(i+1);
random_pos[i][j] = v;
}
}
}
#undef POW2_MASK
static LJ_AINLINE uint32_t get_random_pos_unsafe(uint32_t chunk_sz_order,
uint32_t idx)
{
uint32_t pos = random_pos[chunk_sz_order][idx & 1];
return pos;
}
static LJ_NOINLINE uint32_t lj_str_hash_128_above(const char* str,
MSize len)
{
uint32_t chunk_num, chunk_sz, chunk_sz_log2, i, pos1, pos2;
uint64_t h1, h2, v;
const char* chunk_ptr;
chunk_num = 16;
chunk_sz = len / chunk_num;
chunk_sz_log2 = log2_floor(chunk_sz);
pos1 = get_random_pos_unsafe(chunk_sz_log2, 0);
pos2 = get_random_pos_unsafe(chunk_sz_log2, 1);
h1 = lj_crc32_u32(0, len);
h2 = 0;
for (i = 0, chunk_ptr = str; i < (chunk_num / 2 - 1);
chunk_ptr += chunk_sz, i++) {
v = *cast_uint64p(chunk_ptr + pos1);
h1 = lj_crc32_u64(h1, v);
v = *cast_uint64p(chunk_ptr + chunk_sz + pos2);
h2 = lj_crc32_u64(h2, v);
}
v = *cast_uint64p(chunk_ptr + pos1);
h1 = lj_crc32_u64(h1, v);
v = *cast_uint64p(chunk_ptr + chunk_sz - 8 - pos2);
h2 = lj_crc32_u64(h2, v);
h1 = lj_crc32_u64(h1, *cast_uint64p(str));
h2 = lj_crc32_u64(h2, *cast_uint64p(str + len - 8));
h1 = lj_crc32_u32((uint32_t)h1, (uint32_t)h2);
return (uint32_t)h1;
}
static LJ_AINLINE uint32_t lj_str_hash_opt(const char* str, MSize len)
{
if (len < 128) {
if (len >= 16) {
return lj_str_hash_16_128(str, len);
}
if (len >= 4) {
return lj_str_hash_4_16(str, len);
}
return lj_str_hash_1_4(str, len);
}
return lj_str_hash_128_above(str, len);
}
void lj_str_hash_init(uint32_t flags)
{
if (flags & JIT_F_SSE4_2) {
#ifndef SMOKETEST
lj_str_hash = lj_str_hash_opt;
#endif
str_hash_init_random();
}
}
#endif