Skip to content

[Support] Integrate SipHash.cpp into libSupport. #94394

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions llvm/include/llvm/Support/SipHash.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
//===--- SipHash.h - An ABI-stable string SipHash ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// An implementation of SipHash, a hash function optimized for speed on
// short inputs. Based on the SipHash reference implementation.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_SIPHASH_H
#define LLVM_SUPPORT_SIPHASH_H

#include <cstdint>

namespace llvm {

// Forward declaration only; this header does not pull in the ArrayRef
// definition itself.
template <typename T> class ArrayRef;

/// Computes a SipHash-2-4 64-bit result.
///
/// \param In the bytes to hash.
/// \param K the 16-byte key.
/// \param Out receives the 8-byte hash value.
void getSipHash_2_4_64(ArrayRef<uint8_t> In, const uint8_t (&K)[16],
uint8_t (&Out)[8]);

/// Computes a SipHash-2-4 128-bit result.
///
/// \param In the bytes to hash.
/// \param K the 16-byte key.
/// \param Out receives the 16-byte hash value.
void getSipHash_2_4_128(ArrayRef<uint8_t> In, const uint8_t (&K)[16],
uint8_t (&Out)[16]);

} // end namespace llvm

#endif
4 changes: 1 addition & 3 deletions llvm/lib/Support/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,6 @@ endif()

add_subdirectory(BLAKE3)

# Temporarily ignore SipHash.cpp before we fully integrate it into LLVMSupport.
set(LLVM_OPTIONAL_SOURCES SipHash.cpp)

add_llvm_component_library(LLVMSupport
ABIBreak.cpp
AMDGPUMetadata.cpp
Expand Down Expand Up @@ -227,6 +224,7 @@ add_llvm_component_library(LLVMSupport
SHA1.cpp
SHA256.cpp
Signposts.cpp
SipHash.cpp
SlowDynamicAPInt.cpp
SmallPtrSet.cpp
SmallVector.cpp
Expand Down
322 changes: 146 additions & 176 deletions llvm/lib/Support/SipHash.cpp
Original file line number Diff line number Diff line change
@@ -1,185 +1,155 @@
/*
SipHash reference C implementation

Copyright (c) 2012-2022 Jean-Philippe Aumasson
<[email protected]>
Copyright (c) 2012-2014 Daniel J. Bernstein <[email protected]>

To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.

You should have received a copy of the CC0 Public Domain Dedication along
with
this software. If not, see
<http://creativecommons.org/publicdomain/zero/1.0/>.
*/

#include "siphash.h"
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* default: SipHash-2-4 */
#ifndef cROUNDS
#define cROUNDS 2
#endif
#ifndef dROUNDS
#define dROUNDS 4
#endif
//===--- SipHash.cpp - An ABI-stable string hash --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/SipHash.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Endian.h"
#include <cstdint>

using namespace llvm;
using namespace support;

// Lightly adapted from the SipHash reference C implementation:
// https://github.com/veorq/SipHash
// by Jean-Philippe Aumasson and Daniel J. Bernstein

// Rotates the 64-bit value x left by b bits. b is expected to be in [1, 63]:
// a count of 0 would shift right by the full width of the type.
#define ROTL(x, b) (uint64_t)(((x) >> (64 - (b))) | ((x) << (b)))

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can use a function call instead of a macro?

E.g. (as in xxhash):

/// Rotates the 64-bit value \p X left by \p R bits.
///
/// The rotate count is reduced modulo 64, so R == 0 (or any multiple of 64)
/// returns X unchanged instead of shifting by the full width of the type,
/// which would be undefined behavior.
static uint64_t rotl64(uint64_t X, size_t R) {
  R &= 63;
  // The second mask keeps the right-shift count at 0 (not 64) when R == 0.
  return (X << R) | (X >> ((64 - R) & 63));
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a little more inclined to keep the macros as close to the original as possible here, since they are the core of the function. It could help to name the function ROTL to match the original; we could turn the whole block into a function as well. I don't feel strongly either way; let me know which you prefer.

/* Stores the low 32 bits of v into p[0..3], least-significant byte first.
   Wrapped in do { } while (0) so the macro behaves as one statement, e.g.
   under an unbraced if/else. Both arguments are evaluated more than once. */
#define U32TO8_LE(p, v) \
  do { \
    (p)[0] = (uint8_t)((v)); \
    (p)[1] = (uint8_t)((v) >> 8); \
    (p)[2] = (uint8_t)((v) >> 16); \
    (p)[3] = (uint8_t)((v) >> 24); \
  } while (0)

/* Stores the 64-bit value v into p[0..7], least-significant byte first, as
   two 32-bit halves. Both arguments are evaluated more than once. */
#define U64TO8_LE(p, v) \
  do { \
    U32TO8_LE((p), (uint32_t)((v))); \
    U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); \
  } while (0)

/* Assembles a 64-bit value from the eight bytes p[0..7], where p[0] supplies
   the least-significant byte and p[7] the most-significant one. */
#define U8TO64_LE(p) \
  (((uint64_t)((p)[7]) << 56) | \
   ((uint64_t)((p)[6]) << 48) | \
   ((uint64_t)((p)[5]) << 40) | \
   ((uint64_t)((p)[4]) << 32) | \
   ((uint64_t)((p)[3]) << 24) | \
   ((uint64_t)((p)[2]) << 16) | \
   ((uint64_t)((p)[1]) << 8) | \
   ((uint64_t)((p)[0])))

/* One SipHash round. Mixes the four 64-bit state words v0, v1, v2 and v3 of
   the enclosing scope in place, using additions, XORs and ROTL rotations.
   Each step consumes the result of earlier steps, so the statement order must
   not be changed. The do { } while (0) wrapper lets `SIPROUND;` be used as a
   single statement, e.g. as the body of an unbraced for loop. */
#define SIPROUND \
do { \
v0 += v1; \
v1 = ROTL(v1, 13); \
v1 ^= v0; \
v0 = ROTL(v0, 32); \
v2 += v3; \
v3 = ROTL(v3, 16); \
v3 ^= v2; \
v0 += v3; \
v3 = ROTL(v3, 21); \
v3 ^= v0; \
v2 += v1; \
v1 = ROTL(v1, 17); \
v1 ^= v2; \
v2 = ROTL(v2, 32); \
} while (0)

#ifdef DEBUG_SIPHASH
/* PRIx64 is provided by <inttypes.h>, not by <stdio.h> or <stdint.h>. */
#include <inttypes.h>
#include <stdio.h>

/* Debug aid: prints the state words v0..v3 of the enclosing scope, each
   prefixed with the current value of inlen. */
#define TRACE \
  do { \
    printf("(%3zu) v0 %016" PRIx64 "\n", inlen, v0); \
    printf("(%3zu) v1 %016" PRIx64 "\n", inlen, v1); \
    printf("(%3zu) v2 %016" PRIx64 "\n", inlen, v2); \
    printf("(%3zu) v3 %016" PRIx64 "\n", inlen, v3); \
  } while (0)
#else
/* Tracing disabled: still a complete statement so that `TRACE;` behaves the
   same way syntactically in both configurations. */
#define TRACE \
  do { \
  } while (0)
#endif

/*
Computes a SipHash value
*in: pointer to input data (read-only)
inlen: input data length in bytes (any size_t value)
*k: pointer to the key data (read-only), must be 16 bytes
*out: pointer to output data (write-only), outlen bytes must be allocated
outlen: length of the output in bytes, must be 8 or 16
*/
int siphash(const void *in, const size_t inlen, const void *k, uint8_t *out,
const size_t outlen) {

const unsigned char *ni = (const unsigned char *)in;
const unsigned char *kk = (const unsigned char *)k;

assert((outlen == 8) || (outlen == 16));
uint64_t v0 = UINT64_C(0x736f6d6570736575);
uint64_t v1 = UINT64_C(0x646f72616e646f6d);
uint64_t v2 = UINT64_C(0x6c7967656e657261);
uint64_t v3 = UINT64_C(0x7465646279746573);
uint64_t k0 = U8TO64_LE(kk);
uint64_t k1 = U8TO64_LE(kk + 8);
uint64_t m;
int i;
const unsigned char *end = ni + inlen - (inlen % sizeof(uint64_t));
const int left = inlen & 7;
uint64_t b = ((uint64_t)inlen) << 56;
v3 ^= k1;
v2 ^= k0;
v1 ^= k1;
v0 ^= k0;

if (outlen == 16)
v1 ^= 0xee;

for (; ni != end; ni += 8) {
m = U8TO64_LE(ni);
v3 ^= m;

TRACE;
for (i = 0; i < cROUNDS; ++i)
SIPROUND;

v0 ^= m;
}

switch (left) {
case 7:
b |= ((uint64_t)ni[6]) << 48;
/* FALLTHRU */
case 6:
b |= ((uint64_t)ni[5]) << 40;
/* FALLTHRU */
case 5:
b |= ((uint64_t)ni[4]) << 32;
/* FALLTHRU */
case 4:
b |= ((uint64_t)ni[3]) << 24;
/* FALLTHRU */
case 3:
b |= ((uint64_t)ni[2]) << 16;
/* FALLTHRU */
case 2:
b |= ((uint64_t)ni[1]) << 8;
/* FALLTHRU */
case 1:
b |= ((uint64_t)ni[0]);
break;
case 0:
break;
}

v3 ^= b;

TRACE;
for (i = 0; i < cROUNDS; ++i)
SIPROUND;

v0 ^= b;

if (outlen == 16)
v2 ^= 0xee;
else
v2 ^= 0xff;
do { \
v0 += v1; \
v1 = ROTL(v1, 13); \
v1 ^= v0; \
v0 = ROTL(v0, 32); \
v2 += v3; \
v3 = ROTL(v3, 16); \
v3 ^= v2; \
v0 += v3; \
v3 = ROTL(v3, 21); \
v3 ^= v0; \
v2 += v1; \
v1 = ROTL(v1, 17); \
v1 ^= v2; \
v2 = ROTL(v2, 32); \
} while (0)

namespace {

/// Computes a SipHash value.
///
/// \tparam cROUNDS number of SIPROUND iterations applied per absorbed 8-byte
///         word (and for the final padded word).
/// \tparam dROUNDS number of SIPROUND iterations applied before each 8 bytes
///         of output are produced.
/// \tparam outlen size of the result in bytes; must be 8 or 16.
/// \param in pointer to input data (read-only)
/// \param inlen input data length in bytes
/// \param k reference to the 16-byte key array (read-only)
/// \param out reference to the array that receives the result; each 64-bit
///        half is written in little-endian byte order
template <int cROUNDS, int dROUNDS, size_t outlen>
void siphash(const unsigned char *in, uint64_t inlen,
             const unsigned char (&k)[16], unsigned char (&out)[outlen]) {

  const unsigned char *ni = (const unsigned char *)in;
  const unsigned char *kk = (const unsigned char *)k;

  static_assert(outlen == 8 || outlen == 16, "result should be 8 or 16 bytes");

  uint64_t v0 = UINT64_C(0x736f6d6570736575);
  uint64_t v1 = UINT64_C(0x646f72616e646f6d);
  uint64_t v2 = UINT64_C(0x6c7967656e657261);
  uint64_t v3 = UINT64_C(0x7465646279746573);
  uint64_t k0 = endian::read64le(kk);
  uint64_t k1 = endian::read64le(kk + 8);
  uint64_t m;
  int i;
  // End of the last complete 8-byte word; the remaining `left` bytes are
  // folded into the final word `b` below.
  const unsigned char *end = ni + inlen - (inlen % sizeof(uint64_t));
  const int left = inlen & 7;
  // The top byte of the final word carries the low 8 bits of the length.
  uint64_t b = ((uint64_t)inlen) << 56;
  v3 ^= k1;
  v2 ^= k0;
  v1 ^= k1;
  v0 ^= k0;

  if (outlen == 16)
    v1 ^= 0xee;

  // Absorb every complete 8-byte little-endian word of the input.
  for (; ni != end; ni += 8) {
    m = endian::read64le(ni);
    v3 ^= m;

    for (i = 0; i < cROUNDS; ++i)
      SIPROUND;

    v0 ^= m;
  }

  // Pack the trailing 0-7 input bytes into the low bytes of the final word.
  switch (left) {
  case 7:
    b |= ((uint64_t)ni[6]) << 48;
    LLVM_FALLTHROUGH;
  case 6:
    b |= ((uint64_t)ni[5]) << 40;
    LLVM_FALLTHROUGH;
  case 5:
    b |= ((uint64_t)ni[4]) << 32;
    LLVM_FALLTHROUGH;
  case 4:
    b |= ((uint64_t)ni[3]) << 24;
    LLVM_FALLTHROUGH;
  case 3:
    b |= ((uint64_t)ni[2]) << 16;
    LLVM_FALLTHROUGH;
  case 2:
    b |= ((uint64_t)ni[1]) << 8;
    LLVM_FALLTHROUGH;
  case 1:
    b |= ((uint64_t)ni[0]);
    break;
  case 0:
    break;
  }

  // Absorb the final word.
  v3 ^= b;

  for (i = 0; i < cROUNDS; ++i)
    SIPROUND;

  v0 ^= b;

  if (outlen == 16)
    v2 ^= 0xee;
  else
    v2 ^= 0xff;

  for (i = 0; i < dROUNDS; ++i)
    SIPROUND;

  // First (or only) 8 bytes of the result.
  b = v0 ^ v1 ^ v2 ^ v3;
  endian::write64le(out, b);

  if (outlen == 8)
    return;

  // Second 8 bytes of the 16-byte result.
  v1 ^= 0xdd;

  for (i = 0; i < dROUNDS; ++i)
    SIPROUND;

  b = v0 ^ v1 ^ v2 ^ v3;
  endian::write64le(out + 8, b);
}

} // end anonymous namespace

b = v0 ^ v1 ^ v2 ^ v3;
U64TO8_LE(out + 8, b);
/// Hashes the bytes of In under the 16-byte key K with siphash<2, 4> and
/// stores the 8-byte result in Out.
void llvm::getSipHash_2_4_64(ArrayRef<uint8_t> In, const uint8_t (&K)[16],
                             uint8_t (&Out)[8]) {
  constexpr int CRounds = 2;
  constexpr int DRounds = 4;
  const uint8_t *Bytes = In.data();
  const uint64_t NumBytes = In.size();
  siphash<CRounds, DRounds>(Bytes, NumBytes, K, Out);
}

return 0;
/// Hashes the bytes of In under the 16-byte key K with siphash<2, 4> and
/// stores the 16-byte result in Out.
void llvm::getSipHash_2_4_128(ArrayRef<uint8_t> In, const uint8_t (&K)[16],
                              uint8_t (&Out)[16]) {
  constexpr int CRounds = 2;
  constexpr int DRounds = 4;
  const uint8_t *Bytes = In.data();
  const uint64_t NumBytes = In.size();
  siphash<CRounds, DRounds>(Bytes, NumBytes, K, Out);
}
Loading
Loading