Skip to content

Commit 577c3f1

Browse files
[Support] Integrate SipHash.cpp into libSupport. (#94394)
Start building it as part of the library, with some minor tweaks compared to the reference implementation:
- clang-format to match libSupport
- remove tracing support
- add file header
- templatize cROUNDS/dROUNDS, as well as 8B/16B result length
- replace assert with static_assert
- use LLVM_FALLTHROUGH

This also exports interfaces for SipHash-2-4-64/-128, and tests them using the reference test vectors.
1 parent cfbed2c commit 577c3f1

File tree

5 files changed

+239
-1213
lines changed

5 files changed

+239
-1213
lines changed

llvm/include/llvm/Support/SipHash.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
//===--- SipHash.h - An ABI-stable string SipHash ---------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// An implementation of SipHash, a hash function optimized for speed on
10+
// short inputs. Based on the SipHash reference implementation.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#ifndef LLVM_SUPPORT_SIPHASH_H
15+
#define LLVM_SUPPORT_SIPHASH_H
16+
17+
#include <cstdint>
18+
19+
namespace llvm {
20+
21+
template <typename T> class ArrayRef;
22+
23+
/// Computes a SipHash-2-4 64-bit result.
///
/// \param In the bytes to hash.
/// \param K the 16-byte key.
/// \param Out receives the 8-byte result.
24+
void getSipHash_2_4_64(ArrayRef<uint8_t> In, const uint8_t (&K)[16],
25+
uint8_t (&Out)[8]);
26+
27+
/// Computes a SipHash-2-4 128-bit result.
///
/// \param In the bytes to hash.
/// \param K the 16-byte key.
/// \param Out receives the 16-byte result.
28+
void getSipHash_2_4_128(ArrayRef<uint8_t> In, const uint8_t (&K)[16],
29+
uint8_t (&Out)[16]);
30+
31+
} // end namespace llvm
32+
33+
#endif

llvm/lib/Support/CMakeLists.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,6 @@ endif()
127127

128128
add_subdirectory(BLAKE3)
129129

130-
# Temporarily ignore SipHash.cpp before we fully integrate it into LLVMSupport.
131-
set(LLVM_OPTIONAL_SOURCES SipHash.cpp)
132-
133130
add_llvm_component_library(LLVMSupport
134131
ABIBreak.cpp
135132
AMDGPUMetadata.cpp
@@ -227,6 +224,7 @@ add_llvm_component_library(LLVMSupport
227224
SHA1.cpp
228225
SHA256.cpp
229226
Signposts.cpp
227+
SipHash.cpp
230228
SlowDynamicAPInt.cpp
231229
SmallPtrSet.cpp
232230
SmallVector.cpp

llvm/lib/Support/SipHash.cpp

Lines changed: 146 additions & 176 deletions
Original file line numberDiff line numberDiff line change
@@ -1,185 +1,155 @@
1-
/*
2-
SipHash reference C implementation
3-
4-
Copyright (c) 2012-2022 Jean-Philippe Aumasson
5-
6-
Copyright (c) 2012-2014 Daniel J. Bernstein <[email protected]>
7-
8-
To the extent possible under law, the author(s) have dedicated all copyright
9-
and related and neighboring rights to this software to the public domain
10-
worldwide. This software is distributed without any warranty.
11-
12-
You should have received a copy of the CC0 Public Domain Dedication along
13-
with
14-
this software. If not, see
15-
<http://creativecommons.org/publicdomain/zero/1.0/>.
16-
*/
17-
18-
#include "siphash.h"
19-
#include <assert.h>
20-
#include <stddef.h>
21-
#include <stdint.h>
22-
23-
/* default: SipHash-2-4 */
24-
#ifndef cROUNDS
25-
#define cROUNDS 2
26-
#endif
27-
#ifndef dROUNDS
28-
#define dROUNDS 4
29-
#endif
1+
//===--- SipHash.cpp - An ABI-stable string hash --------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/Support/SipHash.h"
10+
#include "llvm/ADT/ArrayRef.h"
11+
#include "llvm/Support/Compiler.h"
12+
#include "llvm/Support/Endian.h"
13+
#include <cstdint>
14+
15+
using namespace llvm;
16+
using namespace support;
17+
18+
// Lightly adapted from the SipHash reference C implementation:
19+
// https://github.com/veorq/SipHash
20+
// by Jean-Philippe Aumasson and Daniel J. Bernstein
3021

3122
// Rotates `word`, treated as a 64-bit value, left by `count` bits. Both shifts
// are only well defined for 0 < count < 64; the uses in this file pass
// constants in that range.
#define ROTL(word, count)                                                      \
  (uint64_t)(((word) << (count)) | ((word) >> (64 - (count))))
3223

33-
#define U32TO8_LE(p, v) \
34-
(p)[0] = (uint8_t)((v)); \
35-
(p)[1] = (uint8_t)((v) >> 8); \
36-
(p)[2] = (uint8_t)((v) >> 16); \
37-
(p)[3] = (uint8_t)((v) >> 24);
38-
39-
#define U64TO8_LE(p, v) \
40-
U32TO8_LE((p), (uint32_t)((v))); \
41-
U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));
42-
43-
#define U8TO64_LE(p) \
44-
(((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \
45-
((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \
46-
((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \
47-
((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))
48-
4924
#define SIPROUND \
50-
do { \
51-
v0 += v1; \
52-
v1 = ROTL(v1, 13); \
53-
v1 ^= v0; \
54-
v0 = ROTL(v0, 32); \
55-
v2 += v3; \
56-
v3 = ROTL(v3, 16); \
57-
v3 ^= v2; \
58-
v0 += v3; \
59-
v3 = ROTL(v3, 21); \
60-
v3 ^= v0; \
61-
v2 += v1; \
62-
v1 = ROTL(v1, 17); \
63-
v1 ^= v2; \
64-
v2 = ROTL(v2, 32); \
65-
} while (0)
66-
67-
#ifdef DEBUG_SIPHASH
68-
#include <stdio.h>
69-
70-
#define TRACE \
71-
do { \
72-
printf("(%3zu) v0 %016" PRIx64 "\n", inlen, v0); \
73-
printf("(%3zu) v1 %016" PRIx64 "\n", inlen, v1); \
74-
printf("(%3zu) v2 %016" PRIx64 "\n", inlen, v2); \
75-
printf("(%3zu) v3 %016" PRIx64 "\n", inlen, v3); \
76-
} while (0)
77-
#else
78-
#define TRACE
79-
#endif
80-
81-
/*
82-
Computes a SipHash value
83-
*in: pointer to input data (read-only)
84-
inlen: input data length in bytes (any size_t value)
85-
*k: pointer to the key data (read-only), must be 16 bytes
86-
*out: pointer to output data (write-only), outlen bytes must be allocated
87-
outlen: length of the output in bytes, must be 8 or 16
88-
*/
89-
int siphash(const void *in, const size_t inlen, const void *k, uint8_t *out,
90-
const size_t outlen) {
91-
92-
const unsigned char *ni = (const unsigned char *)in;
93-
const unsigned char *kk = (const unsigned char *)k;
94-
95-
assert((outlen == 8) || (outlen == 16));
96-
uint64_t v0 = UINT64_C(0x736f6d6570736575);
97-
uint64_t v1 = UINT64_C(0x646f72616e646f6d);
98-
uint64_t v2 = UINT64_C(0x6c7967656e657261);
99-
uint64_t v3 = UINT64_C(0x7465646279746573);
100-
uint64_t k0 = U8TO64_LE(kk);
101-
uint64_t k1 = U8TO64_LE(kk + 8);
102-
uint64_t m;
103-
int i;
104-
const unsigned char *end = ni + inlen - (inlen % sizeof(uint64_t));
105-
const int left = inlen & 7;
106-
uint64_t b = ((uint64_t)inlen) << 56;
107-
v3 ^= k1;
108-
v2 ^= k0;
109-
v1 ^= k1;
110-
v0 ^= k0;
111-
112-
if (outlen == 16)
113-
v1 ^= 0xee;
114-
115-
for (; ni != end; ni += 8) {
116-
m = U8TO64_LE(ni);
117-
v3 ^= m;
118-
119-
TRACE;
120-
for (i = 0; i < cROUNDS; ++i)
121-
SIPROUND;
122-
123-
v0 ^= m;
124-
}
125-
126-
switch (left) {
127-
case 7:
128-
b |= ((uint64_t)ni[6]) << 48;
129-
/* FALLTHRU */
130-
case 6:
131-
b |= ((uint64_t)ni[5]) << 40;
132-
/* FALLTHRU */
133-
case 5:
134-
b |= ((uint64_t)ni[4]) << 32;
135-
/* FALLTHRU */
136-
case 4:
137-
b |= ((uint64_t)ni[3]) << 24;
138-
/* FALLTHRU */
139-
case 3:
140-
b |= ((uint64_t)ni[2]) << 16;
141-
/* FALLTHRU */
142-
case 2:
143-
b |= ((uint64_t)ni[1]) << 8;
144-
/* FALLTHRU */
145-
case 1:
146-
b |= ((uint64_t)ni[0]);
147-
break;
148-
case 0:
149-
break;
150-
}
151-
152-
v3 ^= b;
153-
154-
TRACE;
155-
for (i = 0; i < cROUNDS; ++i)
156-
SIPROUND;
157-
158-
v0 ^= b;
159-
160-
if (outlen == 16)
161-
v2 ^= 0xee;
162-
else
163-
v2 ^= 0xff;
25+
do { \
26+
v0 += v1; \
27+
v1 = ROTL(v1, 13); \
28+
v1 ^= v0; \
29+
v0 = ROTL(v0, 32); \
30+
v2 += v3; \
31+
v3 = ROTL(v3, 16); \
32+
v3 ^= v2; \
33+
v0 += v3; \
34+
v3 = ROTL(v3, 21); \
35+
v3 ^= v0; \
36+
v2 += v1; \
37+
v1 = ROTL(v1, 17); \
38+
v1 ^= v2; \
39+
v2 = ROTL(v2, 32); \
40+
} while (0)
41+
42+
namespace {
43+
44+
/// Computes a SipHash value and writes it to \p out.
45+
///
46+
/// \param in: pointer to input data (read-only)
47+
/// \param inlen: input data length in bytes (any uint64_t value)
48+
/// \param k: reference to the key data 16-byte array (read-only)
49+
/// \param out: reference to the output array (write-only), 8 or 16 bytes
/// \tparam cROUNDS: SIPROUND iterations run for each absorbed 8-byte word
/// \tparam dROUNDS: SIPROUND iterations run before each 8-byte output word
/// \tparam outlen: result length in bytes; deducible from the type of \p out
50+
///
51+
template <int cROUNDS, int dROUNDS, size_t outlen>
52+
void siphash(const unsigned char *in, uint64_t inlen,
53+
const unsigned char (&k)[16], unsigned char (&out)[outlen]) {
54+
55+
// Byte-pointer views of the input and the key; ni advances as words are
// consumed below.
const unsigned char *ni = (const unsigned char *)in;
56+
const unsigned char *kk = (const unsigned char *)k;
57+
58+
static_assert(outlen == 8 || outlen == 16, "result should be 8 or 16 bytes");
59+
60+
// Initial state. Read as big-endian bytes, the four constants spell the ASCII
// string "somepseudorandomlygeneratedbytes".
uint64_t v0 = UINT64_C(0x736f6d6570736575);
61+
uint64_t v1 = UINT64_C(0x646f72616e646f6d);
62+
uint64_t v2 = UINT64_C(0x6c7967656e657261);
63+
uint64_t v3 = UINT64_C(0x7465646279746573);
64+
// The key is read as two little-endian 64-bit halves.
uint64_t k0 = endian::read64le(kk);
65+
uint64_t k1 = endian::read64le(kk + 8);
66+
uint64_t m;
67+
int i;
68+
// end is one past the last whole 8-byte word; left is the number of trailing
// bytes (0-7) that do not fill a word.
const unsigned char *end = ni + inlen - (inlen % sizeof(uint64_t));
69+
const int left = inlen & 7;
70+
// The final word carries the low 8 bits of the input length in its top byte;
// the tail bytes are OR-ed into the lower bytes further down.
uint64_t b = ((uint64_t)inlen) << 56;
71+
// Mix the key into the initial state.
v3 ^= k1;
72+
v2 ^= k0;
73+
v1 ^= k1;
74+
v0 ^= k0;
75+
76+
// The 16-byte variant additionally flips bits in v1 before absorbing input.
if (outlen == 16)
77+
v1 ^= 0xee;
78+
79+
// Absorb each whole 8-byte little-endian word: xor it into v3, run cROUNDS
// rounds, then xor it into v0.
for (; ni != end; ni += 8) {
80+
m = endian::read64le(ni);
81+
v3 ^= m;
16482

165-
TRACE;
166-
for (i = 0; i < dROUNDS; ++i)
167-
SIPROUND;
168-
169-
b = v0 ^ v1 ^ v2 ^ v3;
170-
U64TO8_LE(out, b);
171-
172-
if (outlen == 8)
173-
return 0;
174-
175-
v1 ^= 0xdd;
83+
for (i = 0; i < cROUNDS; ++i)
84+
SIPROUND;
85+
86+
v0 ^= m;
87+
}
88+
89+
// Fold the remaining 0-7 tail bytes into the low bytes of b in little-endian
// order; each case falls through to pick up all lower-indexed bytes.
switch (left) {
90+
case 7:
91+
b |= ((uint64_t)ni[6]) << 48;
92+
LLVM_FALLTHROUGH;
93+
case 6:
94+
b |= ((uint64_t)ni[5]) << 40;
95+
LLVM_FALLTHROUGH;
96+
case 5:
97+
b |= ((uint64_t)ni[4]) << 32;
98+
LLVM_FALLTHROUGH;
99+
case 4:
100+
b |= ((uint64_t)ni[3]) << 24;
101+
LLVM_FALLTHROUGH;
102+
case 3:
103+
b |= ((uint64_t)ni[2]) << 16;
104+
LLVM_FALLTHROUGH;
105+
case 2:
106+
b |= ((uint64_t)ni[1]) << 8;
107+
LLVM_FALLTHROUGH;
108+
case 1:
109+
b |= ((uint64_t)ni[0]);
110+
break;
111+
case 0:
112+
break;
113+
}
114+
115+
// Absorb the final (length + tail) word exactly like a full input word.
v3 ^= b;
116+
117+
for (i = 0; i < cROUNDS; ++i)
118+
SIPROUND;
119+
120+
v0 ^= b;
121+
122+
// Flip bits in v2 before the output rounds; the constant depends on the
// result length.
if (outlen == 16)
123+
v2 ^= 0xee;
124+
else
125+
v2 ^= 0xff;
126+
127+
for (i = 0; i < dROUNDS; ++i)
128+
SIPROUND;
129+
130+
// First 8 output bytes: xor of the four state words, stored little-endian.
b = v0 ^ v1 ^ v2 ^ v3;
131+
endian::write64le(out, b);
132+
133+
// The 8-byte variant is complete at this point.
if (outlen == 8)
134+
return;
135+
136+
// 16-byte variant: flip bits in v1, run dROUNDS more rounds, and store the
// second 8 output bytes.
v1 ^= 0xdd;
137+
138+
for (i = 0; i < dROUNDS; ++i)
139+
SIPROUND;
140+
141+
b = v0 ^ v1 ^ v2 ^ v3;
142+
endian::write64le(out + 8, b);
143+
}
176144

177-
TRACE;
178-
for (i = 0; i < dROUNDS; ++i)
179-
SIPROUND;
145+
} // end anonymous namespace
180146

181-
b = v0 ^ v1 ^ v2 ^ v3;
182-
U64TO8_LE(out + 8, b);
147+
// Hashes In with key K using 2 rounds per input word and 4 rounds per output
// word; the 8-byte result length is deduced from the type of Out.
void llvm::getSipHash_2_4_64(ArrayRef<uint8_t> In, const uint8_t (&K)[16],
148+
uint8_t (&Out)[8]) {
149+
siphash<2, 4>(In.data(), In.size(), K, Out);
150+
}
183151

184-
return 0;
152+
// Hashes In with key K using 2 rounds per input word and 4 rounds per output
// word; the 16-byte result length is deduced from the type of Out.
void llvm::getSipHash_2_4_128(ArrayRef<uint8_t> In, const uint8_t (&K)[16],
153+
uint8_t (&Out)[16]) {
154+
siphash<2, 4>(In.data(), In.size(), K, Out);
185155
}

0 commit comments

Comments
 (0)