myl7/fss 1.1.0
Function secret sharing (FSS) primitives including distributed point/comparison function (DPF/DCF)
Loading...
Searching...
No Matches
blake3.cuh
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
8#pragma once
9#include <fss/hash.cuh>
10#include <cuda_runtime.h>
11#include <cuda/std/array>
12#include <cuda/std/span>
13#include <cuda/std/tuple>
14#include <fss/util.cuh>
15
16namespace fss::hash {
17
24class Blake3 {
25private:
26 int4 iv_[2];
27
28 template <int n>
29 __host__ __device__ static int RotateRight(int a) {
30 auto u = static_cast<unsigned>(a);
31 return static_cast<int>((u >> n) | (u << (32 - n)));
32 }
33
34 __host__ __device__ static void G(int &a, int &b, int &c, int &d, int x, int y) {
35 a += b + x;
36 d = RotateRight<16>(d ^ a);
37 c += d;
38 b = RotateRight<12>(b ^ c);
39 a += b + y;
40 d = RotateRight<8>(d ^ a);
41 c += d;
42 b = RotateRight<7>(b ^ c);
43 }
44
45 __host__ __device__ static int GetWord(const int4 m[4], int i) {
46 return reinterpret_cast<const int *>(m)[i];
47 }
48
49 __host__ __device__ static void SetWord(int4 m[4], int i, int val) {
50 reinterpret_cast<int *>(m)[i] = val;
51 }
52
53 __host__ __device__ static void Permute(int4 m[4]) {
54 constexpr int perm[16] = {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8};
55 int tmp[16];
56 for (int i = 0; i < 16; ++i) {
57 tmp[i] = GetWord(m, perm[i]);
58 }
59 for (int i = 0; i < 16; ++i) {
60 SetWord(m, i, tmp[i]);
61 }
62 }
63
64 __host__ __device__ static void Round(int4 v[4], const int4 m[4]) {
65 // Column rounds: G calls use message words (0,1), (2,3), (4,5), (6,7)
66 G(v[0].x, v[1].x, v[2].x, v[3].x, GetWord(m, 0), GetWord(m, 1));
67 G(v[0].y, v[1].y, v[2].y, v[3].y, GetWord(m, 2), GetWord(m, 3));
68 G(v[0].z, v[1].z, v[2].z, v[3].z, GetWord(m, 4), GetWord(m, 5));
69 G(v[0].w, v[1].w, v[2].w, v[3].w, GetWord(m, 6), GetWord(m, 7));
70 // Diagonal rounds: G calls use message words (8,9), (10,11), (12,13), (14,15)
71 G(v[0].x, v[1].y, v[2].z, v[3].w, GetWord(m, 8), GetWord(m, 9));
72 G(v[0].y, v[1].z, v[2].w, v[3].x, GetWord(m, 10), GetWord(m, 11));
73 G(v[0].z, v[1].w, v[2].x, v[3].y, GetWord(m, 12), GetWord(m, 13));
74 G(v[0].w, v[1].x, v[2].y, v[3].z, GetWord(m, 14), GetWord(m, 15));
75 }
76
77 // First 4 standard BLAKE3 IV words
78 constexpr static int4 kIv0 = {
79 static_cast<int>(0x6A09E667),
80 static_cast<int>(0xBB67AE85),
81 static_cast<int>(0x3C6EF372),
82 static_cast<int>(0xA54FF53A),
83 };
84
85 constexpr static int kChunkStart = 1;
86 constexpr static int kChunkEnd = 2;
87 constexpr static int kRoot = 8;
88 constexpr static int kKeyedHash = 16;
89
100 __host__ __device__ static cuda::std::array<int4, 4> Compress(
101 const int4 h[2], const int4 msg[4], unsigned long long counter, int block_len, int flags) {
102 int4 v[4];
103 v[0] = h[0];
104 v[1] = h[1];
105 v[2] = kIv0;
106 v[3] = {static_cast<int>(counter & 0xFFFFFFFF),
107 static_cast<int>((counter >> 32) & 0xFFFFFFFF), block_len, flags};
108
109 int4 m[4] = {msg[0], msg[1], msg[2], msg[3]};
110
111 // 7 rounds with permutation between rounds
112 for (int i = 0; i < 7; ++i) {
113 Round(v, m);
114 if (i < 6) {
115 Permute(m);
116 }
117 }
118
119 // Finalization
120 v[0] = util::Xor(v[0], v[2]);
121 v[1] = util::Xor(v[1], v[3]);
122 v[2] = util::Xor(v[2], h[0]);
123 v[3] = util::Xor(v[3], h[1]);
124
125 return {v[0], v[1], v[2], v[3]};
126 }
127
128public:
135 __host__ __device__ explicit Blake3(cuda::std::span<const int4, 2> iv) : iv_{iv[0], iv[1]} {}
136
146 __host__ __device__ cuda::std::array<int4, 2> Hash(cuda::std::span<const int4, 4> msg) {
147 constexpr int flags = kChunkStart | kChunkEnd | kRoot | kKeyedHash;
148 auto out = Compress(iv_, msg.data(), 0, 64, flags);
149 return {out[0], out[1]};
150 }
151
161 __host__ __device__ cuda::std::array<int4, 4> Hash(cuda::std::tuple<int4, const int4> msg) {
162 constexpr int flags = kChunkStart | kChunkEnd | kRoot | kKeyedHash;
163 auto [a, b] = msg;
164 int4 padded[4] = {util::SetLsb(a, false), b, {0, 0, 0, 0}, {0, 0, 0, 0}};
165
166 auto out0 = Compress(iv_, padded, 0, 32, flags);
167
168 padded[0] = util::SetLsb(a, true);
169 auto out1 = Compress(iv_, padded, 0, 32, flags);
170
171 return {out0[0], out0[1], out1[0], out1[1]};
172 }
173};
174static_assert(Hashable<Blake3> && XorHashable<Blake3>);
175
176} // namespace fss::hash
BLAKE3 keyed hash (CPU+GPU).
Definition blake3.cuh:24
Blake3(cuda::std::span< const int4, 2 > iv)
Constructor.
Definition blake3.cuh:135
cuda::std::array< int4, 2 > Hash(cuda::std::span< const int4, 4 > msg)
Hash a 64B message.
Definition blake3.cuh:146
cuda::std::array< int4, 4 > Hash(cuda::std::tuple< int4, const int4 > msg)
XOR-collision-resistant hash.
Definition blake3.cuh:161
Collision-resistant hash interface.
Definition hash.cuh:19
Collision-resistant and XOR-collision-resistant hash interface.
Definition hash.cuh:27