// Rotates the bits of `a` left by `n` positions, treating it as a 32-bit word,
// and returns the result converted back to int.
// `n` is not declared in the visible lines; callers supply it as a template
// argument (e.g. RotateLeft<16>, RotateLeft<12>).
// NOTE(review): with n == 0 the right shift becomes a shift by 32, which is
// undefined for a 32-bit unsigned -- confirm only 0 < n < 32 is ever used.
31 __host__ __device__
static int RotateLeft(
int a) {
// Work on an unsigned copy so both shifts act on the raw bit pattern.
32 auto u =
static_cast<unsigned>(a);
// Bits shifted out on the left are brought back in on the right.
33 return static_cast<int>((u << n) | (u >> (32 - n)));
// Updates the four words a, b, c and d in place; all four are taken by
// non-const reference. Callable from host and device code.
// Only part of the body is visible here: d is rotated left by 16 bits and
// b is rotated left by 12 bits. The statements around those rotations are
// not shown, so the full mixing sequence cannot be described from here.
36 __host__ __device__
static void QuarterRound(
int &a,
int &b,
int &c,
int &d) {
39 d = RotateLeft<16>(d);
42 b = RotateLeft<12>(b);
// Applies QuarterRound repeatedly to the four int4 elements of x, modifying
// them in place.
// The loop counter advances by 2 until it reaches `rounds` (declared outside
// the visible lines), and every iteration issues two groups of four
// QuarterRound calls.
51 __host__ __device__
static void Rounds(int4 x[4]) {
52 for (
int i = 0; i < rounds; i += 2) {
// First group: the same component (.x, .y, .z, .w) is taken from each of
// x[0]..x[3], i.e. one column at a time if x[k] is viewed as row k.
54 QuarterRound(x[0].x, x[1].x, x[2].x, x[3].x);
55 QuarterRound(x[0].y, x[1].y, x[2].y, x[3].y);
56 QuarterRound(x[0].z, x[1].z, x[2].z, x[3].z);
57 QuarterRound(x[0].w, x[1].w, x[2].w, x[3].w);
// Second group: the component index advances by one for each successive
// x[k], wrapping from .w back to .x, i.e. one diagonal at a time.
59 QuarterRound(x[0].x, x[1].y, x[2].z, x[3].w);
60 QuarterRound(x[0].y, x[1].z, x[2].w, x[3].x);
61 QuarterRound(x[0].z, x[1].w, x[2].x, x[3].y);
62 QuarterRound(x[0].w, x[1].x, x[2].y, x[3].z);
// Host/device helper that receives a four-element int4 array and returns
// nothing. The body is not visible here.
// NOTE(review): presumably rewrites x in place, given the void return and
// the array parameter -- confirm against the full definition.
66 __host__ __device__
static void ExpandKey(int4 x[4]) {
// Compile-time int4 constant. Gen assigns it to buf[0] when mul > 2 and
// XORs it into buf[0] again before returning all four words.
// The initializer values are not visible here.
71 constexpr static int4 kConstant32 = {
// Compile-time int4 constant. Gen assigns it to buf[0] when mul <= 2 and
// XORs it into buf[0] again in the mul == 2 return path.
// The initializer values are not visible here.
78 constexpr static int4 kConstant16 = {
// Constructs the generator from `nonce`, storing it in nonce_.
// Gen reads nonce_[0] and nonce_[1], so `nonce` must refer to at least two
// ints.
// NOTE(review): presumably nonce_ is a raw pointer member, in which case the
// pointed-to data is not copied and must outlive this object -- confirm.
93 __host__ __device__
ChaCha(
const int *nonce) : nonce_(nonce) {}
// Produces `mul` int4 values from `seed` and returns them as a
// cuda::std::array<int4, mul>.
// Several lines of the body are not visible here, including the declaration
// of `buf`, the processing between setup and the final XORs, and the body of
// the mul == 1 branch. The comments below cover only the visible statements.
95 __host__ __device__ cuda::std::array<int4, mul> Gen(int4 seed) {
// buf[0] starts as one of the two class constants, selected by mul.
99 if constexpr (mul <= 2) buf[0] = kConstant16;
100 else buf[0] = kConstant32;
// The two nonce words fill the last two components of buf[3].
110 buf[3].z = nonce_[0];
111 buf[3].w = nonce_[1];
// seed is XORed into buf[1] on every path.
115 buf[1] = util::Xor(buf[1], seed);
116 if constexpr (mul == 1) {
118 }
else if constexpr (mul == 2) {
// Two-word output: XOR the starting constant back into buf[0].
119 buf[0] = util::Xor(buf[0], kConstant16);
120 return {buf[0], buf[1]};
// Remaining case (its branch header is not visible here): XOR the starting
// constant, seed and nonce words into buf[0], buf[2] and buf[3], then
// return all four words.
122 buf[0] = util::Xor(buf[0], kConstant32);
123 buf[2] = util::Xor(buf[2], seed);
124 buf[3] = util::Xor(buf[3], {0, 0, nonce_[0], nonce_[1]});
125 return {buf[0], buf[1], buf[2], buf[3]};