vendoring blake2b

This commit is contained in:
Armon Dadgar 2017-08-23 15:30:53 -07:00
parent af9caef4b9
commit 568ccf0485
9 changed files with 1721 additions and 0 deletions

207
vendor/golang.org/x/crypto/blake2b/blake2b.go generated vendored Normal file
View file

@ -0,0 +1,207 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package blake2b implements the BLAKE2b hash algorithm defined by RFC 7693
// and the extendable output function (XOF) BLAKE2Xb.
//
// For a detailed specification of BLAKE2b see https://blake2.net/blake2.pdf
// and for BLAKE2Xb see https://blake2.net/blake2x.pdf
//
// If you aren't sure which function you need, use BLAKE2b (Sum512 or New512).
// If you need a secret-key MAC (message authentication code), use the New512
// function with a non-nil key.
//
// BLAKE2X is a construction to compute hash values larger than 64 bytes. It
// can produce hash values between 0 and 4 GiB.
package blake2b
import (
"encoding/binary"
"errors"
"hash"
)
const (
// The blocksize of BLAKE2b in bytes.
BlockSize = 128
// The hash size of BLAKE2b-512 in bytes.
Size = 64
// The hash size of BLAKE2b-384 in bytes.
Size384 = 48
// The hash size of BLAKE2b-256 in bytes.
Size256 = 32
)
var (
useAVX2 bool
useAVX bool
useSSE4 bool
)
var errKeySize = errors.New("blake2b: invalid key size")
var iv = [8]uint64{
0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
}
// Sum512 returns the BLAKE2b-512 checksum of the data.
func Sum512(data []byte) [Size]byte {
var sum [Size]byte
checkSum(&sum, Size, data)
return sum
}
// Sum384 returns the BLAKE2b-384 checksum of the data.
func Sum384(data []byte) [Size384]byte {
var sum [Size]byte
var sum384 [Size384]byte
checkSum(&sum, Size384, data)
copy(sum384[:], sum[:Size384])
return sum384
}
// Sum256 returns the BLAKE2b-256 checksum of the data.
func Sum256(data []byte) [Size256]byte {
var sum [Size]byte
var sum256 [Size256]byte
checkSum(&sum, Size256, data)
copy(sum256[:], sum[:Size256])
return sum256
}
// New512 returns a new hash.Hash computing the BLAKE2b-512 checksum. A non-nil
// key turns the hash into a MAC. The key must between zero and 64 bytes long.
func New512(key []byte) (hash.Hash, error) { return newDigest(Size, key) }
// New384 returns a new hash.Hash computing the BLAKE2b-384 checksum. A non-nil
// key turns the hash into a MAC. The key must between zero and 64 bytes long.
func New384(key []byte) (hash.Hash, error) { return newDigest(Size384, key) }
// New256 returns a new hash.Hash computing the BLAKE2b-256 checksum. A non-nil
// key turns the hash into a MAC. The key must between zero and 64 bytes long.
func New256(key []byte) (hash.Hash, error) { return newDigest(Size256, key) }
func newDigest(hashSize int, key []byte) (*digest, error) {
if len(key) > Size {
return nil, errKeySize
}
d := &digest{
size: hashSize,
keyLen: len(key),
}
copy(d.key[:], key)
d.Reset()
return d, nil
}
func checkSum(sum *[Size]byte, hashSize int, data []byte) {
h := iv
h[0] ^= uint64(hashSize) | (1 << 16) | (1 << 24)
var c [2]uint64
if length := len(data); length > BlockSize {
n := length &^ (BlockSize - 1)
if length == n {
n -= BlockSize
}
hashBlocks(&h, &c, 0, data[:n])
data = data[n:]
}
var block [BlockSize]byte
offset := copy(block[:], data)
remaining := uint64(BlockSize - offset)
if c[0] < remaining {
c[1]--
}
c[0] -= remaining
hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:])
for i, v := range h[:(hashSize+7)/8] {
binary.LittleEndian.PutUint64(sum[8*i:], v)
}
}
type digest struct {
h [8]uint64
c [2]uint64
size int
block [BlockSize]byte
offset int
key [BlockSize]byte
keyLen int
}
func (d *digest) BlockSize() int { return BlockSize }
func (d *digest) Size() int { return d.size }
func (d *digest) Reset() {
d.h = iv
d.h[0] ^= uint64(d.size) | (uint64(d.keyLen) << 8) | (1 << 16) | (1 << 24)
d.offset, d.c[0], d.c[1] = 0, 0, 0
if d.keyLen > 0 {
d.block = d.key
d.offset = BlockSize
}
}
func (d *digest) Write(p []byte) (n int, err error) {
n = len(p)
if d.offset > 0 {
remaining := BlockSize - d.offset
if n <= remaining {
d.offset += copy(d.block[d.offset:], p)
return
}
copy(d.block[d.offset:], p[:remaining])
hashBlocks(&d.h, &d.c, 0, d.block[:])
d.offset = 0
p = p[remaining:]
}
if length := len(p); length > BlockSize {
nn := length &^ (BlockSize - 1)
if length == nn {
nn -= BlockSize
}
hashBlocks(&d.h, &d.c, 0, p[:nn])
p = p[nn:]
}
if len(p) > 0 {
d.offset += copy(d.block[:], p)
}
return
}
func (d *digest) Sum(sum []byte) []byte {
var hash [Size]byte
d.finalize(&hash)
return append(sum, hash[:d.size]...)
}
func (d *digest) finalize(hash *[Size]byte) {
var block [BlockSize]byte
copy(block[:], d.block[:d.offset])
remaining := uint64(BlockSize - d.offset)
c := d.c
if c[0] < remaining {
c[1]--
}
c[0] -= remaining
h := d.h
hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:])
for i, v := range h {
binary.LittleEndian.PutUint64(hash[8*i:], v)
}
}

View file

@ -0,0 +1,43 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build go1.7,amd64,!gccgo,!appengine
package blake2b
func init() {
useAVX2 = supportsAVX2()
useAVX = supportsAVX()
useSSE4 = supportsSSE4()
}
//go:noescape
func supportsSSE4() bool
//go:noescape
func supportsAVX() bool
//go:noescape
func supportsAVX2() bool
//go:noescape
func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
//go:noescape
func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
//go:noescape
func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
if useAVX2 {
hashBlocksAVX2(h, c, flag, blocks)
} else if useAVX {
hashBlocksAVX(h, c, flag, blocks)
} else if useSSE4 {
hashBlocksSSE4(h, c, flag, blocks)
} else {
hashBlocksGeneric(h, c, flag, blocks)
}
}

762
vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s generated vendored Normal file
View file

@ -0,0 +1,762 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build go1.7,amd64,!gccgo,!appengine
#include "textflag.h"
DATA ·AVX2_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
DATA ·AVX2_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
DATA ·AVX2_iv0<>+0x10(SB)/8, $0x3c6ef372fe94f82b
DATA ·AVX2_iv0<>+0x18(SB)/8, $0xa54ff53a5f1d36f1
GLOBL ·AVX2_iv0<>(SB), (NOPTR+RODATA), $32
DATA ·AVX2_iv1<>+0x00(SB)/8, $0x510e527fade682d1
DATA ·AVX2_iv1<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
DATA ·AVX2_iv1<>+0x10(SB)/8, $0x1f83d9abfb41bd6b
DATA ·AVX2_iv1<>+0x18(SB)/8, $0x5be0cd19137e2179
GLOBL ·AVX2_iv1<>(SB), (NOPTR+RODATA), $32
DATA ·AVX2_c40<>+0x00(SB)/8, $0x0201000706050403
DATA ·AVX2_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
DATA ·AVX2_c40<>+0x10(SB)/8, $0x0201000706050403
DATA ·AVX2_c40<>+0x18(SB)/8, $0x0a09080f0e0d0c0b
GLOBL ·AVX2_c40<>(SB), (NOPTR+RODATA), $32
DATA ·AVX2_c48<>+0x00(SB)/8, $0x0100070605040302
DATA ·AVX2_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
DATA ·AVX2_c48<>+0x10(SB)/8, $0x0100070605040302
DATA ·AVX2_c48<>+0x18(SB)/8, $0x09080f0e0d0c0b0a
GLOBL ·AVX2_c48<>(SB), (NOPTR+RODATA), $32
DATA ·AVX_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
DATA ·AVX_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
GLOBL ·AVX_iv0<>(SB), (NOPTR+RODATA), $16
DATA ·AVX_iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
DATA ·AVX_iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
GLOBL ·AVX_iv1<>(SB), (NOPTR+RODATA), $16
DATA ·AVX_iv2<>+0x00(SB)/8, $0x510e527fade682d1
DATA ·AVX_iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
GLOBL ·AVX_iv2<>(SB), (NOPTR+RODATA), $16
DATA ·AVX_iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
DATA ·AVX_iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
GLOBL ·AVX_iv3<>(SB), (NOPTR+RODATA), $16
DATA ·AVX_c40<>+0x00(SB)/8, $0x0201000706050403
DATA ·AVX_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
GLOBL ·AVX_c40<>(SB), (NOPTR+RODATA), $16
DATA ·AVX_c48<>+0x00(SB)/8, $0x0100070605040302
DATA ·AVX_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
GLOBL ·AVX_c48<>(SB), (NOPTR+RODATA), $16
#define VPERMQ_0x39_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x39
#define VPERMQ_0x93_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x93
#define VPERMQ_0x4E_Y2_Y2 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e
#define VPERMQ_0x93_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x93
#define VPERMQ_0x39_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x39
#define ROUND_AVX2(m0, m1, m2, m3, t, c40, c48) \
VPADDQ m0, Y0, Y0; \
VPADDQ Y1, Y0, Y0; \
VPXOR Y0, Y3, Y3; \
VPSHUFD $-79, Y3, Y3; \
VPADDQ Y3, Y2, Y2; \
VPXOR Y2, Y1, Y1; \
VPSHUFB c40, Y1, Y1; \
VPADDQ m1, Y0, Y0; \
VPADDQ Y1, Y0, Y0; \
VPXOR Y0, Y3, Y3; \
VPSHUFB c48, Y3, Y3; \
VPADDQ Y3, Y2, Y2; \
VPXOR Y2, Y1, Y1; \
VPADDQ Y1, Y1, t; \
VPSRLQ $63, Y1, Y1; \
VPXOR t, Y1, Y1; \
VPERMQ_0x39_Y1_Y1; \
VPERMQ_0x4E_Y2_Y2; \
VPERMQ_0x93_Y3_Y3; \
VPADDQ m2, Y0, Y0; \
VPADDQ Y1, Y0, Y0; \
VPXOR Y0, Y3, Y3; \
VPSHUFD $-79, Y3, Y3; \
VPADDQ Y3, Y2, Y2; \
VPXOR Y2, Y1, Y1; \
VPSHUFB c40, Y1, Y1; \
VPADDQ m3, Y0, Y0; \
VPADDQ Y1, Y0, Y0; \
VPXOR Y0, Y3, Y3; \
VPSHUFB c48, Y3, Y3; \
VPADDQ Y3, Y2, Y2; \
VPXOR Y2, Y1, Y1; \
VPADDQ Y1, Y1, t; \
VPSRLQ $63, Y1, Y1; \
VPXOR t, Y1, Y1; \
VPERMQ_0x39_Y3_Y3; \
VPERMQ_0x4E_Y2_Y2; \
VPERMQ_0x93_Y1_Y1
#define VMOVQ_SI_X11_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x1E
#define VMOVQ_SI_X12_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x26
#define VMOVQ_SI_X13_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x2E
#define VMOVQ_SI_X14_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x36
#define VMOVQ_SI_X15_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x3E
#define VMOVQ_SI_X11(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x5E; BYTE $n
#define VMOVQ_SI_X12(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x66; BYTE $n
#define VMOVQ_SI_X13(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x6E; BYTE $n
#define VMOVQ_SI_X14(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x76; BYTE $n
#define VMOVQ_SI_X15(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x7E; BYTE $n
#define VPINSRQ_1_SI_X11_0 BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x1E; BYTE $0x01
#define VPINSRQ_1_SI_X12_0 BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x26; BYTE $0x01
#define VPINSRQ_1_SI_X13_0 BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x2E; BYTE $0x01
#define VPINSRQ_1_SI_X14_0 BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x36; BYTE $0x01
#define VPINSRQ_1_SI_X15_0 BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x3E; BYTE $0x01
#define VPINSRQ_1_SI_X11(n) BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x5E; BYTE $n; BYTE $0x01
#define VPINSRQ_1_SI_X12(n) BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x66; BYTE $n; BYTE $0x01
#define VPINSRQ_1_SI_X13(n) BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x6E; BYTE $n; BYTE $0x01
#define VPINSRQ_1_SI_X14(n) BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x76; BYTE $n; BYTE $0x01
#define VPINSRQ_1_SI_X15(n) BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x7E; BYTE $n; BYTE $0x01
#define VMOVQ_R8_X15 BYTE $0xC4; BYTE $0x41; BYTE $0xF9; BYTE $0x6E; BYTE $0xF8
#define VPINSRQ_1_R9_X15 BYTE $0xC4; BYTE $0x43; BYTE $0x81; BYTE $0x22; BYTE $0xF9; BYTE $0x01
// load msg: Y12 = (i0, i1, i2, i3)
// i0, i1, i2, i3 must not be 0
#define LOAD_MSG_AVX2_Y12(i0, i1, i2, i3) \
VMOVQ_SI_X12(i0*8); \
VMOVQ_SI_X11(i2*8); \
VPINSRQ_1_SI_X12(i1*8); \
VPINSRQ_1_SI_X11(i3*8); \
VINSERTI128 $1, X11, Y12, Y12
// load msg: Y13 = (i0, i1, i2, i3)
// i0, i1, i2, i3 must not be 0
#define LOAD_MSG_AVX2_Y13(i0, i1, i2, i3) \
VMOVQ_SI_X13(i0*8); \
VMOVQ_SI_X11(i2*8); \
VPINSRQ_1_SI_X13(i1*8); \
VPINSRQ_1_SI_X11(i3*8); \
VINSERTI128 $1, X11, Y13, Y13
// load msg: Y14 = (i0, i1, i2, i3)
// i0, i1, i2, i3 must not be 0
#define LOAD_MSG_AVX2_Y14(i0, i1, i2, i3) \
VMOVQ_SI_X14(i0*8); \
VMOVQ_SI_X11(i2*8); \
VPINSRQ_1_SI_X14(i1*8); \
VPINSRQ_1_SI_X11(i3*8); \
VINSERTI128 $1, X11, Y14, Y14
// load msg: Y15 = (i0, i1, i2, i3)
// i0, i1, i2, i3 must not be 0
#define LOAD_MSG_AVX2_Y15(i0, i1, i2, i3) \
VMOVQ_SI_X15(i0*8); \
VMOVQ_SI_X11(i2*8); \
VPINSRQ_1_SI_X15(i1*8); \
VPINSRQ_1_SI_X11(i3*8); \
VINSERTI128 $1, X11, Y15, Y15
#define LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() \
VMOVQ_SI_X12_0; \
VMOVQ_SI_X11(4*8); \
VPINSRQ_1_SI_X12(2*8); \
VPINSRQ_1_SI_X11(6*8); \
VINSERTI128 $1, X11, Y12, Y12; \
LOAD_MSG_AVX2_Y13(1, 3, 5, 7); \
LOAD_MSG_AVX2_Y14(8, 10, 12, 14); \
LOAD_MSG_AVX2_Y15(9, 11, 13, 15)
#define LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() \
LOAD_MSG_AVX2_Y12(14, 4, 9, 13); \
LOAD_MSG_AVX2_Y13(10, 8, 15, 6); \
VMOVQ_SI_X11(11*8); \
VPSHUFD $0x4E, 0*8(SI), X14; \
VPINSRQ_1_SI_X11(5*8); \
VINSERTI128 $1, X11, Y14, Y14; \
LOAD_MSG_AVX2_Y15(12, 2, 7, 3)
#define LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() \
VMOVQ_SI_X11(5*8); \
VMOVDQU 11*8(SI), X12; \
VPINSRQ_1_SI_X11(15*8); \
VINSERTI128 $1, X11, Y12, Y12; \
VMOVQ_SI_X13(8*8); \
VMOVQ_SI_X11(2*8); \
VPINSRQ_1_SI_X13_0; \
VPINSRQ_1_SI_X11(13*8); \
VINSERTI128 $1, X11, Y13, Y13; \
LOAD_MSG_AVX2_Y14(10, 3, 7, 9); \
LOAD_MSG_AVX2_Y15(14, 6, 1, 4)
#define LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() \
LOAD_MSG_AVX2_Y12(7, 3, 13, 11); \
LOAD_MSG_AVX2_Y13(9, 1, 12, 14); \
LOAD_MSG_AVX2_Y14(2, 5, 4, 15); \
VMOVQ_SI_X15(6*8); \
VMOVQ_SI_X11_0; \
VPINSRQ_1_SI_X15(10*8); \
VPINSRQ_1_SI_X11(8*8); \
VINSERTI128 $1, X11, Y15, Y15
#define LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() \
LOAD_MSG_AVX2_Y12(9, 5, 2, 10); \
VMOVQ_SI_X13_0; \
VMOVQ_SI_X11(4*8); \
VPINSRQ_1_SI_X13(7*8); \
VPINSRQ_1_SI_X11(15*8); \
VINSERTI128 $1, X11, Y13, Y13; \
LOAD_MSG_AVX2_Y14(14, 11, 6, 3); \
LOAD_MSG_AVX2_Y15(1, 12, 8, 13)
#define LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() \
VMOVQ_SI_X12(2*8); \
VMOVQ_SI_X11_0; \
VPINSRQ_1_SI_X12(6*8); \
VPINSRQ_1_SI_X11(8*8); \
VINSERTI128 $1, X11, Y12, Y12; \
LOAD_MSG_AVX2_Y13(12, 10, 11, 3); \
LOAD_MSG_AVX2_Y14(4, 7, 15, 1); \
LOAD_MSG_AVX2_Y15(13, 5, 14, 9)
#define LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() \
LOAD_MSG_AVX2_Y12(12, 1, 14, 4); \
LOAD_MSG_AVX2_Y13(5, 15, 13, 10); \
VMOVQ_SI_X14_0; \
VPSHUFD $0x4E, 8*8(SI), X11; \
VPINSRQ_1_SI_X14(6*8); \
VINSERTI128 $1, X11, Y14, Y14; \
LOAD_MSG_AVX2_Y15(7, 3, 2, 11)
#define LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() \
LOAD_MSG_AVX2_Y12(13, 7, 12, 3); \
LOAD_MSG_AVX2_Y13(11, 14, 1, 9); \
LOAD_MSG_AVX2_Y14(5, 15, 8, 2); \
VMOVQ_SI_X15_0; \
VMOVQ_SI_X11(6*8); \
VPINSRQ_1_SI_X15(4*8); \
VPINSRQ_1_SI_X11(10*8); \
VINSERTI128 $1, X11, Y15, Y15
#define LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() \
VMOVQ_SI_X12(6*8); \
VMOVQ_SI_X11(11*8); \
VPINSRQ_1_SI_X12(14*8); \
VPINSRQ_1_SI_X11_0; \
VINSERTI128 $1, X11, Y12, Y12; \
LOAD_MSG_AVX2_Y13(15, 9, 3, 8); \
VMOVQ_SI_X11(1*8); \
VMOVDQU 12*8(SI), X14; \
VPINSRQ_1_SI_X11(10*8); \
VINSERTI128 $1, X11, Y14, Y14; \
VMOVQ_SI_X15(2*8); \
VMOVDQU 4*8(SI), X11; \
VPINSRQ_1_SI_X15(7*8); \
VINSERTI128 $1, X11, Y15, Y15
#define LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() \
LOAD_MSG_AVX2_Y12(10, 8, 7, 1); \
VMOVQ_SI_X13(2*8); \
VPSHUFD $0x4E, 5*8(SI), X11; \
VPINSRQ_1_SI_X13(4*8); \
VINSERTI128 $1, X11, Y13, Y13; \
LOAD_MSG_AVX2_Y14(15, 9, 3, 13); \
VMOVQ_SI_X15(11*8); \
VMOVQ_SI_X11(12*8); \
VPINSRQ_1_SI_X15(14*8); \
VPINSRQ_1_SI_X11_0; \
VINSERTI128 $1, X11, Y15, Y15
// func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment
MOVQ h+0(FP), AX
MOVQ c+8(FP), BX
MOVQ flag+16(FP), CX
MOVQ blocks_base+24(FP), SI
MOVQ blocks_len+32(FP), DI
MOVQ SP, DX
MOVQ SP, R9
ADDQ $31, R9
ANDQ $~31, R9
MOVQ R9, SP
MOVQ CX, 16(SP)
XORQ CX, CX
MOVQ CX, 24(SP)
VMOVDQU ·AVX2_c40<>(SB), Y4
VMOVDQU ·AVX2_c48<>(SB), Y5
VMOVDQU 0(AX), Y8
VMOVDQU 32(AX), Y9
VMOVDQU ·AVX2_iv0<>(SB), Y6
VMOVDQU ·AVX2_iv1<>(SB), Y7
MOVQ 0(BX), R8
MOVQ 8(BX), R9
MOVQ R9, 8(SP)
loop:
ADDQ $128, R8
MOVQ R8, 0(SP)
CMPQ R8, $128
JGE noinc
INCQ R9
MOVQ R9, 8(SP)
noinc:
VMOVDQA Y8, Y0
VMOVDQA Y9, Y1
VMOVDQA Y6, Y2
VPXOR 0(SP), Y7, Y3
LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15()
VMOVDQA Y12, 32(SP)
VMOVDQA Y13, 64(SP)
VMOVDQA Y14, 96(SP)
VMOVDQA Y15, 128(SP)
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3()
VMOVDQA Y12, 160(SP)
VMOVDQA Y13, 192(SP)
VMOVDQA Y14, 224(SP)
VMOVDQA Y15, 256(SP)
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4()
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8()
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13()
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9()
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11()
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10()
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5()
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0()
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
ROUND_AVX2(32(SP), 64(SP), 96(SP), 128(SP), Y10, Y4, Y5)
ROUND_AVX2(160(SP), 192(SP), 224(SP), 256(SP), Y10, Y4, Y5)
VPXOR Y0, Y8, Y8
VPXOR Y1, Y9, Y9
VPXOR Y2, Y8, Y8
VPXOR Y3, Y9, Y9
LEAQ 128(SI), SI
SUBQ $128, DI
JNE loop
MOVQ R8, 0(BX)
MOVQ R9, 8(BX)
VMOVDQU Y8, 0(AX)
VMOVDQU Y9, 32(AX)
VZEROUPPER
MOVQ DX, SP
RET
#define VPUNPCKLQDQ_X2_X2_X15 BYTE $0xC5; BYTE $0x69; BYTE $0x6C; BYTE $0xFA
#define VPUNPCKLQDQ_X3_X3_X15 BYTE $0xC5; BYTE $0x61; BYTE $0x6C; BYTE $0xFB
#define VPUNPCKLQDQ_X7_X7_X15 BYTE $0xC5; BYTE $0x41; BYTE $0x6C; BYTE $0xFF
#define VPUNPCKLQDQ_X13_X13_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x11; BYTE $0x6C; BYTE $0xFD
#define VPUNPCKLQDQ_X14_X14_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x09; BYTE $0x6C; BYTE $0xFE
#define VPUNPCKHQDQ_X15_X2_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x69; BYTE $0x6D; BYTE $0xD7
#define VPUNPCKHQDQ_X15_X3_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xDF
#define VPUNPCKHQDQ_X15_X6_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x49; BYTE $0x6D; BYTE $0xF7
#define VPUNPCKHQDQ_X15_X7_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xFF
#define VPUNPCKHQDQ_X15_X3_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xD7
#define VPUNPCKHQDQ_X15_X7_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xF7
#define VPUNPCKHQDQ_X15_X13_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xDF
#define VPUNPCKHQDQ_X15_X13_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xFF
#define SHUFFLE_AVX() \
VMOVDQA X6, X13; \
VMOVDQA X2, X14; \
VMOVDQA X4, X6; \
VPUNPCKLQDQ_X13_X13_X15; \
VMOVDQA X5, X4; \
VMOVDQA X6, X5; \
VPUNPCKHQDQ_X15_X7_X6; \
VPUNPCKLQDQ_X7_X7_X15; \
VPUNPCKHQDQ_X15_X13_X7; \
VPUNPCKLQDQ_X3_X3_X15; \
VPUNPCKHQDQ_X15_X2_X2; \
VPUNPCKLQDQ_X14_X14_X15; \
VPUNPCKHQDQ_X15_X3_X3; \
#define SHUFFLE_AVX_INV() \
VMOVDQA X2, X13; \
VMOVDQA X4, X14; \
VPUNPCKLQDQ_X2_X2_X15; \
VMOVDQA X5, X4; \
VPUNPCKHQDQ_X15_X3_X2; \
VMOVDQA X14, X5; \
VPUNPCKLQDQ_X3_X3_X15; \
VMOVDQA X6, X14; \
VPUNPCKHQDQ_X15_X13_X3; \
VPUNPCKLQDQ_X7_X7_X15; \
VPUNPCKHQDQ_X15_X6_X6; \
VPUNPCKLQDQ_X14_X14_X15; \
VPUNPCKHQDQ_X15_X7_X7; \
#define HALF_ROUND_AVX(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
VPADDQ m0, v0, v0; \
VPADDQ v2, v0, v0; \
VPADDQ m1, v1, v1; \
VPADDQ v3, v1, v1; \
VPXOR v0, v6, v6; \
VPXOR v1, v7, v7; \
VPSHUFD $-79, v6, v6; \
VPSHUFD $-79, v7, v7; \
VPADDQ v6, v4, v4; \
VPADDQ v7, v5, v5; \
VPXOR v4, v2, v2; \
VPXOR v5, v3, v3; \
VPSHUFB c40, v2, v2; \
VPSHUFB c40, v3, v3; \
VPADDQ m2, v0, v0; \
VPADDQ v2, v0, v0; \
VPADDQ m3, v1, v1; \
VPADDQ v3, v1, v1; \
VPXOR v0, v6, v6; \
VPXOR v1, v7, v7; \
VPSHUFB c48, v6, v6; \
VPSHUFB c48, v7, v7; \
VPADDQ v6, v4, v4; \
VPADDQ v7, v5, v5; \
VPXOR v4, v2, v2; \
VPXOR v5, v3, v3; \
VPADDQ v2, v2, t0; \
VPSRLQ $63, v2, v2; \
VPXOR t0, v2, v2; \
VPADDQ v3, v3, t0; \
VPSRLQ $63, v3, v3; \
VPXOR t0, v3, v3
// load msg: X12 = (i0, i1), X13 = (i2, i3), X14 = (i4, i5), X15 = (i6, i7)
// i0, i1, i2, i3, i4, i5, i6, i7 must not be 0
#define LOAD_MSG_AVX(i0, i1, i2, i3, i4, i5, i6, i7) \
VMOVQ_SI_X12(i0*8); \
VMOVQ_SI_X13(i2*8); \
VMOVQ_SI_X14(i4*8); \
VMOVQ_SI_X15(i6*8); \
VPINSRQ_1_SI_X12(i1*8); \
VPINSRQ_1_SI_X13(i3*8); \
VPINSRQ_1_SI_X14(i5*8); \
VPINSRQ_1_SI_X15(i7*8)
// load msg: X12 = (0, 2), X13 = (4, 6), X14 = (1, 3), X15 = (5, 7)
#define LOAD_MSG_AVX_0_2_4_6_1_3_5_7() \
VMOVQ_SI_X12_0; \
VMOVQ_SI_X13(4*8); \
VMOVQ_SI_X14(1*8); \
VMOVQ_SI_X15(5*8); \
VPINSRQ_1_SI_X12(2*8); \
VPINSRQ_1_SI_X13(6*8); \
VPINSRQ_1_SI_X14(3*8); \
VPINSRQ_1_SI_X15(7*8)
// load msg: X12 = (1, 0), X13 = (11, 5), X14 = (12, 2), X15 = (7, 3)
#define LOAD_MSG_AVX_1_0_11_5_12_2_7_3() \
VPSHUFD $0x4E, 0*8(SI), X12; \
VMOVQ_SI_X13(11*8); \
VMOVQ_SI_X14(12*8); \
VMOVQ_SI_X15(7*8); \
VPINSRQ_1_SI_X13(5*8); \
VPINSRQ_1_SI_X14(2*8); \
VPINSRQ_1_SI_X15(3*8)
// load msg: X12 = (11, 12), X13 = (5, 15), X14 = (8, 0), X15 = (2, 13)
#define LOAD_MSG_AVX_11_12_5_15_8_0_2_13() \
VMOVDQU 11*8(SI), X12; \
VMOVQ_SI_X13(5*8); \
VMOVQ_SI_X14(8*8); \
VMOVQ_SI_X15(2*8); \
VPINSRQ_1_SI_X13(15*8); \
VPINSRQ_1_SI_X14_0; \
VPINSRQ_1_SI_X15(13*8)
// load msg: X12 = (2, 5), X13 = (4, 15), X14 = (6, 10), X15 = (0, 8)
#define LOAD_MSG_AVX_2_5_4_15_6_10_0_8() \
VMOVQ_SI_X12(2*8); \
VMOVQ_SI_X13(4*8); \
VMOVQ_SI_X14(6*8); \
VMOVQ_SI_X15_0; \
VPINSRQ_1_SI_X12(5*8); \
VPINSRQ_1_SI_X13(15*8); \
VPINSRQ_1_SI_X14(10*8); \
VPINSRQ_1_SI_X15(8*8)
// load msg: X12 = (9, 5), X13 = (2, 10), X14 = (0, 7), X15 = (4, 15)
#define LOAD_MSG_AVX_9_5_2_10_0_7_4_15() \
VMOVQ_SI_X12(9*8); \
VMOVQ_SI_X13(2*8); \
VMOVQ_SI_X14_0; \
VMOVQ_SI_X15(4*8); \
VPINSRQ_1_SI_X12(5*8); \
VPINSRQ_1_SI_X13(10*8); \
VPINSRQ_1_SI_X14(7*8); \
VPINSRQ_1_SI_X15(15*8)
// load msg: X12 = (2, 6), X13 = (0, 8), X14 = (12, 10), X15 = (11, 3)
#define LOAD_MSG_AVX_2_6_0_8_12_10_11_3() \
VMOVQ_SI_X12(2*8); \
VMOVQ_SI_X13_0; \
VMOVQ_SI_X14(12*8); \
VMOVQ_SI_X15(11*8); \
VPINSRQ_1_SI_X12(6*8); \
VPINSRQ_1_SI_X13(8*8); \
VPINSRQ_1_SI_X14(10*8); \
VPINSRQ_1_SI_X15(3*8)
// load msg: X12 = (0, 6), X13 = (9, 8), X14 = (7, 3), X15 = (2, 11)
#define LOAD_MSG_AVX_0_6_9_8_7_3_2_11() \
MOVQ 0*8(SI), X12; \
VPSHUFD $0x4E, 8*8(SI), X13; \
MOVQ 7*8(SI), X14; \
MOVQ 2*8(SI), X15; \
VPINSRQ_1_SI_X12(6*8); \
VPINSRQ_1_SI_X14(3*8); \
VPINSRQ_1_SI_X15(11*8)
// load msg: X12 = (6, 14), X13 = (11, 0), X14 = (15, 9), X15 = (3, 8)
#define LOAD_MSG_AVX_6_14_11_0_15_9_3_8() \
MOVQ 6*8(SI), X12; \
MOVQ 11*8(SI), X13; \
MOVQ 15*8(SI), X14; \
MOVQ 3*8(SI), X15; \
VPINSRQ_1_SI_X12(14*8); \
VPINSRQ_1_SI_X13_0; \
VPINSRQ_1_SI_X14(9*8); \
VPINSRQ_1_SI_X15(8*8)
// load msg: X12 = (5, 15), X13 = (8, 2), X14 = (0, 4), X15 = (6, 10)
#define LOAD_MSG_AVX_5_15_8_2_0_4_6_10() \
MOVQ 5*8(SI), X12; \
MOVQ 8*8(SI), X13; \
MOVQ 0*8(SI), X14; \
MOVQ 6*8(SI), X15; \
VPINSRQ_1_SI_X12(15*8); \
VPINSRQ_1_SI_X13(2*8); \
VPINSRQ_1_SI_X14(4*8); \
VPINSRQ_1_SI_X15(10*8)
// load msg: X12 = (12, 13), X13 = (1, 10), X14 = (2, 7), X15 = (4, 5)
#define LOAD_MSG_AVX_12_13_1_10_2_7_4_5() \
VMOVDQU 12*8(SI), X12; \
MOVQ 1*8(SI), X13; \
MOVQ 2*8(SI), X14; \
VPINSRQ_1_SI_X13(10*8); \
VPINSRQ_1_SI_X14(7*8); \
VMOVDQU 4*8(SI), X15
// load msg: X12 = (15, 9), X13 = (3, 13), X14 = (11, 14), X15 = (12, 0)
#define LOAD_MSG_AVX_15_9_3_13_11_14_12_0() \
MOVQ 15*8(SI), X12; \
MOVQ 3*8(SI), X13; \
MOVQ 11*8(SI), X14; \
MOVQ 12*8(SI), X15; \
VPINSRQ_1_SI_X12(9*8); \
VPINSRQ_1_SI_X13(13*8); \
VPINSRQ_1_SI_X14(14*8); \
VPINSRQ_1_SI_X15_0
// func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
TEXT ·hashBlocksAVX(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
MOVQ h+0(FP), AX
MOVQ c+8(FP), BX
MOVQ flag+16(FP), CX
MOVQ blocks_base+24(FP), SI
MOVQ blocks_len+32(FP), DI
MOVQ SP, BP
MOVQ SP, R9
ADDQ $15, R9
ANDQ $~15, R9
MOVQ R9, SP
VMOVDQU ·AVX_c40<>(SB), X0
VMOVDQU ·AVX_c48<>(SB), X1
VMOVDQA X0, X8
VMOVDQA X1, X9
VMOVDQU ·AVX_iv3<>(SB), X0
VMOVDQA X0, 0(SP)
XORQ CX, 0(SP) // 0(SP) = ·AVX_iv3 ^ (CX || 0)
VMOVDQU 0(AX), X10
VMOVDQU 16(AX), X11
VMOVDQU 32(AX), X2
VMOVDQU 48(AX), X3
MOVQ 0(BX), R8
MOVQ 8(BX), R9
loop:
ADDQ $128, R8
CMPQ R8, $128
JGE noinc
INCQ R9
noinc:
VMOVQ_R8_X15
VPINSRQ_1_R9_X15
VMOVDQA X10, X0
VMOVDQA X11, X1
VMOVDQU ·AVX_iv0<>(SB), X4
VMOVDQU ·AVX_iv1<>(SB), X5
VMOVDQU ·AVX_iv2<>(SB), X6
VPXOR X15, X6, X6
VMOVDQA 0(SP), X7
LOAD_MSG_AVX_0_2_4_6_1_3_5_7()
VMOVDQA X12, 16(SP)
VMOVDQA X13, 32(SP)
VMOVDQA X14, 48(SP)
VMOVDQA X15, 64(SP)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX(8, 10, 12, 14, 9, 11, 13, 15)
VMOVDQA X12, 80(SP)
VMOVDQA X13, 96(SP)
VMOVDQA X14, 112(SP)
VMOVDQA X15, 128(SP)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(14, 4, 9, 13, 10, 8, 15, 6)
VMOVDQA X12, 144(SP)
VMOVDQA X13, 160(SP)
VMOVDQA X14, 176(SP)
VMOVDQA X15, 192(SP)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX_1_0_11_5_12_2_7_3()
VMOVDQA X12, 208(SP)
VMOVDQA X13, 224(SP)
VMOVDQA X14, 240(SP)
VMOVDQA X15, 256(SP)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX_11_12_5_15_8_0_2_13()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX(10, 3, 7, 9, 14, 6, 1, 4)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(7, 3, 13, 11, 9, 1, 12, 14)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX_2_5_4_15_6_10_0_8()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX_9_5_2_10_0_7_4_15()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX(14, 11, 6, 3, 1, 12, 8, 13)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX_2_6_0_8_12_10_11_3()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX(4, 7, 15, 1, 13, 5, 14, 9)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(12, 1, 14, 4, 5, 15, 13, 10)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX_0_6_9_8_7_3_2_11()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(13, 7, 12, 3, 11, 14, 1, 9)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX_5_15_8_2_0_4_6_10()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX_6_14_11_0_15_9_3_8()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX_12_13_1_10_2_7_4_5()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(10, 8, 7, 1, 2, 4, 6, 5)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX_15_9_3_13_11_14_12_0()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(SP), 32(SP), 48(SP), 64(SP), X15, X8, X9)
SHUFFLE_AVX()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(SP), 96(SP), 112(SP), 128(SP), X15, X8, X9)
SHUFFLE_AVX_INV()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(SP), 160(SP), 176(SP), 192(SP), X15, X8, X9)
SHUFFLE_AVX()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(SP), 224(SP), 240(SP), 256(SP), X15, X8, X9)
SHUFFLE_AVX_INV()
VMOVDQU 32(AX), X14
VMOVDQU 48(AX), X15
VPXOR X0, X10, X10
VPXOR X1, X11, X11
VPXOR X2, X14, X14
VPXOR X3, X15, X15
VPXOR X4, X10, X10
VPXOR X5, X11, X11
VPXOR X6, X14, X2
VPXOR X7, X15, X3
VMOVDQU X2, 32(AX)
VMOVDQU X3, 48(AX)
LEAQ 128(SI), SI
SUBQ $128, DI
JNE loop
VMOVDQU X10, 0(AX)
VMOVDQU X11, 16(AX)
MOVQ R8, 0(BX)
MOVQ R9, 8(BX)
VZEROUPPER
MOVQ BP, SP
RET
// func supportsAVX2() bool
TEXT ·supportsAVX2(SB), 4, $0-1
MOVQ runtime·support_avx2(SB), AX
MOVB AX, ret+0(FP)
RET
// func supportsAVX() bool
TEXT ·supportsAVX(SB), 4, $0-1
MOVQ runtime·support_avx(SB), AX
MOVB AX, ret+0(FP)
RET

25
vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go generated vendored Normal file
View file

@ -0,0 +1,25 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !go1.7,amd64,!gccgo,!appengine
package blake2b
func init() {
useSSE4 = supportsSSE4()
}
//go:noescape
func supportsSSE4() bool
//go:noescape
func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
if useSSE4 {
hashBlocksSSE4(h, c, flag, blocks)
} else {
hashBlocksGeneric(h, c, flag, blocks)
}
}

290
vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s generated vendored Normal file
View file

@ -0,0 +1,290 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64,!gccgo,!appengine
#include "textflag.h"
DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16
DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16
DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1
DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16
DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16
DATA ·c40<>+0x00(SB)/8, $0x0201000706050403
DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
GLOBL ·c40<>(SB), (NOPTR+RODATA), $16
DATA ·c48<>+0x00(SB)/8, $0x0100070605040302
DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
GLOBL ·c48<>(SB), (NOPTR+RODATA), $16
#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \
MOVO v4, t1; \
MOVO v5, v4; \
MOVO t1, v5; \
MOVO v6, t1; \
PUNPCKLQDQ v6, t2; \
PUNPCKHQDQ v7, v6; \
PUNPCKHQDQ t2, v6; \
PUNPCKLQDQ v7, t2; \
MOVO t1, v7; \
MOVO v2, t1; \
PUNPCKHQDQ t2, v7; \
PUNPCKLQDQ v3, t2; \
PUNPCKHQDQ t2, v2; \
PUNPCKLQDQ t1, t2; \
PUNPCKHQDQ t2, v3
#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \
MOVO v4, t1; \
MOVO v5, v4; \
MOVO t1, v5; \
MOVO v2, t1; \
PUNPCKLQDQ v2, t2; \
PUNPCKHQDQ v3, v2; \
PUNPCKHQDQ t2, v2; \
PUNPCKLQDQ v3, t2; \
MOVO t1, v3; \
MOVO v6, t1; \
PUNPCKHQDQ t2, v3; \
PUNPCKLQDQ v7, t2; \
PUNPCKHQDQ t2, v6; \
PUNPCKLQDQ t1, t2; \
PUNPCKHQDQ t2, v7
#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
PADDQ m0, v0; \
PADDQ m1, v1; \
PADDQ v2, v0; \
PADDQ v3, v1; \
PXOR v0, v6; \
PXOR v1, v7; \
PSHUFD $0xB1, v6, v6; \
PSHUFD $0xB1, v7, v7; \
PADDQ v6, v4; \
PADDQ v7, v5; \
PXOR v4, v2; \
PXOR v5, v3; \
PSHUFB c40, v2; \
PSHUFB c40, v3; \
PADDQ m2, v0; \
PADDQ m3, v1; \
PADDQ v2, v0; \
PADDQ v3, v1; \
PXOR v0, v6; \
PXOR v1, v7; \
PSHUFB c48, v6; \
PSHUFB c48, v7; \
PADDQ v6, v4; \
PADDQ v7, v5; \
PXOR v4, v2; \
PXOR v5, v3; \
MOVOU v2, t0; \
PADDQ v2, t0; \
PSRLQ $63, v2; \
PXOR t0, v2; \
MOVOU v3, t0; \
PADDQ v3, t0; \
PSRLQ $63, v3; \
PXOR t0, v3
#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \
MOVQ i0*8(src), m0; \
PINSRQ $1, i1*8(src), m0; \
MOVQ i2*8(src), m1; \
PINSRQ $1, i3*8(src), m1; \
MOVQ i4*8(src), m2; \
PINSRQ $1, i5*8(src), m2; \
MOVQ i6*8(src), m3; \
PINSRQ $1, i7*8(src), m3
// func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
MOVQ h+0(FP), AX
MOVQ c+8(FP), BX
MOVQ flag+16(FP), CX
MOVQ blocks_base+24(FP), SI
MOVQ blocks_len+32(FP), DI
MOVQ SP, BP
MOVQ SP, R9
ADDQ $15, R9
ANDQ $~15, R9
MOVQ R9, SP
MOVOU ·iv3<>(SB), X0
MOVO X0, 0(SP)
XORQ CX, 0(SP) // 0(SP) = ·iv3 ^ (CX || 0)
MOVOU ·c40<>(SB), X13
MOVOU ·c48<>(SB), X14
MOVOU 0(AX), X12
MOVOU 16(AX), X15
MOVQ 0(BX), R8
MOVQ 8(BX), R9
loop:
ADDQ $128, R8
CMPQ R8, $128
JGE noinc
INCQ R9
noinc:
MOVQ R8, X8
PINSRQ $1, R9, X8
MOVO X12, X0
MOVO X15, X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU ·iv0<>(SB), X4
MOVOU ·iv1<>(SB), X5
MOVOU ·iv2<>(SB), X6
PXOR X8, X6
MOVO 0(SP), X7
LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7)
MOVO X8, 16(SP)
MOVO X9, 32(SP)
MOVO X10, 48(SP)
MOVO X11, 64(SP)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15)
MOVO X8, 80(SP)
MOVO X9, 96(SP)
MOVO X10, 112(SP)
MOVO X11, 128(SP)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6)
MOVO X8, 144(SP)
MOVO X9, 160(SP)
MOVO X10, 176(SP)
MOVO X11, 192(SP)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3)
MOVO X8, 208(SP)
MOVO X9, 224(SP)
MOVO X10, 240(SP)
MOVO X11, 256(SP)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(SP), 32(SP), 48(SP), 64(SP), X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(SP), 96(SP), 112(SP), 128(SP), X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(SP), 160(SP), 176(SP), 192(SP), X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(SP), 224(SP), 240(SP), 256(SP), X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
MOVOU 32(AX), X10
MOVOU 48(AX), X11
PXOR X0, X12
PXOR X1, X15
PXOR X2, X10
PXOR X3, X11
PXOR X4, X12
PXOR X5, X15
PXOR X6, X10
PXOR X7, X11
MOVOU X10, 32(AX)
MOVOU X11, 48(AX)
LEAQ 128(SI), SI
SUBQ $128, DI
JNE loop
MOVOU X12, 0(AX)
MOVOU X15, 16(AX)
MOVQ R8, 0(BX)
MOVQ R9, 8(BX)
MOVQ BP, SP
RET
// func supportsSSE4() bool
TEXT ·supportsSSE4(SB), 4, $0-1
MOVL $1, AX
CPUID
SHRL $19, CX // Bit 19 indicates SSE4 support
ANDL $1, CX // CX != 0 if support SSE4
MOVB CX, ret+0(FP)
RET

179
vendor/golang.org/x/crypto/blake2b/blake2b_generic.go generated vendored Normal file
View file

@ -0,0 +1,179 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package blake2b
import "encoding/binary"
// the precomputed values for BLAKE2b
// there are 12 16-byte arrays - one for each round
// the entries are calculated from the sigma constants.
var precomputed = [12][16]byte{
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
{11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4},
{7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8},
{9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13},
{2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9},
{12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11},
{13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10},
{6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5},
{10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0},
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15}, // equal to the first
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3}, // equal to the second
}
func hashBlocksGeneric(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
var m [16]uint64
c0, c1 := c[0], c[1]
for i := 0; i < len(blocks); {
c0 += BlockSize
if c0 < BlockSize {
c1++
}
v0, v1, v2, v3, v4, v5, v6, v7 := h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7]
v8, v9, v10, v11, v12, v13, v14, v15 := iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7]
v12 ^= c0
v13 ^= c1
v14 ^= flag
for j := range m {
m[j] = binary.LittleEndian.Uint64(blocks[i:])
i += 8
}
for j := range precomputed {
s := &(precomputed[j])
v0 += m[s[0]]
v0 += v4
v12 ^= v0
v12 = v12<<(64-32) | v12>>32
v8 += v12
v4 ^= v8
v4 = v4<<(64-24) | v4>>24
v1 += m[s[1]]
v1 += v5
v13 ^= v1
v13 = v13<<(64-32) | v13>>32
v9 += v13
v5 ^= v9
v5 = v5<<(64-24) | v5>>24
v2 += m[s[2]]
v2 += v6
v14 ^= v2
v14 = v14<<(64-32) | v14>>32
v10 += v14
v6 ^= v10
v6 = v6<<(64-24) | v6>>24
v3 += m[s[3]]
v3 += v7
v15 ^= v3
v15 = v15<<(64-32) | v15>>32
v11 += v15
v7 ^= v11
v7 = v7<<(64-24) | v7>>24
v0 += m[s[4]]
v0 += v4
v12 ^= v0
v12 = v12<<(64-16) | v12>>16
v8 += v12
v4 ^= v8
v4 = v4<<(64-63) | v4>>63
v1 += m[s[5]]
v1 += v5
v13 ^= v1
v13 = v13<<(64-16) | v13>>16
v9 += v13
v5 ^= v9
v5 = v5<<(64-63) | v5>>63
v2 += m[s[6]]
v2 += v6
v14 ^= v2
v14 = v14<<(64-16) | v14>>16
v10 += v14
v6 ^= v10
v6 = v6<<(64-63) | v6>>63
v3 += m[s[7]]
v3 += v7
v15 ^= v3
v15 = v15<<(64-16) | v15>>16
v11 += v15
v7 ^= v11
v7 = v7<<(64-63) | v7>>63
v0 += m[s[8]]
v0 += v5
v15 ^= v0
v15 = v15<<(64-32) | v15>>32
v10 += v15
v5 ^= v10
v5 = v5<<(64-24) | v5>>24
v1 += m[s[9]]
v1 += v6
v12 ^= v1
v12 = v12<<(64-32) | v12>>32
v11 += v12
v6 ^= v11
v6 = v6<<(64-24) | v6>>24
v2 += m[s[10]]
v2 += v7
v13 ^= v2
v13 = v13<<(64-32) | v13>>32
v8 += v13
v7 ^= v8
v7 = v7<<(64-24) | v7>>24
v3 += m[s[11]]
v3 += v4
v14 ^= v3
v14 = v14<<(64-32) | v14>>32
v9 += v14
v4 ^= v9
v4 = v4<<(64-24) | v4>>24
v0 += m[s[12]]
v0 += v5
v15 ^= v0
v15 = v15<<(64-16) | v15>>16
v10 += v15
v5 ^= v10
v5 = v5<<(64-63) | v5>>63
v1 += m[s[13]]
v1 += v6
v12 ^= v1
v12 = v12<<(64-16) | v12>>16
v11 += v12
v6 ^= v11
v6 = v6<<(64-63) | v6>>63
v2 += m[s[14]]
v2 += v7
v13 ^= v2
v13 = v13<<(64-16) | v13>>16
v8 += v13
v7 ^= v8
v7 = v7<<(64-63) | v7>>63
v3 += m[s[15]]
v3 += v4
v14 ^= v3
v14 = v14<<(64-16) | v14>>16
v9 += v14
v4 ^= v9
v4 = v4<<(64-63) | v4>>63
}
h[0] ^= v0 ^ v8
h[1] ^= v1 ^ v9
h[2] ^= v2 ^ v10
h[3] ^= v3 ^ v11
h[4] ^= v4 ^ v12
h[5] ^= v5 ^ v13
h[6] ^= v6 ^ v14
h[7] ^= v7 ^ v15
}
c[0], c[1] = c0, c1
}

177
vendor/golang.org/x/crypto/blake2b/blake2x.go generated vendored Normal file
View file

@ -0,0 +1,177 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package blake2b
import (
"encoding/binary"
"errors"
"io"
)
// XOF defines the interface to hash functions that
// support arbitrary-length output.
type XOF interface {
// Write absorbs more data into the hash's state. It panics if called
// after Read.
io.Writer
// Read reads more output from the hash. It returns io.EOF if the limit
// has been reached.
io.Reader
// Clone returns a copy of the XOF in its current state.
Clone() XOF
// Reset resets the XOF to its initial state.
Reset()
}
// OutputLengthUnknown can be used as the size argument to NewXOF to indicate
// the the length of the output is not known in advance.
const OutputLengthUnknown = 0
// magicUnknownOutputLength is a magic value for the output size that indicates
// an unknown number of output bytes.
const magicUnknownOutputLength = (1 << 32) - 1
// maxOutputLength is the absolute maximum number of bytes to produce when the
// number of output bytes is unknown.
const maxOutputLength = (1 << 32) * 64
// NewXOF creates a new variable-output-length hash. The hash either produce a
// known number of bytes (1 <= size < 2**32-1), or an unknown number of bytes
// (size == OutputLengthUnknown). In the latter case, an absolute limit of
// 256GiB applies.
//
// A non-nil key turns the hash into a MAC. The key must between
// zero and 32 bytes long.
func NewXOF(size uint32, key []byte) (XOF, error) {
if len(key) > Size {
return nil, errKeySize
}
if size == magicUnknownOutputLength {
// 2^32-1 indicates an unknown number of bytes and thus isn't a
// valid length.
return nil, errors.New("blake2b: XOF length too large")
}
if size == OutputLengthUnknown {
size = magicUnknownOutputLength
}
x := &xof{
d: digest{
size: Size,
keyLen: len(key),
},
length: size,
}
copy(x.d.key[:], key)
x.Reset()
return x, nil
}
type xof struct {
d digest
length uint32
remaining uint64
cfg, root, block [Size]byte
offset int
nodeOffset uint32
readMode bool
}
func (x *xof) Write(p []byte) (n int, err error) {
if x.readMode {
panic("blake2b: write to XOF after read")
}
return x.d.Write(p)
}
func (x *xof) Clone() XOF {
clone := *x
return &clone
}
func (x *xof) Reset() {
x.cfg[0] = byte(Size)
binary.LittleEndian.PutUint32(x.cfg[4:], uint32(Size)) // leaf length
binary.LittleEndian.PutUint32(x.cfg[12:], x.length) // XOF length
x.cfg[17] = byte(Size) // inner hash size
x.d.Reset()
x.d.h[1] ^= uint64(x.length) << 32
x.remaining = uint64(x.length)
if x.remaining == magicUnknownOutputLength {
x.remaining = maxOutputLength
}
x.offset, x.nodeOffset = 0, 0
x.readMode = false
}
func (x *xof) Read(p []byte) (n int, err error) {
if !x.readMode {
x.d.finalize(&x.root)
x.readMode = true
}
if x.remaining == 0 {
return 0, io.EOF
}
n = len(p)
if uint64(n) > x.remaining {
n = int(x.remaining)
p = p[:n]
}
if x.offset > 0 {
blockRemaining := Size - x.offset
if n < blockRemaining {
x.offset += copy(p, x.block[x.offset:])
x.remaining -= uint64(n)
return
}
copy(p, x.block[x.offset:])
p = p[blockRemaining:]
x.offset = 0
x.remaining -= uint64(blockRemaining)
}
for len(p) >= Size {
binary.LittleEndian.PutUint32(x.cfg[8:], x.nodeOffset)
x.nodeOffset++
x.d.initConfig(&x.cfg)
x.d.Write(x.root[:])
x.d.finalize(&x.block)
copy(p, x.block[:])
p = p[Size:]
x.remaining -= uint64(Size)
}
if todo := len(p); todo > 0 {
if x.remaining < uint64(Size) {
x.cfg[0] = byte(x.remaining)
}
binary.LittleEndian.PutUint32(x.cfg[8:], x.nodeOffset)
x.nodeOffset++
x.d.initConfig(&x.cfg)
x.d.Write(x.root[:])
x.d.finalize(&x.block)
x.offset = copy(p, x.block[:todo])
x.remaining -= uint64(todo)
}
return
}
func (d *digest) initConfig(cfg *[Size]byte) {
d.offset, d.c[0], d.c[1] = 0, 0, 0
for i := range d.h {
d.h[i] = iv[i] ^ binary.LittleEndian.Uint64(cfg[i*8:])
}
}

32
vendor/golang.org/x/crypto/blake2b/register.go generated vendored Normal file
View file

@ -0,0 +1,32 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build go1.9
package blake2b
import (
"crypto"
"hash"
)
func init() {
newHash256 := func() hash.Hash {
h, _ := New256(nil)
return h
}
newHash384 := func() hash.Hash {
h, _ := New384(nil)
return h
}
newHash512 := func() hash.Hash {
h, _ := New512(nil)
return h
}
crypto.RegisterHash(crypto.BLAKE2b_256, newHash256)
crypto.RegisterHash(crypto.BLAKE2b_384, newHash384)
crypto.RegisterHash(crypto.BLAKE2b_512, newHash512)
}

6
vendor/vendor.json vendored
View file

@ -1290,6 +1290,12 @@
"path": "golang.org/x/crypto/ssh/terminal",
"revision": "eb71ad9bd329b5ac0fd0148dd99bd62e8be8e035",
"revisionTime": "2017-08-07T10:11:13Z"
},
{
"checksumSHA1": "pE9lQ5mMiW10+m6CS9XQDhSACNU=",
"path": "golang.org/x/crypto/blake2b",
"revision": "eb71ad9bd329b5ac0fd0148dd99bd62e8be8e035",
"revisionTime": "2017-08-07T10:11:13Z"
},
{
"checksumSHA1": "9jjO5GjLa0XF/nfWihF02RoH4qc=",