feat: replace md5 with highwayhash
This commit is contained in:
		
							parent
							
								
									e048b87222
								
							
						
					
					
						commit
						c87d2dba9c
					
				|  | @ -620,6 +620,14 @@ | |||
|   revision = "c12348ce28de40eed0136aa2b644d0ee0650e56c" | ||||
|   version = "v1.0.1" | ||||
| 
 | ||||
| [[projects]] | ||||
|   digest = "1:56eaee71300a91f7a2f096b5d1d1d5389ebe8e69c068ec7d84c20459f599ddde" | ||||
|   name = "github.com/minio/HighwayHash" | ||||
|   packages = ["."] | ||||
|   pruneopts = "NUT" | ||||
|   revision = "02ca4b43caa3297fbb615700d8800acc7933be98" | ||||
|   version = "v1.0.0" | ||||
| 
 | ||||
| [[projects]] | ||||
|   digest = "1:a4df73029d2c42fabcb6b41e327d2f87e685284ec03edf76921c267d9cfc9c23" | ||||
|   name = "github.com/mitchellh/go-homedir" | ||||
|  | @ -983,9 +991,10 @@ | |||
| 
 | ||||
| [[projects]] | ||||
|   branch = "master" | ||||
|   digest = "1:eeb413d109f4b2813de0b5b23645d7a503db926cae8f10dfdcf248d15499314f" | ||||
|   digest = "1:2d5f7cd5c2bc42a4d5b18f711d482f14689a30212bbe0e398e151b3e2147cb86" | ||||
|   name = "golang.org/x/sys" | ||||
|   packages = [ | ||||
|     "cpu", | ||||
|     "unix", | ||||
|     "windows", | ||||
|     "windows/registry", | ||||
|  | @ -1376,6 +1385,7 @@ | |||
|     "github.com/google/go-containerregistry/pkg/v1/tarball", | ||||
|     "github.com/google/go-github/github", | ||||
|     "github.com/karrick/godirwalk", | ||||
|     "github.com/minio/HighwayHash", | ||||
|     "github.com/moby/buildkit/frontend/dockerfile/instructions", | ||||
|     "github.com/moby/buildkit/frontend/dockerfile/parser", | ||||
|     "github.com/moby/buildkit/frontend/dockerfile/shell", | ||||
|  |  | |||
|  | @ -46,3 +46,7 @@ required = [ | |||
| [[constraint]] | ||||
|   name = "gopkg.in/src-d/go-git.v4" | ||||
|   version = "4.6.0" | ||||
| 
 | ||||
| [[constraint]] | ||||
|   name = "github.com/minio/HighwayHash" | ||||
|   version = "1.0.0" | ||||
|  |  | |||
|  | @ -23,8 +23,10 @@ import ( | |||
| 	"io" | ||||
| 	"os" | ||||
| 	"strconv" | ||||
| 	"sync" | ||||
| 	"syscall" | ||||
| 
 | ||||
| 	highwayhash "github.com/minio/HighwayHash" | ||||
| 	"github.com/pkg/errors" | ||||
| 	"github.com/sirupsen/logrus" | ||||
| ) | ||||
|  | @ -44,8 +46,14 @@ func ConfigureLogging(logLevel string) error { | |||
| 
 | ||||
| // Hasher returns a hash function, used in snapshotting to determine if a file has changed
 | ||||
| func Hasher() func(string) (string, error) { | ||||
| 	pool := sync.Pool{ | ||||
| 		New: func() interface{} { | ||||
| 			return make([]byte, highwayhash.Size * 10 * 1024) | ||||
| 		}, | ||||
| 	} | ||||
| 	key := make([]byte, highwayhash.Size) | ||||
| 	hasher := func(p string) (string, error) { | ||||
| 		h := md5.New() | ||||
| 		h, _ := highwayhash.New(key) | ||||
| 		fi, err := os.Lstat(p) | ||||
| 		if err != nil { | ||||
| 			return "", err | ||||
|  | @ -63,7 +71,9 @@ func Hasher() func(string) (string, error) { | |||
| 				return "", err | ||||
| 			} | ||||
| 			defer f.Close() | ||||
| 			if _, err := io.Copy(h, f); err != nil { | ||||
| 			buf := pool.Get().([]byte) | ||||
| 			defer pool.Put(buf) | ||||
| 			if _, err := io.CopyBuffer(h, f, buf); err != nil { | ||||
| 				return "", err | ||||
| 			} | ||||
| 		} | ||||
|  |  | |||
|  | @ -0,0 +1,21 @@ | |||
| MIT License | ||||
| 
 | ||||
| Copyright (c) 2017 Minio Inc. | ||||
| 
 | ||||
| Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
| of this software and associated documentation files (the "Software"), to deal | ||||
| in the Software without restriction, including without limitation the rights | ||||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||||
| copies of the Software, and to permit persons to whom the Software is | ||||
| furnished to do so, subject to the following conditions: | ||||
| 
 | ||||
| The above copyright notice and this permission notice shall be included in all | ||||
| copies or substantial portions of the Software. | ||||
| 
 | ||||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
| SOFTWARE. | ||||
|  | @ -0,0 +1,225 @@ | |||
| // Copyright (c) 2017 Minio Inc. All rights reserved.
 | ||||
| // Use of this source code is governed by a license that can be
 | ||||
| // found in the LICENSE file.
 | ||||
| 
 | ||||
| // Package highwayhash implements the pseudo-random-function (PRF) HighwayHash.
 | ||||
| // HighwayHash is a fast hash function designed to defend hash-flooding attacks
 | ||||
| // or to authenticate short-lived messages.
 | ||||
| //
 | ||||
| // HighwayHash is not a general purpose cryptographic hash function and does not
 | ||||
| // provide (strong) collision resistance.
 | ||||
| package highwayhash | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/binary" | ||||
| 	"errors" | ||||
| 	"hash" | ||||
| ) | ||||
| 
 | ||||
| const ( | ||||
| 	// Size is the size of HighwayHash-256 checksum in bytes.
 | ||||
| 	Size = 32 | ||||
| 	// Size128 is the size of HighwayHash-128 checksum in bytes.
 | ||||
| 	Size128 = 16 | ||||
| 	// Size64 is the size of HighwayHash-64 checksum in bytes.
 | ||||
| 	Size64 = 8 | ||||
| ) | ||||
| 
 | ||||
| var errKeySize = errors.New("highwayhash: invalid key size") | ||||
| 
 | ||||
| // New returns a hash.Hash computing the HighwayHash-256 checksum.
 | ||||
| // It returns a non-nil error if the key is not 32 bytes long.
 | ||||
| func New(key []byte) (hash.Hash, error) { | ||||
| 	if len(key) != Size { | ||||
| 		return nil, errKeySize | ||||
| 	} | ||||
| 	h := &digest{size: Size} | ||||
| 	copy(h.key[:], key) | ||||
| 	h.Reset() | ||||
| 	return h, nil | ||||
| } | ||||
| 
 | ||||
| // New128 returns a hash.Hash computing the HighwayHash-128 checksum.
 | ||||
| // It returns a non-nil error if the key is not 32 bytes long.
 | ||||
| func New128(key []byte) (hash.Hash, error) { | ||||
| 	if len(key) != Size { | ||||
| 		return nil, errKeySize | ||||
| 	} | ||||
| 	h := &digest{size: Size128} | ||||
| 	copy(h.key[:], key) | ||||
| 	h.Reset() | ||||
| 	return h, nil | ||||
| } | ||||
| 
 | ||||
| // New64 returns a hash.Hash computing the HighwayHash-64 checksum.
 | ||||
| // It returns a non-nil error if the key is not 32 bytes long.
 | ||||
| func New64(key []byte) (hash.Hash64, error) { | ||||
| 	if len(key) != Size { | ||||
| 		return nil, errKeySize | ||||
| 	} | ||||
| 	h := new(digest64) | ||||
| 	h.size = Size64 | ||||
| 	copy(h.key[:], key) | ||||
| 	h.Reset() | ||||
| 	return h, nil | ||||
| } | ||||
| 
 | ||||
| // Sum computes the HighwayHash-256 checksum of data.
 | ||||
| // It panics if the key is not 32 bytes long.
 | ||||
| func Sum(data, key []byte) [Size]byte { | ||||
| 	if len(key) != Size { | ||||
| 		panic(errKeySize) | ||||
| 	} | ||||
| 	var state [16]uint64 | ||||
| 	initialize(&state, key) | ||||
| 	if n := len(data) & (^(Size - 1)); n > 0 { | ||||
| 		update(&state, data[:n]) | ||||
| 		data = data[n:] | ||||
| 	} | ||||
| 	if len(data) > 0 { | ||||
| 		var block [Size]byte | ||||
| 		offset := copy(block[:], data) | ||||
| 		hashBuffer(&state, &block, offset) | ||||
| 	} | ||||
| 	var hash [Size]byte | ||||
| 	finalize(hash[:], &state) | ||||
| 	return hash | ||||
| } | ||||
| 
 | ||||
| // Sum128 computes the HighwayHash-128 checksum of data.
 | ||||
| // It panics if the key is not 32 bytes long.
 | ||||
| func Sum128(data, key []byte) [Size128]byte { | ||||
| 	if len(key) != Size { | ||||
| 		panic(errKeySize) | ||||
| 	} | ||||
| 	var state [16]uint64 | ||||
| 	initialize(&state, key) | ||||
| 	if n := len(data) & (^(Size - 1)); n > 0 { | ||||
| 		update(&state, data[:n]) | ||||
| 		data = data[n:] | ||||
| 	} | ||||
| 	if len(data) > 0 { | ||||
| 		var block [Size]byte | ||||
| 		offset := copy(block[:], data) | ||||
| 		hashBuffer(&state, &block, offset) | ||||
| 	} | ||||
| 	var hash [Size128]byte | ||||
| 	finalize(hash[:], &state) | ||||
| 	return hash | ||||
| } | ||||
| 
 | ||||
| // Sum64 computes the HighwayHash-64 checksum of data.
 | ||||
| // It panics if the key is not 32 bytes long.
 | ||||
| func Sum64(data, key []byte) uint64 { | ||||
| 	if len(key) != Size { | ||||
| 		panic(errKeySize) | ||||
| 	} | ||||
| 	var state [16]uint64 | ||||
| 	initialize(&state, key) | ||||
| 	if n := len(data) & (^(Size - 1)); n > 0 { | ||||
| 		update(&state, data[:n]) | ||||
| 		data = data[n:] | ||||
| 	} | ||||
| 	if len(data) > 0 { | ||||
| 		var block [Size]byte | ||||
| 		offset := copy(block[:], data) | ||||
| 		hashBuffer(&state, &block, offset) | ||||
| 	} | ||||
| 	var hash [Size64]byte | ||||
| 	finalize(hash[:], &state) | ||||
| 	return binary.LittleEndian.Uint64(hash[:]) | ||||
| } | ||||
| 
 | ||||
| type digest64 struct{ digest } | ||||
| 
 | ||||
| func (d *digest64) Sum64() uint64 { | ||||
| 	state := d.state | ||||
| 	if d.offset > 0 { | ||||
| 		hashBuffer(&state, &d.buffer, d.offset) | ||||
| 	} | ||||
| 	var hash [8]byte | ||||
| 	finalize(hash[:], &state) | ||||
| 	return binary.LittleEndian.Uint64(hash[:]) | ||||
| } | ||||
| 
 | ||||
| type digest struct { | ||||
| 	state [16]uint64 // v0 | v1 | mul0 | mul1
 | ||||
| 
 | ||||
| 	key, buffer [Size]byte | ||||
| 	offset      int | ||||
| 
 | ||||
| 	size int | ||||
| } | ||||
| 
 | ||||
| func (d *digest) Size() int { return d.size } | ||||
| 
 | ||||
| func (d *digest) BlockSize() int { return Size } | ||||
| 
 | ||||
| func (d *digest) Reset() { | ||||
| 	initialize(&d.state, d.key[:]) | ||||
| 	d.offset = 0 | ||||
| } | ||||
| 
 | ||||
| func (d *digest) Write(p []byte) (n int, err error) { | ||||
| 	n = len(p) | ||||
| 	if d.offset > 0 { | ||||
| 		remaining := Size - d.offset | ||||
| 		if n < remaining { | ||||
| 			d.offset += copy(d.buffer[d.offset:], p) | ||||
| 			return | ||||
| 		} | ||||
| 		copy(d.buffer[d.offset:], p[:remaining]) | ||||
| 		update(&d.state, d.buffer[:]) | ||||
| 		p = p[remaining:] | ||||
| 		d.offset = 0 | ||||
| 	} | ||||
| 	if nn := len(p) & (^(Size - 1)); nn > 0 { | ||||
| 		update(&d.state, p[:nn]) | ||||
| 		p = p[nn:] | ||||
| 	} | ||||
| 	if len(p) > 0 { | ||||
| 		d.offset = copy(d.buffer[d.offset:], p) | ||||
| 	} | ||||
| 	return | ||||
| } | ||||
| 
 | ||||
| func (d *digest) Sum(b []byte) []byte { | ||||
| 	state := d.state | ||||
| 	if d.offset > 0 { | ||||
| 		hashBuffer(&state, &d.buffer, d.offset) | ||||
| 	} | ||||
| 	var hash [Size]byte | ||||
| 	finalize(hash[:d.size], &state) | ||||
| 	return append(b, hash[:d.size]...) | ||||
| } | ||||
| 
 | ||||
| func hashBuffer(state *[16]uint64, buffer *[32]byte, offset int) { | ||||
| 	var block [Size]byte | ||||
| 	mod32 := (uint64(offset) << 32) + uint64(offset) | ||||
| 	for i := range state[:4] { | ||||
| 		state[i] += mod32 | ||||
| 	} | ||||
| 	for i := range state[4:8] { | ||||
| 		t0 := uint32(state[i+4]) | ||||
| 		t0 = (t0 << uint(offset)) | (t0 >> uint(32-offset)) | ||||
| 
 | ||||
| 		t1 := uint32(state[i+4] >> 32) | ||||
| 		t1 = (t1 << uint(offset)) | (t1 >> uint(32-offset)) | ||||
| 
 | ||||
| 		state[i+4] = (uint64(t1) << 32) | uint64(t0) | ||||
| 	} | ||||
| 
 | ||||
| 	mod4 := offset & 3 | ||||
| 	remain := offset - mod4 | ||||
| 
 | ||||
| 	copy(block[:], buffer[:remain]) | ||||
| 	if offset >= 16 { | ||||
| 		copy(block[28:], buffer[offset-4:]) | ||||
| 	} else if mod4 != 0 { | ||||
| 		last := uint32(buffer[remain]) | ||||
| 		last += uint32(buffer[remain+mod4>>1]) << 8 | ||||
| 		last += uint32(buffer[offset-1]) << 16 | ||||
| 		binary.LittleEndian.PutUint32(block[16:], last) | ||||
| 	} | ||||
| 	update(state, block[:]) | ||||
| } | ||||
|  | @ -0,0 +1,68 @@ | |||
| // Copyright (c) 2017 Minio Inc. All rights reserved.
 | ||||
| // Use of this source code is governed by a license that can be
 | ||||
| // found in the LICENSE file.
 | ||||
| 
 | ||||
| // +build go1.8
 | ||||
| // +build amd64 !gccgo !appengine !nacl
 | ||||
| 
 | ||||
| package highwayhash | ||||
| 
 | ||||
| import "golang.org/x/sys/cpu" | ||||
| 
 | ||||
| var ( | ||||
| 	useSSE4 = cpu.X86.HasSSE41 | ||||
| 	useAVX2 = cpu.X86.HasAVX2 | ||||
| 	useNEON = false | ||||
| 	useVMX  = false | ||||
| ) | ||||
| 
 | ||||
| //go:noescape
 | ||||
| func initializeSSE4(state *[16]uint64, key []byte) | ||||
| 
 | ||||
| //go:noescape
 | ||||
| func initializeAVX2(state *[16]uint64, key []byte) | ||||
| 
 | ||||
| //go:noescape
 | ||||
| func updateSSE4(state *[16]uint64, msg []byte) | ||||
| 
 | ||||
| //go:noescape
 | ||||
| func updateAVX2(state *[16]uint64, msg []byte) | ||||
| 
 | ||||
| //go:noescape
 | ||||
| func finalizeSSE4(out []byte, state *[16]uint64) | ||||
| 
 | ||||
| //go:noescape
 | ||||
| func finalizeAVX2(out []byte, state *[16]uint64) | ||||
| 
 | ||||
| func initialize(state *[16]uint64, key []byte) { | ||||
| 	switch { | ||||
| 	case useAVX2: | ||||
| 		initializeAVX2(state, key) | ||||
| 	case useSSE4: | ||||
| 		initializeSSE4(state, key) | ||||
| 	default: | ||||
| 		initializeGeneric(state, key) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func update(state *[16]uint64, msg []byte) { | ||||
| 	switch { | ||||
| 	case useAVX2: | ||||
| 		updateAVX2(state, msg) | ||||
| 	case useSSE4: | ||||
| 		updateSSE4(state, msg) | ||||
| 	default: | ||||
| 		updateGeneric(state, msg) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func finalize(out []byte, state *[16]uint64) { | ||||
| 	switch { | ||||
| 	case useAVX2: | ||||
| 		finalizeAVX2(out, state) | ||||
| 	case useSSE4: | ||||
| 		finalizeSSE4(out, state) | ||||
| 	default: | ||||
| 		finalizeGeneric(out, state) | ||||
| 	} | ||||
| } | ||||
|  | @ -0,0 +1,249 @@ | |||
| // Copyright (c) 2017 Minio Inc. All rights reserved. | ||||
| // Use of this source code is governed by a license that can be | ||||
| // found in the LICENSE file. | ||||
| 
 | ||||
| // +build go1.8 | ||||
| // +build amd64 !gccgo !appengine !nacl | ||||
| 
 | ||||
| #include "textflag.h" | ||||
| 
 | ||||
| DATA ·consAVX2<>+0x00(SB)/8, $0xdbe6d5d5fe4cce2f | ||||
| DATA ·consAVX2<>+0x08(SB)/8, $0xa4093822299f31d0 | ||||
| DATA ·consAVX2<>+0x10(SB)/8, $0x13198a2e03707344 | ||||
| DATA ·consAVX2<>+0x18(SB)/8, $0x243f6a8885a308d3 | ||||
| DATA ·consAVX2<>+0x20(SB)/8, $0x3bd39e10cb0ef593 | ||||
| DATA ·consAVX2<>+0x28(SB)/8, $0xc0acf169b5f18a8c | ||||
| DATA ·consAVX2<>+0x30(SB)/8, $0xbe5466cf34e90c6c | ||||
| DATA ·consAVX2<>+0x38(SB)/8, $0x452821e638d01377 | ||||
| GLOBL ·consAVX2<>(SB), (NOPTR+RODATA), $64 | ||||
| 
 | ||||
| DATA ·zipperMergeAVX2<>+0x00(SB)/8, $0xf010e05020c03 | ||||
| DATA ·zipperMergeAVX2<>+0x08(SB)/8, $0x70806090d0a040b | ||||
| DATA ·zipperMergeAVX2<>+0x10(SB)/8, $0xf010e05020c03 | ||||
| DATA ·zipperMergeAVX2<>+0x18(SB)/8, $0x70806090d0a040b | ||||
| GLOBL ·zipperMergeAVX2<>(SB), (NOPTR+RODATA), $32 | ||||
| 
 | ||||
// REDUCE_MOD folds the 128-bit value (x3:x2) into the 128-bit value (x1:x0).
// It first clears the top two bits of x3, then computes
//   (y1:y0) = (x1:x0) XOR ((x3:x2) << 1) XOR ((x3:x2) << 2)
// using 128-bit shifts. x2, x3, tmp0 and tmp1 are clobbered; the result is
// left in y0 (low word) and y1 (high word).
| #define REDUCE_MOD(x0, x1, x2, x3, tmp0, tmp1, y0, y1) \ | ||||
| 	MOVQ $0x3FFFFFFFFFFFFFFF, tmp0 \ | ||||
| 	ANDQ tmp0, x3                  \ | ||||
| 	MOVQ x2, y0                    \ | ||||
| 	MOVQ x3, y1                    \ | ||||
| 	                               \ | ||||
| 	MOVQ x2, tmp0                  \ | ||||
| 	MOVQ x3, tmp1                  \ | ||||
| 	SHLQ $1, tmp1                  \ | ||||
| 	SHRQ $63, tmp0                 \ | ||||
| 	MOVQ tmp1, x3                  \ | ||||
| 	ORQ  tmp0, x3                  \ | ||||
| 	                               \ | ||||
| 	SHLQ $1, x2                    \ | ||||
| 	                               \ | ||||
| 	MOVQ y0, tmp0                  \ | ||||
| 	MOVQ y1, tmp1                  \ | ||||
| 	SHLQ $2, tmp1                  \ | ||||
| 	SHRQ $62, tmp0                 \ | ||||
| 	MOVQ tmp1, y1                  \ | ||||
| 	ORQ  tmp0, y1                  \ | ||||
| 	                               \ | ||||
| 	SHLQ $2, y0                    \ | ||||
| 	                               \ | ||||
| 	XORQ x0, y0                    \ | ||||
| 	XORQ x2, y0                    \ | ||||
| 	XORQ x1, y1                    \ | ||||
| 	XORQ x3, y1 | ||||
| 
 | ||||
// UPDATE runs one hash round over the 32-byte value in msg.
// Register use, matching how the routines below load the state:
//   Y1 = state[0:4], Y2 = state[4:8], Y3 = state[8:12], Y4 = state[12:16],
//   Y5 = zipperMergeAVX2 shuffle mask, Y0 = scratch (clobbered).
// Steps: Y2 += msg + Y3; Y3 ^= (Y1 >> 32) * Y2; Y1 += Y4;
// Y4 ^= (Y2 >> 32) * Y1; Y1 += shuffle(Y2, Y5); Y2 += shuffle(Y1, Y5).
// The multiplies are emitted as raw BYTE sequences; per the inline
// comments they encode VPMULUDQ.
| #define UPDATE(msg) \ | ||||
| 	VPADDQ  msg, Y2, Y2                               \ | ||||
| 	VPADDQ  Y3, Y2, Y2                                \ | ||||
| 	                                                  \ | ||||
| 	VPSRLQ  $32, Y1, Y0                               \ | ||||
| 	BYTE    $0xC5; BYTE $0xFD; BYTE $0xF4; BYTE $0xC2 \ // VPMULUDQ Y2, Y0, Y0
 | ||||
| 	VPXOR   Y0, Y3, Y3                                \ | ||||
| 	                                                  \ | ||||
| 	VPADDQ  Y4, Y1, Y1                                \ | ||||
| 	                                                  \ | ||||
| 	VPSRLQ  $32, Y2, Y0                               \ | ||||
| 	BYTE    $0xC5; BYTE $0xFD; BYTE $0xF4; BYTE $0xC1 \ // VPMULUDQ Y1, Y0, Y0
 | ||||
| 	VPXOR   Y0, Y4, Y4                                \ | ||||
| 	                                                  \ | ||||
| 	VPSHUFB Y5, Y2, Y0                                \ | ||||
| 	VPADDQ  Y0, Y1, Y1                                \ | ||||
| 	                                                  \ | ||||
| 	VPSHUFB Y5, Y1, Y0                                \ | ||||
| 	VPADDQ  Y0, Y2, Y2 | ||||
| 
 | ||||
// initializeAVX2 derives the initial 128-byte hash state from the key.
// It reads 32 bytes at key's base pointer (key's length is not checked
// here) and writes four 32-byte rows to state:
//   state[0:4]   = key XOR consAVX2[0:4]
//   state[4:8]   = key with the 32-bit halves of each 64-bit word swapped,
//                  XOR consAVX2[4:8]
//   state[8:12]  = consAVX2[0:4]
//   state[12:16] = consAVX2[4:8]
| // func initializeAVX2(state *[16]uint64, key []byte) | ||||
| TEXT ·initializeAVX2(SB), 4, $0-32 | ||||
| 	MOVQ state+0(FP), AX | ||||
| 	MOVQ key_base+8(FP), BX | ||||
| 	MOVQ $·consAVX2<>(SB), CX | ||||
| 
 | ||||
	// Y1 = key, Y2 = key with adjacent 32-bit words swapped.
| 	VMOVDQU 0(BX), Y1 | ||||
| 	VPSHUFD $177, Y1, Y2 | ||||
| 
 | ||||
| 	VMOVDQU 0(CX), Y3 | ||||
| 	VMOVDQU 32(CX), Y4 | ||||
| 
 | ||||
| 	VPXOR Y3, Y1, Y1 | ||||
| 	VPXOR Y4, Y2, Y2 | ||||
| 
 | ||||
| 	VMOVDQU Y1, 0(AX) | ||||
| 	VMOVDQU Y2, 32(AX) | ||||
| 	VMOVDQU Y3, 64(AX) | ||||
| 	VMOVDQU Y4, 96(AX) | ||||
| 	VZEROUPPER | ||||
| 	RET | ||||
| 
 | ||||
// updateAVX2 absorbs msg into state, 32 bytes per loop iteration.
// It returns without touching state when len(msg) < 32.
// NOTE(review): the loop condition only stops once the remaining length
// reaches zero or wraps, so msg is expected to be a multiple of 32 bytes
// long; the callers visible in highwayhash.go always pass such slices.
| // func updateAVX2(state *[16]uint64, msg []byte) | ||||
| TEXT ·updateAVX2(SB), 4, $0-32 | ||||
| 	MOVQ state+0(FP), AX | ||||
| 	MOVQ msg_base+8(FP), BX | ||||
| 	MOVQ msg_len+16(FP), CX | ||||
| 
 | ||||
| 	CMPQ CX, $32 | ||||
| 	JB   DONE | ||||
| 
 | ||||
	// Load the four state rows into Y1..Y4 and the shuffle mask into Y5.
| 	VMOVDQU 0(AX), Y1 | ||||
| 	VMOVDQU 32(AX), Y2 | ||||
| 	VMOVDQU 64(AX), Y3 | ||||
| 	VMOVDQU 96(AX), Y4 | ||||
| 
 | ||||
| 	VMOVDQU ·zipperMergeAVX2<>(SB), Y5 | ||||
| 
 | ||||
| LOOP: | ||||
| 	VMOVDQU 0(BX), Y0 | ||||
| 	UPDATE(Y0) | ||||
| 
 | ||||
	// Advance to the next block; keep looping while bytes remain.
| 	ADDQ $32, BX | ||||
| 	SUBQ $32, CX | ||||
| 	JA   LOOP | ||||
| 
 | ||||
	// Write the updated rows back to state.
| 	VMOVDQU Y1, 0(AX) | ||||
| 	VMOVDQU Y2, 32(AX) | ||||
| 	VMOVDQU Y3, 64(AX) | ||||
| 	VMOVDQU Y4, 96(AX) | ||||
| 	VZEROUPPER | ||||
| 
 | ||||
| DONE: | ||||
| 	RET | ||||
| 
 | ||||
// finalizeAVX2 runs the final permutation rounds on state and writes the
// checksum to out. len(out) selects the variant: 8 gives the 64-bit
// checksum (4 rounds), 16 gives the 128-bit checksum (6 rounds), and any
// other length takes the 256-bit path (10 rounds, writes 32 bytes).
// Each round feeds UPDATE with Y1 after swapping its 128-bit halves and
// then the 32-bit halves of every 64-bit word.
// The permuted state is stored back through the state pointer before the
// output words are derived; the callers visible in highwayhash.go pass a
// scratch copy of the state.
| // func finalizeAVX2(out []byte, state *[16]uint64) | ||||
| TEXT ·finalizeAVX2(SB), 4, $0-32 | ||||
| 	MOVQ state+24(FP), AX | ||||
| 	MOVQ out_base+0(FP), BX | ||||
| 	MOVQ out_len+8(FP), CX | ||||
| 
 | ||||
| 	VMOVDQU 0(AX), Y1 | ||||
| 	VMOVDQU 32(AX), Y2 | ||||
| 	VMOVDQU 64(AX), Y3 | ||||
| 	VMOVDQU 96(AX), Y4 | ||||
| 
 | ||||
| 	VMOVDQU ·zipperMergeAVX2<>(SB), Y5 | ||||
| 
 | ||||
| 	VPERM2I128 $1, Y1, Y1, Y0 | ||||
| 	VPSHUFD    $177, Y0, Y0 | ||||
| 	UPDATE(Y0) | ||||
| 
 | ||||
| 	VPERM2I128 $1, Y1, Y1, Y0 | ||||
| 	VPSHUFD    $177, Y0, Y0 | ||||
| 	UPDATE(Y0) | ||||
| 
 | ||||
| 	VPERM2I128 $1, Y1, Y1, Y0 | ||||
| 	VPSHUFD    $177, Y0, Y0 | ||||
| 	UPDATE(Y0) | ||||
| 
 | ||||
| 	VPERM2I128 $1, Y1, Y1, Y0 | ||||
| 	VPSHUFD    $177, Y0, Y0 | ||||
| 	UPDATE(Y0) | ||||
| 
 | ||||
| 	CMPQ CX, $8 | ||||
| 	JE   skipUpdate // Just 4 rounds for 64-bit checksum | ||||
| 
 | ||||
| 	VPERM2I128 $1, Y1, Y1, Y0 | ||||
| 	VPSHUFD    $177, Y0, Y0 | ||||
| 	UPDATE(Y0) | ||||
| 
 | ||||
| 	VPERM2I128 $1, Y1, Y1, Y0 | ||||
| 	VPSHUFD    $177, Y0, Y0 | ||||
| 	UPDATE(Y0) | ||||
| 
 | ||||
| 	CMPQ CX, $16 | ||||
| 	JE   skipUpdate // 6 rounds for 128-bit checksum | ||||
| 
 | ||||
| 	VPERM2I128 $1, Y1, Y1, Y0 | ||||
| 	VPSHUFD    $177, Y0, Y0 | ||||
| 	UPDATE(Y0) | ||||
| 
 | ||||
| 	VPERM2I128 $1, Y1, Y1, Y0 | ||||
| 	VPSHUFD    $177, Y0, Y0 | ||||
| 	UPDATE(Y0) | ||||
| 
 | ||||
| 	VPERM2I128 $1, Y1, Y1, Y0 | ||||
| 	VPSHUFD    $177, Y0, Y0 | ||||
| 	UPDATE(Y0) | ||||
| 
 | ||||
| 	VPERM2I128 $1, Y1, Y1, Y0 | ||||
| 	VPSHUFD    $177, Y0, Y0 | ||||
| 	UPDATE(Y0) | ||||
| 
 | ||||
| skipUpdate: | ||||
	// Spill the permuted state to memory; the scalar code below reads it
	// back word by word through AX.
| 	VMOVDQU Y1, 0(AX) | ||||
| 	VMOVDQU Y2, 32(AX) | ||||
| 	VMOVDQU Y3, 64(AX) | ||||
| 	VMOVDQU Y4, 96(AX) | ||||
| 	VZEROUPPER | ||||
| 
 | ||||
| 	CMPQ CX, $8 | ||||
| 	JE   hash64 | ||||
| 	CMPQ CX, $16 | ||||
| 	JE   hash128 | ||||
| 
 | ||||
| 	// 256-bit checksum | ||||
	// First half: reduce (s0+s8, s1+s9, s4+s12, s5+s13) into out[0:16].
| 	MOVQ 0*8(AX), R8 | ||||
| 	MOVQ 1*8(AX), R9 | ||||
| 	MOVQ 4*8(AX), R10 | ||||
| 	MOVQ 5*8(AX), R11 | ||||
| 	ADDQ 8*8(AX), R8 | ||||
| 	ADDQ 9*8(AX), R9 | ||||
| 	ADDQ 12*8(AX), R10 | ||||
| 	ADDQ 13*8(AX), R11 | ||||
| 
 | ||||
| 	REDUCE_MOD(R8, R9, R10, R11, R12, R13, R14, R15) | ||||
| 	MOVQ R14, 0(BX) | ||||
| 	MOVQ R15, 8(BX) | ||||
| 
 | ||||
	// Second half: reduce (s2+s10, s3+s11, s6+s14, s7+s15) into out[16:32].
| 	MOVQ 2*8(AX), R8 | ||||
| 	MOVQ 3*8(AX), R9 | ||||
| 	MOVQ 6*8(AX), R10 | ||||
| 	MOVQ 7*8(AX), R11 | ||||
| 	ADDQ 10*8(AX), R8 | ||||
| 	ADDQ 11*8(AX), R9 | ||||
| 	ADDQ 14*8(AX), R10 | ||||
| 	ADDQ 15*8(AX), R11 | ||||
| 
 | ||||
| 	REDUCE_MOD(R8, R9, R10, R11, R12, R13, R14, R15) | ||||
| 	MOVQ R14, 16(BX) | ||||
| 	MOVQ R15, 24(BX) | ||||
| 	RET | ||||
| 
 | ||||
| hash128: | ||||
	// out[0:8] = s0+s6+s8+s14, out[8:16] = s1+s7+s9+s15.
| 	MOVQ 0*8(AX), R8 | ||||
| 	MOVQ 1*8(AX), R9 | ||||
| 	ADDQ 6*8(AX), R8 | ||||
| 	ADDQ 7*8(AX), R9 | ||||
| 	ADDQ 8*8(AX), R8 | ||||
| 	ADDQ 9*8(AX), R9 | ||||
| 	ADDQ 14*8(AX), R8 | ||||
| 	ADDQ 15*8(AX), R9 | ||||
| 	MOVQ R8, 0(BX) | ||||
| 	MOVQ R9, 8(BX) | ||||
| 	RET | ||||
| 
 | ||||
| hash64: | ||||
	// out[0:8] = s0+s4+s8+s12.
| 	MOVQ 0*8(AX), DX | ||||
| 	ADDQ 4*8(AX), DX | ||||
| 	ADDQ 8*8(AX), DX | ||||
| 	ADDQ 12*8(AX), DX | ||||
| 	MOVQ DX, 0(BX) | ||||
| 	RET | ||||
| 
 | ||||
|  | @ -0,0 +1,50 @@ | |||
| // Copyright (c) 2017 Minio Inc. All rights reserved.
 | ||||
| // Use of this source code is governed by a license that can be
 | ||||
| // found in the LICENSE file.
 | ||||
| 
 | ||||
| // +build !go1.8
 | ||||
| // +build amd64 !gccgo !appengine !nacl
 | ||||
| 
 | ||||
| package highwayhash | ||||
| 
 | ||||
| import "golang.org/x/sys/cpu" | ||||
| 
 | ||||
| var ( | ||||
| 	useSSE4 = cpu.X86.HasSSE41 | ||||
| 	useAVX2 = false | ||||
| 	useNEON = false | ||||
| 	useVMX  = false | ||||
| ) | ||||
| 
 | ||||
| //go:noescape
 | ||||
| func initializeSSE4(state *[16]uint64, key []byte) | ||||
| 
 | ||||
| //go:noescape
 | ||||
| func updateSSE4(state *[16]uint64, msg []byte) | ||||
| 
 | ||||
| //go:noescape
 | ||||
| func finalizeSSE4(out []byte, state *[16]uint64) | ||||
| 
 | ||||
| func initialize(state *[16]uint64, key []byte) { | ||||
| 	if useSSE4 { | ||||
| 		initializeSSE4(state, key) | ||||
| 	} else { | ||||
| 		initializeGeneric(state, key) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func update(state *[16]uint64, msg []byte) { | ||||
| 	if useSSE4 { | ||||
| 		updateSSE4(state, msg) | ||||
| 	} else { | ||||
| 		updateGeneric(state, msg) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func finalize(out []byte, state *[16]uint64) { | ||||
| 	if useSSE4 { | ||||
| 		finalizeSSE4(out, state) | ||||
| 	} else { | ||||
| 		finalizeGeneric(out, state) | ||||
| 	} | ||||
| } | ||||
|  | @ -0,0 +1,294 @@ | |||
| // Copyright (c) 2017 Minio Inc. All rights reserved. | ||||
| // Use of this source code is governed by a license that can be | ||||
| // found in the LICENSE file. | ||||
| 
 | ||||
| // +build amd64 !gccgo !appengine !nacl | ||||
| 
 | ||||
| #include "textflag.h" | ||||
| 
 | ||||
| DATA ·cons<>+0x00(SB)/8, $0xdbe6d5d5fe4cce2f | ||||
| DATA ·cons<>+0x08(SB)/8, $0xa4093822299f31d0 | ||||
| DATA ·cons<>+0x10(SB)/8, $0x13198a2e03707344 | ||||
| DATA ·cons<>+0x18(SB)/8, $0x243f6a8885a308d3 | ||||
| DATA ·cons<>+0x20(SB)/8, $0x3bd39e10cb0ef593 | ||||
| DATA ·cons<>+0x28(SB)/8, $0xc0acf169b5f18a8c | ||||
| DATA ·cons<>+0x30(SB)/8, $0xbe5466cf34e90c6c | ||||
| DATA ·cons<>+0x38(SB)/8, $0x452821e638d01377 | ||||
| GLOBL ·cons<>(SB), (NOPTR+RODATA), $64 | ||||
| 
 | ||||
| DATA ·zipperMerge<>+0x00(SB)/8, $0xf010e05020c03 | ||||
| DATA ·zipperMerge<>+0x08(SB)/8, $0x70806090d0a040b | ||||
| GLOBL ·zipperMerge<>(SB), (NOPTR+RODATA), $16 | ||||
| 
 | ||||
| #define v00 X0 | ||||
| #define v01 X1 | ||||
| #define v10 X2 | ||||
| #define v11 X3 | ||||
| #define m00 X4 | ||||
| #define m01 X5 | ||||
| #define m10 X6 | ||||
| #define m11 X7 | ||||
| 
 | ||||
| #define t0 X8 | ||||
| #define t1 X9 | ||||
| #define t2 X10 | ||||
| 
 | ||||
// REDUCE_MOD folds the 128-bit value (x3:x2) into the 128-bit value (x1:x0).
// It first clears the top two bits of x3, then computes
//   (y1:y0) = (x1:x0) XOR ((x3:x2) << 1) XOR ((x3:x2) << 2)
// using 128-bit shifts. x2, x3, tmp0 and tmp1 are clobbered; the result is
// left in y0 (low word) and y1 (high word).
| #define REDUCE_MOD(x0, x1, x2, x3, tmp0, tmp1, y0, y1) \ | ||||
| 	MOVQ $0x3FFFFFFFFFFFFFFF, tmp0 \ | ||||
| 	ANDQ tmp0, x3                  \ | ||||
| 	MOVQ x2, y0                    \ | ||||
| 	MOVQ x3, y1                    \ | ||||
| 	                               \ | ||||
| 	MOVQ x2, tmp0                  \ | ||||
| 	MOVQ x3, tmp1                  \ | ||||
| 	SHLQ $1, tmp1                  \ | ||||
| 	SHRQ $63, tmp0                 \ | ||||
| 	MOVQ tmp1, x3                  \ | ||||
| 	ORQ  tmp0, x3                  \ | ||||
| 	                               \ | ||||
| 	SHLQ $1, x2                    \ | ||||
| 	                               \ | ||||
| 	MOVQ y0, tmp0                  \ | ||||
| 	MOVQ y1, tmp1                  \ | ||||
| 	SHLQ $2, tmp1                  \ | ||||
| 	SHRQ $62, tmp0                 \ | ||||
| 	MOVQ tmp1, y1                  \ | ||||
| 	ORQ  tmp0, y1                  \ | ||||
| 	                               \ | ||||
| 	SHLQ $2, y0                    \ | ||||
| 	                               \ | ||||
| 	XORQ x0, y0                    \ | ||||
| 	XORQ x2, y0                    \ | ||||
| 	XORQ x1, y1                    \ | ||||
| 	XORQ x3, y1 | ||||
| 
 | ||||
// UPDATE runs one hash round over a 32-byte block supplied as two 16-byte
// halves, msg0 (low) and msg1 (high). Every state row is split across two
// XMM registers (suffix 0 = low 16 bytes, suffix 1 = high 16 bytes):
//   v00/v01 = state[0:4], v10/v11 = state[4:8],
//   m00/m01 = state[8:12], m10/m11 = state[12:16].
// t2 must hold the zipperMerge shuffle mask. t0 and t1 are scratch and are
// overwritten, so msg0/msg1 may be passed in t0/t1 (they are consumed by
// the first additions before t0/t1 are reused).
// Steps per half: v1 += msg + mul0; mul0 ^= (v0 >> 32) * v1; v0 += mul1;
// mul1 ^= (v1 >> 32) * v0; v0 += shuffle(v1); v1 += shuffle(v0).
| #define UPDATE(msg0, msg1) \ | ||||
| 	PADDQ   msg0, v10 \ | ||||
| 	PADDQ   m00, v10  \ | ||||
| 	PADDQ   msg1, v11 \ | ||||
| 	PADDQ   m01, v11  \ | ||||
| 	                  \ | ||||
| 	MOVO    v00, t0   \ | ||||
| 	MOVO    v01, t1   \ | ||||
| 	PSRLQ   $32, t0   \ | ||||
| 	PSRLQ   $32, t1   \ | ||||
| 	PMULULQ v10, t0   \ | ||||
| 	PMULULQ v11, t1   \ | ||||
| 	PXOR    t0, m00   \ | ||||
| 	PXOR    t1, m01   \ | ||||
| 	                  \ | ||||
| 	PADDQ   m10, v00  \ | ||||
| 	PADDQ   m11, v01  \ | ||||
| 	                  \ | ||||
| 	MOVO    v10, t0   \ | ||||
| 	MOVO    v11, t1   \ | ||||
| 	PSRLQ   $32, t0   \ | ||||
| 	PSRLQ   $32, t1   \ | ||||
| 	PMULULQ v00, t0   \ | ||||
| 	PMULULQ v01, t1   \ | ||||
| 	PXOR    t0, m10   \ | ||||
| 	PXOR    t1, m11   \ | ||||
| 	                  \ | ||||
| 	MOVO    v10, t0   \ | ||||
| 	PSHUFB  t2, t0    \ | ||||
| 	MOVO    v11, t1   \ | ||||
| 	PSHUFB  t2, t1    \ | ||||
| 	PADDQ   t0, v00   \ | ||||
| 	PADDQ   t1, v01   \ | ||||
| 	                  \ | ||||
| 	MOVO    v00, t0   \ | ||||
| 	PSHUFB  t2, t0    \ | ||||
| 	MOVO    v01, t1   \ | ||||
| 	PSHUFB  t2, t1    \ | ||||
| 	PADDQ   t0, v10   \ | ||||
| 	PADDQ   t1, v11 | ||||
| 
 | ||||
// initializeSSE4 derives the initial 128-byte hash state from the key.
// It reads 32 bytes at key's base pointer (key's length is not checked
// here) and writes:
//   state[0:4]   = key XOR cons[0:4]
//   state[4:8]   = key with the 32-bit halves of each 64-bit word swapped,
//                  XOR cons[4:8]
//   state[8:12]  = cons[0:4]
//   state[12:16] = cons[4:8]
| // func initializeSSE4(state *[16]uint64, key []byte) | ||||
| TEXT ·initializeSSE4(SB), 4, $0-32 | ||||
| 	MOVQ state+0(FP), AX | ||||
| 	MOVQ key_base+8(FP), BX | ||||
| 	MOVQ $·cons<>(SB), CX | ||||
| 
 | ||||
| 	MOVOU 0(BX), v00 | ||||
| 	MOVOU 16(BX), v01 | ||||
| 
 | ||||
	// v10/v11 = key with adjacent 32-bit words swapped.
| 	PSHUFD $177, v00, v10 | ||||
| 	PSHUFD $177, v01, v11 | ||||
| 
 | ||||
| 	MOVOU 0(CX), m00 | ||||
| 	MOVOU 16(CX), m01 | ||||
| 	MOVOU 32(CX), m10 | ||||
| 	MOVOU 48(CX), m11 | ||||
| 
 | ||||
| 	PXOR m00, v00 | ||||
| 	PXOR m01, v01 | ||||
| 	PXOR m10, v10 | ||||
| 	PXOR m11, v11 | ||||
| 
 | ||||
| 	MOVOU v00, 0(AX) | ||||
| 	MOVOU v01, 16(AX) | ||||
| 	MOVOU v10, 32(AX) | ||||
| 	MOVOU v11, 48(AX) | ||||
| 	MOVOU m00, 64(AX) | ||||
| 	MOVOU m01, 80(AX) | ||||
| 	MOVOU m10, 96(AX) | ||||
| 	MOVOU m11, 112(AX) | ||||
| 	RET | ||||
| 
 | ||||
// updateSSE4 absorbs msg into state, 32 bytes per loop iteration.
// It returns without touching state when len(msg) < 32.
// NOTE(review): the loop condition only stops once the remaining length
// reaches zero or wraps, so msg is expected to be a multiple of 32 bytes
// long; the callers visible in highwayhash.go always pass such slices.
| // func updateSSE4(state *[16]uint64, msg []byte) | ||||
| TEXT ·updateSSE4(SB), 4, $0-32 | ||||
| 	MOVQ state+0(FP), AX | ||||
| 	MOVQ msg_base+8(FP), BX | ||||
| 	MOVQ msg_len+16(FP), CX | ||||
| 
 | ||||
| 	CMPQ CX, $32 | ||||
| 	JB   DONE | ||||
| 
 | ||||
	// Load the whole state into the eight state registers.
| 	MOVOU 0(AX), v00 | ||||
| 	MOVOU 16(AX), v01 | ||||
| 	MOVOU 32(AX), v10 | ||||
| 	MOVOU 48(AX), v11 | ||||
| 	MOVOU 64(AX), m00 | ||||
| 	MOVOU 80(AX), m01 | ||||
| 	MOVOU 96(AX), m10 | ||||
| 	MOVOU 112(AX), m11 | ||||
| 
 | ||||
| 	MOVOU ·zipperMerge<>(SB), t2 | ||||
| 
 | ||||
| LOOP: | ||||
| 	MOVOU 0(BX), t0 | ||||
| 	MOVOU 16(BX), t1 | ||||
| 
 | ||||
| 	UPDATE(t0, t1) | ||||
| 
 | ||||
	// Advance to the next block; keep looping while bytes remain.
| 	ADDQ $32, BX | ||||
| 	SUBQ $32, CX | ||||
| 	JA   LOOP | ||||
| 
 | ||||
	// Write the updated state back.
| 	MOVOU v00, 0(AX) | ||||
| 	MOVOU v01, 16(AX) | ||||
| 	MOVOU v10, 32(AX) | ||||
| 	MOVOU v11, 48(AX) | ||||
| 	MOVOU m00, 64(AX) | ||||
| 	MOVOU m01, 80(AX) | ||||
| 	MOVOU m10, 96(AX) | ||||
| 	MOVOU m11, 112(AX) | ||||
| 
 | ||||
| DONE: | ||||
| 	RET | ||||
| 
 | ||||
// finalizeSSE4 runs the final permutation rounds on state and writes the
// checksum to out. len(out) selects the variant: 8 gives the 64-bit
// checksum (4 rounds), 16 gives the 128-bit checksum (6 rounds), and any
// other length takes the 256-bit path (10 rounds, writes 32 bytes).
// Each round feeds UPDATE with the two halves of state[0:4] exchanged
// (v01 as the low message half, v00 as the high one) and with the 32-bit
// halves of every 64-bit word swapped.
// The permuted state is stored back through the state pointer before the
// output is derived; the callers visible in highwayhash.go pass a scratch
// copy of the state.
| // func finalizeSSE4(out []byte, state *[16]uint64) | ||||
| TEXT ·finalizeSSE4(SB), 4, $0-32 | ||||
| 	MOVQ state+24(FP), AX | ||||
| 	MOVQ out_base+0(FP), BX | ||||
| 	MOVQ out_len+8(FP), CX | ||||
| 
 | ||||
| 	MOVOU 0(AX), v00 | ||||
| 	MOVOU 16(AX), v01 | ||||
| 	MOVOU 32(AX), v10 | ||||
| 	MOVOU 48(AX), v11 | ||||
| 	MOVOU 64(AX), m00 | ||||
| 	MOVOU 80(AX), m01 | ||||
| 	MOVOU 96(AX), m10 | ||||
| 	MOVOU 112(AX), m11 | ||||
| 
 | ||||
| 	MOVOU ·zipperMerge<>(SB), t2 | ||||
| 
 | ||||
| 	PSHUFD $177, v01, t0 | ||||
| 	PSHUFD $177, v00, t1 | ||||
| 	UPDATE(t0, t1) | ||||
| 
 | ||||
| 	PSHUFD $177, v01, t0 | ||||
| 	PSHUFD $177, v00, t1 | ||||
| 	UPDATE(t0, t1) | ||||
| 
 | ||||
| 	PSHUFD $177, v01, t0 | ||||
| 	PSHUFD $177, v00, t1 | ||||
| 	UPDATE(t0, t1) | ||||
| 
 | ||||
| 	PSHUFD $177, v01, t0 | ||||
| 	PSHUFD $177, v00, t1 | ||||
| 	UPDATE(t0, t1) | ||||
| 
 | ||||
| 	CMPQ CX, $8 | ||||
| 	JE   skipUpdate // Just 4 rounds for 64-bit checksum | ||||
| 
 | ||||
| 	PSHUFD $177, v01, t0 | ||||
| 	PSHUFD $177, v00, t1 | ||||
| 	UPDATE(t0, t1) | ||||
| 
 | ||||
| 	PSHUFD $177, v01, t0 | ||||
| 	PSHUFD $177, v00, t1 | ||||
| 	UPDATE(t0, t1) | ||||
| 
 | ||||
| 	CMPQ CX, $16 | ||||
| 	JE   skipUpdate // 6 rounds for 128-bit checksum | ||||
| 
 | ||||
| 	PSHUFD $177, v01, t0 | ||||
| 	PSHUFD $177, v00, t1 | ||||
| 	UPDATE(t0, t1) | ||||
| 
 | ||||
| 	PSHUFD $177, v01, t0 | ||||
| 	PSHUFD $177, v00, t1 | ||||
| 	UPDATE(t0, t1) | ||||
| 
 | ||||
| 	PSHUFD $177, v01, t0 | ||||
| 	PSHUFD $177, v00, t1 | ||||
| 	UPDATE(t0, t1) | ||||
| 
 | ||||
| 	PSHUFD $177, v01, t0 | ||||
| 	PSHUFD $177, v00, t1 | ||||
| 	UPDATE(t0, t1) | ||||
| 
 | ||||
| skipUpdate: | ||||
	// Store the permuted state back; the registers keep their values and
	// are used directly below.
| 	MOVOU v00, 0(AX) | ||||
| 	MOVOU v01, 16(AX) | ||||
| 	MOVOU v10, 32(AX) | ||||
| 	MOVOU v11, 48(AX) | ||||
| 	MOVOU m00, 64(AX) | ||||
| 	MOVOU m01, 80(AX) | ||||
| 	MOVOU m10, 96(AX) | ||||
| 	MOVOU m11, 112(AX) | ||||
| 
 | ||||
| 	CMPQ CX, $8 | ||||
| 	JE   hash64 | ||||
| 	CMPQ CX, $16 | ||||
| 	JE   hash128 | ||||
| 
 | ||||
| 	// 256-bit checksum | ||||
	// Add state[0:8] onto state[8:16] lane-wise, then reduce each half.
| 	PADDQ v00, m00 | ||||
| 	PADDQ v10, m10 | ||||
| 	PADDQ v01, m01 | ||||
| 	PADDQ v11, m11 | ||||
| 
 | ||||
| 	MOVQ   m00, R8 | ||||
| 	PEXTRQ $1, m00, R9 | ||||
| 	MOVQ   m10, R10 | ||||
| 	PEXTRQ $1, m10, R11 | ||||
| 	REDUCE_MOD(R8, R9, R10, R11, R12, R13, R14, R15) | ||||
| 	MOVQ   R14, 0(BX) | ||||
| 	MOVQ   R15, 8(BX) | ||||
| 
 | ||||
| 	MOVQ   m01, R8 | ||||
| 	PEXTRQ $1, m01, R9 | ||||
| 	MOVQ   m11, R10 | ||||
| 	PEXTRQ $1, m11, R11 | ||||
| 	REDUCE_MOD(R8, R9, R10, R11, R12, R13, R14, R15) | ||||
| 	MOVQ   R14, 16(BX) | ||||
| 	MOVQ   R15, 24(BX) | ||||
| 	RET | ||||
| 
 | ||||
| hash128: | ||||
	// out[0:16] = state[0:2] + state[6:8] + state[8:10] + state[14:16], lane-wise.
| 	PADDQ v00, v11 | ||||
| 	PADDQ m00, m11 | ||||
| 	PADDQ v11, m11 | ||||
| 	MOVOU m11, 0(BX) | ||||
| 	RET | ||||
| 
 | ||||
| hash64: | ||||
	// out[0:8] = s0 + s4 + s8 + s12 (low lane of the summed registers).
| 	PADDQ v00, v10 | ||||
| 	PADDQ m00, m10 | ||||
| 	PADDQ v10, m10 | ||||
| 	MOVQ  m10, DX | ||||
| 	MOVQ  DX, 0(BX) | ||||
| 	RET | ||||
|  | @ -0,0 +1,33 @@ | |||
| //+build !noasm
 | ||||
| 
 | ||||
| // Copyright (c) 2017 Minio Inc. All rights reserved.
 | ||||
| // Use of this source code is governed by a license that can be
 | ||||
| // found in the LICENSE file.
 | ||||
| 
 | ||||
| package highwayhash | ||||
| 
 | ||||
| // Implementation-selection flags for this build. Only useNEON is read by the
 | ||||
| // update function in this file; the other three are fixed to false here.
 | ||||
| var ( | ||||
| 	useSSE4 = false | ||||
| 	useAVX2 = false | ||||
| 	useNEON = true | ||||
| 	useVMX  = false | ||||
| ) | ||||
| 
 | ||||
| // updateArm64 has no Go body; the symbol is supplied by a TEXT ·updateArm64
 | ||||
| // assembly routine. That routine mixes msg into state 32 bytes per iteration
 | ||||
| // and returns without storing to state when len(msg) < 32.
 | ||||
| //go:noescape
 | ||||
| func updateArm64(state *[16]uint64, msg []byte) | ||||
| 
 | ||||
| // initialize fills state from key by delegating to initializeGeneric; this
 | ||||
| // build defines no architecture-specific initializer.
 | ||||
| func initialize(state *[16]uint64, key []byte) { | ||||
| 	initializeGeneric(state, key) | ||||
| } | ||||
| 
 | ||||
| func update(state *[16]uint64, msg []byte) { | ||||
| 	if useNEON { | ||||
| 		updateArm64(state, msg) | ||||
| 	} else { | ||||
| 		updateGeneric(state, msg) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // finalize writes the checksum derived from state into out by delegating to
 | ||||
| // finalizeGeneric; this build defines no architecture-specific finalizer.
 | ||||
| func finalize(out []byte, state *[16]uint64) { | ||||
| 	finalizeGeneric(out, state) | ||||
| } | ||||
|  | @ -0,0 +1,116 @@ | |||
| //+build !noasm !appengine | ||||
| 
 | ||||
| // | ||||
| // Minio Cloud Storage, (C) 2017 Minio, Inc. | ||||
| // | ||||
| // Licensed under the Apache License, Version 2.0 (the "License");
 | ||||
| // you may not use this file except in compliance with the License. | ||||
| // You may obtain a copy of the License at | ||||
| // | ||||
| //     http://www.apache.org/licenses/LICENSE-2.0 | ||||
| // | ||||
| // Unless required by applicable law or agreed to in writing, software | ||||
| // distributed under the License is distributed on an "AS IS" BASIS, | ||||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| // See the License for the specific language governing permissions and | ||||
| // limitations under the License. | ||||
| // | ||||
| 
 | ||||
| // Use github.com/minio/asm2plan9s on this file to assemble ARM instructions to | ||||
| // the opcodes of their Plan9 equivalents | ||||
| 
 | ||||
| // updateArm64(state *[16]uint64, msg []byte) | ||||
| // Mixes msg into the 128-byte state, one 32-byte block per loop iteration. | ||||
| // The vector instructions are emitted as raw WORD opcodes; the trailing | ||||
| // comment on each WORD gives the mnemonic it is meant to encode. | ||||
| // NOTE(review): flag value 7 is presumably NOPROF|DUPOK|NOSPLIT (1|2|4 in | ||||
| // Go's textflag.h) - confirm. | ||||
| TEXT ·updateArm64(SB), 7, $0 | ||||
| 	MOVD state+0(FP), R0 | ||||
| 	MOVD msg_base+8(FP), R1 | ||||
| 	MOVD msg_len+16(FP), R2 // length of message | ||||
| 	// R2 = len(msg) - 32; a negative result means there is not even one | ||||
| 	// full block, so return before anything is loaded or stored. | ||||
| 	SUBS $32, R2 | ||||
| 	BMI  complete | ||||
| 
 | ||||
| 	// Definition of registers | ||||
| 	//  v0 = v0.lo | ||||
| 	//  v1 = v0.hi | ||||
| 	//  v2 = v1.lo | ||||
| 	//  v3 = v1.hi | ||||
| 	//  v4 = mul0.lo | ||||
| 	//  v5 = mul0.hi | ||||
| 	//  v6 = mul1.lo | ||||
| 	//  v7 = mul1.hi | ||||
| 
 | ||||
| 	// Load constants table pointer | ||||
| 	MOVD $·constants(SB), R3 | ||||
| 
 | ||||
| 	// and load constants into v28, v29, and v30 | ||||
| 	WORD $0x4c40607c // ld1    {v28.16b-v30.16b}, [x3] | ||||
| 
 | ||||
| 	WORD $0x4cdf2c00 // ld1   {v0.2d-v3.2d}, [x0], #64 | ||||
| 	WORD $0x4c402c04 // ld1   {v4.2d-v7.2d}, [x0] | ||||
| 	// Undo the #64 post-increment of the first load so R0 points at the | ||||
| 	// start of state again for the stores after the loop. | ||||
| 	SUBS $64, R0 | ||||
| 
 | ||||
| loop: | ||||
| 	// Main loop | ||||
| 	WORD $0x4cdfa83a // ld1   {v26.4s-v27.4s}, [x1], #32 | ||||
| 
 | ||||
| 	// Add message | ||||
| 	WORD $0x4efa8442 // add   v2.2d, v2.2d, v26.2d | ||||
| 	WORD $0x4efb8463 // add   v3.2d, v3.2d, v27.2d | ||||
| 
 | ||||
| 	// v1 += mul0 | ||||
| 	WORD $0x4ee48442 // add   v2.2d, v2.2d, v4.2d | ||||
| 	WORD $0x4ee58463 // add   v3.2d, v3.2d, v5.2d | ||||
| 
 | ||||
| 	// First pair of multiplies | ||||
| 	WORD $0x4e1d200a // tbl    v10.16b,{v0.16b,v1.16b},v29.16b | ||||
| 	WORD $0x4e1e204b // tbl    v11.16b,{v2.16b,v3.16b},v30.16b | ||||
| 	WORD $0x2eaac16c // umull  v12.2d, v11.2s, v10.2s | ||||
| 	WORD $0x6eaac16d // umull2 v13.2d, v11.4s, v10.4s | ||||
| 
 | ||||
| 	// v0 += mul1 | ||||
| 	WORD $0x4ee68400 // add   v0.2d, v0.2d, v6.2d | ||||
| 	WORD $0x4ee78421 // add   v1.2d, v1.2d, v7.2d | ||||
| 
 | ||||
| 	// Second pair of multiplies | ||||
| 	WORD $0x4e1d204f // tbl    v15.16b,{v2.16b,v3.16b},v29.16b | ||||
| 	WORD $0x4e1e200e // tbl    v14.16b,{v0.16b,v1.16b},v30.16b | ||||
| 
 | ||||
| 	// EOR multiplication result in | ||||
| 	WORD $0x6e2c1c84 // eor    v4.16b,v4.16b,v12.16b | ||||
| 	WORD $0x6e2d1ca5 // eor    v5.16b,v5.16b,v13.16b | ||||
| 
 | ||||
| 	WORD $0x2eaec1f0 // umull  v16.2d, v15.2s, v14.2s | ||||
| 	WORD $0x6eaec1f1 // umull2 v17.2d, v15.4s, v14.4s | ||||
| 
 | ||||
| 	// First pair of zipper-merges | ||||
| 	WORD $0x4e1c0052 // tbl v18.16b,{v2.16b},v28.16b | ||||
| 	WORD $0x4ef28400 // add v0.2d, v0.2d, v18.2d | ||||
| 	WORD $0x4e1c0073 // tbl v19.16b,{v3.16b},v28.16b | ||||
| 	WORD $0x4ef38421 // add v1.2d, v1.2d, v19.2d | ||||
| 
 | ||||
| 	// Second pair of zipper-merges | ||||
| 	WORD $0x4e1c0014 // tbl v20.16b,{v0.16b},v28.16b | ||||
| 	WORD $0x4ef48442 // add v2.2d, v2.2d, v20.2d | ||||
| 	WORD $0x4e1c0035 // tbl v21.16b,{v1.16b},v28.16b | ||||
| 	WORD $0x4ef58463 // add v3.2d, v3.2d, v21.2d | ||||
| 
 | ||||
| 	// EOR multiplication result in | ||||
| 	WORD $0x6e301cc6 // eor    v6.16b,v6.16b,v16.16b | ||||
| 	WORD $0x6e311ce7 // eor    v7.16b,v7.16b,v17.16b | ||||
| 
 | ||||
| 	// Keep looping while at least 32 more bytes remain; a trailing partial | ||||
| 	// block is never loaded. | ||||
| 	SUBS $32, R2 | ||||
| 	BPL  loop | ||||
| 
 | ||||
| 	// Store result | ||||
| 	WORD $0x4c9f2c00 // st1    {v0.2d-v3.2d}, [x0], #64 | ||||
| 	WORD $0x4c002c04 // st1    {v4.2d-v7.2d}, [x0] | ||||
| 
 | ||||
| complete: | ||||
| 	RET | ||||
| 
 | ||||
| // Constants for TBL instructions | ||||
| // Three 16-byte index vectors (48 bytes in total); updateArm64 loads them | ||||
| // into v28, v29 and v30 with a single ld1. | ||||
| DATA ·constants+0x0(SB)/8, $0x000f010e05020c03 // zipper merge constant | ||||
| DATA ·constants+0x8(SB)/8, $0x070806090d0a040b | ||||
| DATA ·constants+0x10(SB)/8, $0x0f0e0d0c07060504 // setup first register for multiply | ||||
| DATA ·constants+0x18(SB)/8, $0x1f1e1d1c17161514 | ||||
| DATA ·constants+0x20(SB)/8, $0x0b0a090803020100 // setup second register for multiply | ||||
| DATA ·constants+0x28(SB)/8, $0x1b1a191813121110 | ||||
| 
 | ||||
| // NOTE(review): flag value 8 is presumably RODATA in Go's textflag.h - confirm. | ||||
| GLOBL ·constants(SB), 8, $48 | ||||
|  | @ -0,0 +1,161 @@ | |||
| // Copyright (c) 2017 Minio Inc. All rights reserved.
 | ||||
| // Use of this source code is governed by a license that can be
 | ||||
| // found in the LICENSE file.
 | ||||
| 
 | ||||
| package highwayhash | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/binary" | ||||
| ) | ||||
| 
 | ||||
| // Starting indices, in uint64 words, of the four 4-word groups that make up
 | ||||
| // the [16]uint64 hash state; they are used as state[v0+i], state[mul0+i], etc.
 | ||||
| const ( | ||||
| 	v0   = 0 | ||||
| 	v1   = 4 | ||||
| 	mul0 = 8 | ||||
| 	mul1 = 12 | ||||
| ) | ||||
| 
 | ||||
| // Initial state words. initializeGeneric copies init0 into the mul0 group and
 | ||||
| // init1 into the mul1 group, XORs init0 with the key to form the v0 group and
 | ||||
| // XORs init1 with the half-swapped key to form the v1 group.
 | ||||
| var ( | ||||
| 	init0 = [4]uint64{0xdbe6d5d5fe4cce2f, 0xa4093822299f31d0, 0x13198a2e03707344, 0x243f6a8885a308d3} | ||||
| 	init1 = [4]uint64{0x3bd39e10cb0ef593, 0xc0acf169b5f18a8c, 0xbe5466cf34e90c6c, 0x452821e638d01377} | ||||
| ) | ||||
| 
 | ||||
| func initializeGeneric(state *[16]uint64, k []byte) { | ||||
| 	var key [4]uint64 | ||||
| 
 | ||||
| 	key[0] = binary.LittleEndian.Uint64(k[0:]) | ||||
| 	key[1] = binary.LittleEndian.Uint64(k[8:]) | ||||
| 	key[2] = binary.LittleEndian.Uint64(k[16:]) | ||||
| 	key[3] = binary.LittleEndian.Uint64(k[24:]) | ||||
| 
 | ||||
| 	copy(state[mul0:], init0[:]) | ||||
| 	copy(state[mul1:], init1[:]) | ||||
| 
 | ||||
| 	for i, k := range key { | ||||
| 		state[v0+i] = init0[i] ^ k | ||||
| 	} | ||||
| 
 | ||||
| 	key[0] = key[0]>>32 | key[0]<<32 | ||||
| 	key[1] = key[1]>>32 | key[1]<<32 | ||||
| 	key[2] = key[2]>>32 | key[2]<<32 | ||||
| 	key[3] = key[3]>>32 | key[3]<<32 | ||||
| 
 | ||||
| 	for i, k := range key { | ||||
| 		state[v1+i] = init1[i] ^ k | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func updateGeneric(state *[16]uint64, msg []byte) { | ||||
| 	for len(msg) > 0 { | ||||
| 		// add message
 | ||||
| 		state[v1+0] += binary.LittleEndian.Uint64(msg) | ||||
| 		state[v1+1] += binary.LittleEndian.Uint64(msg[8:]) | ||||
| 		state[v1+2] += binary.LittleEndian.Uint64(msg[16:]) | ||||
| 		state[v1+3] += binary.LittleEndian.Uint64(msg[24:]) | ||||
| 
 | ||||
| 		// v1 += mul0
 | ||||
| 		state[v1+0] += state[mul0+0] | ||||
| 		state[v1+1] += state[mul0+1] | ||||
| 		state[v1+2] += state[mul0+2] | ||||
| 		state[v1+3] += state[mul0+3] | ||||
| 
 | ||||
| 		state[mul0+0] ^= uint64(uint32(state[v1+0])) * (state[v0+0] >> 32) | ||||
| 		state[mul0+1] ^= uint64(uint32(state[v1+1])) * (state[v0+1] >> 32) | ||||
| 		state[mul0+2] ^= uint64(uint32(state[v1+2])) * (state[v0+2] >> 32) | ||||
| 		state[mul0+3] ^= uint64(uint32(state[v1+3])) * (state[v0+3] >> 32) | ||||
| 
 | ||||
| 		// v0 += mul1
 | ||||
| 		state[v0+0] += state[mul1+0] | ||||
| 		state[v0+1] += state[mul1+1] | ||||
| 		state[v0+2] += state[mul1+2] | ||||
| 		state[v0+3] += state[mul1+3] | ||||
| 
 | ||||
| 		state[mul1+0] ^= uint64(uint32(state[v0+0])) * (state[v1+0] >> 32) | ||||
| 		state[mul1+1] ^= uint64(uint32(state[v0+1])) * (state[v1+1] >> 32) | ||||
| 		state[mul1+2] ^= uint64(uint32(state[v0+2])) * (state[v1+2] >> 32) | ||||
| 		state[mul1+3] ^= uint64(uint32(state[v0+3])) * (state[v1+3] >> 32) | ||||
| 
 | ||||
| 		zipperMerge(state[v1+0], state[v1+1], &state[v0+0], &state[v0+1]) | ||||
| 		zipperMerge(state[v1+2], state[v1+3], &state[v0+2], &state[v0+3]) | ||||
| 
 | ||||
| 		zipperMerge(state[v0+0], state[v0+1], &state[v1+0], &state[v1+1]) | ||||
| 		zipperMerge(state[v0+2], state[v0+3], &state[v1+2], &state[v1+3]) | ||||
| 		msg = msg[32:] | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func finalizeGeneric(out []byte, state *[16]uint64) { | ||||
| 	var perm [4]uint64 | ||||
| 	var tmp [32]byte | ||||
| 	runs := 4 | ||||
| 	if len(out) == 16 { | ||||
| 		runs = 6 | ||||
| 	} else if len(out) == 32 { | ||||
| 		runs = 10 | ||||
| 	} | ||||
| 	for i := 0; i < runs; i++ { | ||||
| 		perm[0] = state[v0+2]>>32 | state[v0+2]<<32 | ||||
| 		perm[1] = state[v0+3]>>32 | state[v0+3]<<32 | ||||
| 		perm[2] = state[v0+0]>>32 | state[v0+0]<<32 | ||||
| 		perm[3] = state[v0+1]>>32 | state[v0+1]<<32 | ||||
| 
 | ||||
| 		binary.LittleEndian.PutUint64(tmp[0:], perm[0]) | ||||
| 		binary.LittleEndian.PutUint64(tmp[8:], perm[1]) | ||||
| 		binary.LittleEndian.PutUint64(tmp[16:], perm[2]) | ||||
| 		binary.LittleEndian.PutUint64(tmp[24:], perm[3]) | ||||
| 
 | ||||
| 		update(state, tmp[:]) | ||||
| 	} | ||||
| 
 | ||||
| 	switch len(out) { | ||||
| 	case 8: | ||||
| 		binary.LittleEndian.PutUint64(out, state[v0+0]+state[v1+0]+state[mul0+0]+state[mul1+0]) | ||||
| 	case 16: | ||||
| 		binary.LittleEndian.PutUint64(out, state[v0+0]+state[v1+2]+state[mul0+0]+state[mul1+2]) | ||||
| 		binary.LittleEndian.PutUint64(out[8:], state[v0+1]+state[v1+3]+state[mul0+1]+state[mul1+3]) | ||||
| 	case 32: | ||||
| 		h0, h1 := reduceMod(state[v0+0]+state[mul0+0], state[v0+1]+state[mul0+1], state[v1+0]+state[mul1+0], state[v1+1]+state[mul1+1]) | ||||
| 		binary.LittleEndian.PutUint64(out[0:], h0) | ||||
| 		binary.LittleEndian.PutUint64(out[8:], h1) | ||||
| 
 | ||||
| 		h0, h1 = reduceMod(state[v0+2]+state[mul0+2], state[v0+3]+state[mul0+3], state[v1+2]+state[mul1+2], state[v1+3]+state[mul1+3]) | ||||
| 		binary.LittleEndian.PutUint64(out[16:], h0) | ||||
| 		binary.LittleEndian.PutUint64(out[24:], h1) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
// zipperMerge shuffles the 16 bytes of (v0, v1) and adds the two shuffled
// words to *d0 and *d1.
//
// Numbering the bytes of v0 as 0..7 and those of v1 as 8..15 (least
// significant first), the word added to *d0 is built from source bytes
// 3, 12, 2, 5, 14, 1, 15, 0 and the word added to *d1 from source bytes
// 11, 4, 10, 13, 9, 6, 8, 7, again least significant first.
func zipperMerge(v0, v1 uint64, d0, d1 *uint64) {
	// pick moves byte number from of w to byte position to.
	pick := func(w uint64, from, to uint) uint64 {
		return ((w >> (8 * from)) & 0xFF) << (8 * to)
	}

	*d0 += pick(v0, 3, 0) | pick(v1, 4, 1) | pick(v0, 2, 2) | pick(v0, 5, 3) |
		pick(v1, 6, 4) | pick(v0, 1, 5) | pick(v1, 7, 6) | pick(v0, 0, 7)

	*d1 += pick(v1, 3, 0) | pick(v0, 4, 1) | pick(v1, 2, 2) | pick(v1, 5, 3) |
		pick(v1, 1, 4) | pick(v0, 6, 5) | pick(v1, 0, 6) | pick(v0, 7, 7)
}
| 
 | ||||
// reduceMod reduces v = [v0, v1, v2, v3] mod the irreducible polynomial
// x^128 + x^2 + x.
//
// The top two bits of v3 are discarded. The remaining high half (v3:v2) is
// shifted left by one and by two as a 128-bit value, and both shifted copies
// are XORed into the low half (v1:v0). r0 is the low word of the result and r1
// the high word.
func reduceMod(v0, v1, v2, v3 uint64) (r0, r1 uint64) {
	top := v3 & 0x3FFFFFFFFFFFFFFF

	// (top:v2) << 1
	by1Lo := v2 << 1
	by1Hi := top<<1 | v2>>63

	// (top:v2) << 2
	by2Lo := v2 << 2
	by2Hi := top<<2 | v2>>62

	return v0 ^ by1Lo ^ by2Lo, v1 ^ by1Hi ^ by2Hi
}
|  | @ -0,0 +1,33 @@ | |||
| //+build !noasm
 | ||||
| 
 | ||||
| // Copyright (c) 2017 Minio Inc. All rights reserved.
 | ||||
| // Use of this source code is governed by a license that can be
 | ||||
| // found in the LICENSE file.
 | ||||
| 
 | ||||
| package highwayhash | ||||
| 
 | ||||
| // Implementation-selection flags for this build. Only useVMX is read by the
 | ||||
| // update function in this file; the other three are fixed to false here.
 | ||||
| var ( | ||||
| 	useSSE4 = false | ||||
| 	useAVX2 = false | ||||
| 	useNEON = false | ||||
| 	useVMX  = true | ||||
| ) | ||||
| 
 | ||||
| // updatePpc64Le has no Go body; the symbol is supplied by a TEXT
 | ||||
| // ·updatePpc64Le assembly routine. That routine mixes msg into state 32 bytes
 | ||||
| // per iteration and returns without storing to state when len(msg) < 32.
 | ||||
| //go:noescape
 | ||||
| func updatePpc64Le(state *[16]uint64, msg []byte) | ||||
| 
 | ||||
| // initialize fills state from key by delegating to initializeGeneric; this
 | ||||
| // build defines no architecture-specific initializer.
 | ||||
| func initialize(state *[16]uint64, key []byte) { | ||||
| 	initializeGeneric(state, key) | ||||
| } | ||||
| 
 | ||||
| func update(state *[16]uint64, msg []byte) { | ||||
| 	if useVMX { | ||||
| 		updatePpc64Le(state, msg) | ||||
| 	} else { | ||||
| 		updateGeneric(state, msg) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // finalize writes the checksum derived from state into out by delegating to
 | ||||
| // finalizeGeneric; this build defines no architecture-specific finalizer.
 | ||||
| func finalize(out []byte, state *[16]uint64) { | ||||
| 	finalizeGeneric(out, state) | ||||
| } | ||||
|  | @ -0,0 +1,182 @@ | |||
| //+build !noasm !appengine | ||||
| 
 | ||||
| // | ||||
| // Minio Cloud Storage, (C) 2018 Minio, Inc. | ||||
| // | ||||
| // Licensed under the Apache License, Version 2.0 (the "License");
 | ||||
| // you may not use this file except in compliance with the License. | ||||
| // You may obtain a copy of the License at | ||||
| // | ||||
| //     http://www.apache.org/licenses/LICENSE-2.0 | ||||
| // | ||||
| // Unless required by applicable law or agreed to in writing, software | ||||
| // distributed under the License is distributed on an "AS IS" BASIS, | ||||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| // See the License for the specific language governing permissions and | ||||
| // limitations under the License. | ||||
| // | ||||
| 
 | ||||
| #include "textflag.h" | ||||
| 
 | ||||
| // Definition of registers | ||||
| // Most vector values get two names: NAME maps to a VSn register and is used | ||||
| // with the LXVD2X/STXVD2X/XXPERMDI/XXLXOR/XXLNAND instructions below, while | ||||
| // NAME_ maps to a Vm register and is used with VADDUDM/VRLD/VSRD/VPERM/VMULOUW. | ||||
| // NOTE(review): each pair (VS32+m, Vm) is presumably the same physical | ||||
| // register under the Power ISA VSX/VMX overlay; the update loop depends on | ||||
| // that - confirm. | ||||
| #define V0_LO    VS32 | ||||
| #define V0_LO_   V0 | ||||
| #define V0_HI    VS33 | ||||
| #define V0_HI_   V1 | ||||
| #define V1_LO    VS34 | ||||
| #define V1_LO_   V2 | ||||
| #define V1_HI    VS35 | ||||
| #define V1_HI_   V3 | ||||
| #define MUL0_LO  VS36 | ||||
| #define MUL0_LO_ V4 | ||||
| #define MUL0_HI  VS37 | ||||
| #define MUL0_HI_ V5 | ||||
| #define MUL1_LO  VS38 | ||||
| #define MUL1_LO_ V6 | ||||
| #define MUL1_HI  VS39 | ||||
| #define MUL1_HI_ V7 | ||||
| 
 | ||||
| // Message | ||||
| #define MSG_LO   VS40 | ||||
| #define MSG_LO_  V8 | ||||
| #define MSG_HI   VS41 | ||||
| 
 | ||||
| // Constants | ||||
| #define ROTATE   VS42 | ||||
| #define ROTATE_  V10 | ||||
| #define MASK     VS43 | ||||
| #define MASK_    V11 | ||||
| 
 | ||||
| // Temps | ||||
| #define TEMP1    VS44 | ||||
| #define TEMP1_   V12 | ||||
| #define TEMP2    VS45 | ||||
| #define TEMP2_   V13 | ||||
| #define TEMP3    VS46 | ||||
| #define TEMP3_   V14 | ||||
| #define TEMP4_   V15 | ||||
| #define TEMP5_   V16 | ||||
| #define TEMP6_   V17 | ||||
| #define TEMP7_   V18 | ||||
| 
 | ||||
| // Regular registers | ||||
| // P1..P7 hold the byte offsets 16, 32, ..., 112 used as index registers for | ||||
| // the indexed vector loads and stores. | ||||
| #define STATE     R3 | ||||
| #define MSG_BASE  R4 | ||||
| #define MSG_LEN   R5 | ||||
| #define CONSTANTS R6 | ||||
| #define P1        R7 | ||||
| #define P2        R8 | ||||
| #define P3        R9 | ||||
| #define P4        R10 | ||||
| #define P5        R11 | ||||
| #define P6        R12 | ||||
| #define P7        R14 // avoid using R13 | ||||
| 
 | ||||
| // updatePpc64Le(state *[16]uint64, msg []byte) | ||||
| // Mixes msg into the 128-byte state, one 32-byte block per loop iteration. | ||||
| // Frame $0-32: no locals, 32 bytes of arguments (state pointer plus the | ||||
| // three-word slice header). | ||||
| TEXT ·updatePpc64Le(SB), NOFRAME|NOSPLIT, $0-32 | ||||
| 	MOVD state+0(FP), STATE | ||||
| 	MOVD msg_base+8(FP), MSG_BASE | ||||
| 	MOVD msg_len+16(FP), MSG_LEN  // length of message | ||||
| 
 | ||||
| 	// Sanity check for length | ||||
| 	// Fewer than 32 bytes: return before loading or storing any state. | ||||
| 	CMPU MSG_LEN, $31 | ||||
| 	BLE  complete | ||||
| 
 | ||||
| 	// Setup offsets | ||||
| 	MOVD $16, P1 | ||||
| 	MOVD $32, P2 | ||||
| 	MOVD $48, P3 | ||||
| 	MOVD $64, P4 | ||||
| 	MOVD $80, P5 | ||||
| 	MOVD $96, P6 | ||||
| 	MOVD $112, P7 | ||||
| 
 | ||||
| 	// Load state | ||||
| 	// NOTE(review): the XXPERMDI ..., $2 following each LXVD2X presumably | ||||
| 	// swaps the two 64-bit halves of the register to undo the load's | ||||
| 	// doubleword order on little-endian - confirm against the Power ISA. | ||||
| 	LXVD2X   (STATE)(R0), V0_LO | ||||
| 	LXVD2X   (STATE)(P1), V0_HI | ||||
| 	LXVD2X   (STATE)(P2), V1_LO | ||||
| 	LXVD2X   (STATE)(P3), V1_HI | ||||
| 	LXVD2X   (STATE)(P4), MUL0_LO | ||||
| 	LXVD2X   (STATE)(P5), MUL0_HI | ||||
| 	LXVD2X   (STATE)(P6), MUL1_LO | ||||
| 	LXVD2X   (STATE)(P7), MUL1_HI | ||||
| 	XXPERMDI V0_LO, V0_LO, $2, V0_LO | ||||
| 	XXPERMDI V0_HI, V0_HI, $2, V0_HI | ||||
| 	XXPERMDI V1_LO, V1_LO, $2, V1_LO | ||||
| 	XXPERMDI V1_HI, V1_HI, $2, V1_HI | ||||
| 	XXPERMDI MUL0_LO, MUL0_LO, $2, MUL0_LO | ||||
| 	XXPERMDI MUL0_HI, MUL0_HI, $2, MUL0_HI | ||||
| 	XXPERMDI MUL1_LO, MUL1_LO, $2, MUL1_LO | ||||
| 	XXPERMDI MUL1_HI, MUL1_HI, $2, MUL1_HI | ||||
| 
 | ||||
| 	// Load constants table pointer | ||||
| 	MOVD    $·constants(SB), CONSTANTS | ||||
| 	LXVD2X  (CONSTANTS)(R0), ROTATE | ||||
| 	LXVD2X  (CONSTANTS)(P1), MASK | ||||
| 	// NAND of MASK with itself, i.e. the bitwise complement of the loaded | ||||
| 	// zipper-merge constant. | ||||
| 	XXLNAND MASK, MASK, MASK | ||||
| 
 | ||||
| loop: | ||||
| 	// Main highwayhash update loop | ||||
| 	// The instruction order below interleaves loads, adds, permutes and | ||||
| 	// multiplies; registers such as V0_LO and MSG_LO are reused as scratch | ||||
| 	// once their previous value has been consumed, so do not reorder. | ||||
| 	LXVD2X   (MSG_BASE)(R0), MSG_LO | ||||
| 	VADDUDM  V0_LO_, MUL1_LO_, TEMP1_ | ||||
| 	VRLD     V0_LO_, ROTATE_, TEMP2_ | ||||
| 	VADDUDM  MUL1_HI_, V0_HI_, TEMP3_ | ||||
| 	LXVD2X   (MSG_BASE)(P1), MSG_HI | ||||
| 	ADD      $32, MSG_BASE, MSG_BASE | ||||
| 	XXPERMDI MSG_LO, MSG_LO, $2, MSG_LO | ||||
| 	XXPERMDI MSG_HI, MSG_HI, $2, V0_LO | ||||
| 	VADDUDM  MSG_LO_, MUL0_LO_, MSG_LO_ | ||||
| 	VADDUDM  V0_LO_, MUL0_HI_, V0_LO_ | ||||
| 	VADDUDM  MSG_LO_, V1_LO_, V1_LO_ | ||||
| 	VSRD     V0_HI_, ROTATE_, MSG_LO_ | ||||
| 	VADDUDM  V0_LO_, V1_HI_, V1_HI_ | ||||
| 	VPERM    V1_LO_, V1_LO_, MASK_, V0_LO_ | ||||
| 	VMULOUW  V1_LO_, TEMP2_, TEMP2_ | ||||
| 	VPERM    V1_HI_, V1_HI_, MASK_, TEMP7_ | ||||
| 	VADDUDM  V0_LO_, TEMP1_, V0_LO_ | ||||
| 	VMULOUW  V1_HI_, MSG_LO_, MSG_LO_ | ||||
| 	VADDUDM  TEMP7_, TEMP3_, V0_HI_ | ||||
| 	VPERM    V0_LO_, V0_LO_, MASK_, TEMP6_ | ||||
| 	VRLD     V1_LO_, ROTATE_, TEMP4_ | ||||
| 	VSRD     V1_HI_, ROTATE_, TEMP5_ | ||||
| 	VPERM    V0_HI_, V0_HI_, MASK_, TEMP7_ | ||||
| 	XXLXOR   MUL0_LO, TEMP2, MUL0_LO | ||||
| 	VMULOUW  TEMP1_, TEMP4_, TEMP1_ | ||||
| 	VMULOUW  TEMP3_, TEMP5_, TEMP3_ | ||||
| 	XXLXOR   MUL0_HI, MSG_LO, MUL0_HI | ||||
| 	XXLXOR   MUL1_LO, TEMP1, MUL1_LO | ||||
| 	XXLXOR   MUL1_HI, TEMP3, MUL1_HI | ||||
| 	VADDUDM  TEMP6_, V1_LO_, V1_LO_ | ||||
| 	VADDUDM  TEMP7_, V1_HI_, V1_HI_ | ||||
| 
 | ||||
| 	// Keep looping while at least one more full 32-byte block remains; a | ||||
| 	// trailing partial block is never loaded. | ||||
| 	SUB  $32, MSG_LEN, MSG_LEN | ||||
| 	CMPU MSG_LEN, $32 | ||||
| 	BGE  loop | ||||
| 
 | ||||
| 	// Save state | ||||
| 	XXPERMDI V0_LO, V0_LO, $2, V0_LO | ||||
| 	XXPERMDI V0_HI, V0_HI, $2, V0_HI | ||||
| 	XXPERMDI V1_LO, V1_LO, $2, V1_LO | ||||
| 	XXPERMDI V1_HI, V1_HI, $2, V1_HI | ||||
| 	XXPERMDI MUL0_LO, MUL0_LO, $2, MUL0_LO | ||||
| 	XXPERMDI MUL0_HI, MUL0_HI, $2, MUL0_HI | ||||
| 	XXPERMDI MUL1_LO, MUL1_LO, $2, MUL1_LO | ||||
| 	XXPERMDI MUL1_HI, MUL1_HI, $2, MUL1_HI | ||||
| 	STXVD2X  V0_LO, (STATE)(R0) | ||||
| 	STXVD2X  V0_HI, (STATE)(P1) | ||||
| 	STXVD2X  V1_LO, (STATE)(P2) | ||||
| 	STXVD2X  V1_HI, (STATE)(P3) | ||||
| 	STXVD2X  MUL0_LO, (STATE)(P4) | ||||
| 	STXVD2X  MUL0_HI, (STATE)(P5) | ||||
| 	STXVD2X  MUL1_LO, (STATE)(P6) | ||||
| 	STXVD2X  MUL1_HI, (STATE)(P7) | ||||
| 
 | ||||
| complete: | ||||
| 	RET | ||||
| 
 | ||||
| // Constants table | ||||
| // 32 bytes: the first 16 are loaded into ROTATE (the value 0x20 = 32 in each | ||||
| // 64-bit lane), the second 16 into MASK, which updatePpc64Le complements | ||||
| // before using it with VPERM. | ||||
| DATA ·constants+0x0(SB)/8, $0x0000000000000020 | ||||
| DATA ·constants+0x8(SB)/8, $0x0000000000000020 | ||||
| DATA ·constants+0x10(SB)/8, $0x070806090d0a040b // zipper merge constant | ||||
| DATA ·constants+0x18(SB)/8, $0x000f010e05020c03 // zipper merge constant | ||||
| 
 | ||||
| // NOTE(review): flag value 8 is presumably RODATA in Go's textflag.h - confirm. | ||||
| GLOBL ·constants(SB), 8, $32 | ||||
|  | @ -0,0 +1,28 @@ | |||
| // Copyright (c) 2017 Minio Inc. All rights reserved.
 | ||||
| // Use of this source code is governed by a license that can be
 | ||||
| // found in the LICENSE file.
 | ||||
| 
 | ||||
| // +build !amd64
 | ||||
| // +build !arm64
 | ||||
| // +build !ppc64le
 | ||||
| 
 | ||||
| package highwayhash | ||||
| 
 | ||||
| // Implementation-selection flags for the build without amd64, arm64 or
 | ||||
| // ppc64le support: all are false and none is read by the functions in this file.
 | ||||
| var ( | ||||
| 	useSSE4 = false | ||||
| 	useAVX2 = false | ||||
| 	useNEON = false | ||||
| 	useVMX  = false | ||||
| ) | ||||
| 
 | ||||
| // initialize fills state from the key k by delegating to initializeGeneric.
 | ||||
| func initialize(state *[16]uint64, k []byte) { | ||||
| 	initializeGeneric(state, k) | ||||
| } | ||||
| 
 | ||||
| // update mixes msg into state; this build always uses the portable
 | ||||
| // updateGeneric.
 | ||||
| func update(state *[16]uint64, msg []byte) { | ||||
| 	updateGeneric(state, msg) | ||||
| } | ||||
| 
 | ||||
| // finalize writes the checksum derived from state into out by delegating to
 | ||||
| // finalizeGeneric.
 | ||||
| func finalize(out []byte, state *[16]uint64) { | ||||
| 	finalizeGeneric(out, state) | ||||
| } | ||||
|  | @ -0,0 +1,38 @@ | |||
| // Copyright 2018 The Go Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style
 | ||||
| // license that can be found in the LICENSE file.
 | ||||
| 
 | ||||
| // Package cpu implements processor feature detection for
 | ||||
| // various CPU architectures.
 | ||||
| package cpu | ||||
| 
 | ||||
| // CacheLinePad is used to pad structs to avoid false sharing.
 | ||||
| // Its size in bytes is the cacheLineSize constant, which each architecture
 | ||||
 | ||||
| // file of this package defines separately.
 | ||||
| type CacheLinePad struct{ _ [cacheLineSize]byte } | ||||
| 
 | ||||
| // X86 contains the supported CPU features of the
 | ||||
| // current X86/AMD64 platform. If the current platform
 | ||||
| // is not X86/AMD64 then all feature flags are false.
 | ||||
| //
 | ||||
| // X86 is padded to avoid false sharing. Further the HasAVX
 | ||||
| // and HasAVX2 are only set if the OS supports XMM and YMM
 | ||||
| // registers in addition to the CPUID feature bit being set.
 | ||||
| //
 | ||||
| // The flags are assigned by this package's init function, which is built
 | ||||
| // only for 386, amd64 and amd64p32.
 | ||||
| var X86 struct { | ||||
| 	_            CacheLinePad | ||||
| 	HasAES       bool // AES hardware implementation (AES NI)
 | ||||
| 	HasADX       bool // Multi-precision add-carry instruction extensions
 | ||||
| 	HasAVX       bool // Advanced vector extension
 | ||||
| 	HasAVX2      bool // Advanced vector extension 2
 | ||||
| 	HasBMI1      bool // Bit manipulation instruction set 1
 | ||||
| 	HasBMI2      bool // Bit manipulation instruction set 2
 | ||||
| 	HasERMS      bool // Enhanced REP for MOVSB and STOSB
 | ||||
| 	HasFMA       bool // Fused-multiply-add instructions
 | ||||
| 	HasOSXSAVE   bool // OS supports XSAVE/XRESTOR for saving/restoring XMM registers.
 | ||||
| 	HasPCLMULQDQ bool // PCLMULQDQ instruction - most often used for AES-GCM
 | ||||
| 	HasPOPCNT    bool // Hamming weight instruction POPCNT.
 | ||||
| 	HasSSE2      bool // Streaming SIMD extension 2 (always available on amd64)
 | ||||
| 	HasSSE3      bool // Streaming SIMD extension 3
 | ||||
| 	HasSSSE3     bool // Supplemental streaming SIMD extension 3
 | ||||
| 	HasSSE41     bool // Streaming SIMD extension 4 and 4.1
 | ||||
| 	HasSSE42     bool // Streaming SIMD extension 4 and 4.2
 | ||||
| 	_            CacheLinePad | ||||
| } | ||||
|  | @ -0,0 +1,7 @@ | |||
| // Copyright 2018 The Go Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style
 | ||||
| // license that can be found in the LICENSE file.
 | ||||
| 
 | ||||
| package cpu | ||||
| 
 | ||||
| // cacheLineSize is the byte length of CacheLinePad for this build.
 | ||||
| // NOTE(review): this file has no +build line, so it is presumably selected
 | ||||
 | ||||
| // by a GOARCH file-name suffix that is not visible here - confirm.
 | ||||
| const cacheLineSize = 32 | ||||
|  | @ -0,0 +1,7 @@ | |||
| // Copyright 2018 The Go Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style
 | ||||
| // license that can be found in the LICENSE file.
 | ||||
| 
 | ||||
| package cpu | ||||
| 
 | ||||
| // cacheLineSize is the byte length of CacheLinePad for this build.
 | ||||
| // NOTE(review): this file has no +build line, so it is presumably selected
 | ||||
 | ||||
| // by a GOARCH file-name suffix that is not visible here - confirm.
 | ||||
| const cacheLineSize = 64 | ||||
|  | @ -0,0 +1,16 @@ | |||
| // Copyright 2018 The Go Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style
 | ||||
| // license that can be found in the LICENSE file.
 | ||||
| 
 | ||||
| // +build 386 amd64 amd64p32
 | ||||
| // +build !gccgo
 | ||||
| 
 | ||||
| package cpu | ||||
| 
 | ||||
| // cpuid is implemented in cpu_x86.s for gc compiler
 | ||||
| // and in cpu_gccgo.c for gccgo.
 | ||||
| // eaxArg and ecxArg are placed in EAX and ECX before the CPUID instruction;
 | ||||
 | ||||
| // the four results are the EAX, EBX, ECX and EDX values it leaves behind.
 | ||||
| func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) | ||||
| 
 | ||||
| // xgetbv with ecx = 0 is implemented in cpu_x86.s for gc compiler
 | ||||
| // and in cpu_gccgo.c for gccgo.
 | ||||
| // It returns the EAX and EDX values produced by the XGETBV instruction.
 | ||||
| func xgetbv() (eax, edx uint32) | ||||
|  | @ -0,0 +1,43 @@ | |||
| // Copyright 2018 The Go Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style
 | ||||
| // license that can be found in the LICENSE file.
 | ||||
| 
 | ||||
| // +build 386 amd64 amd64p32
 | ||||
| // +build gccgo
 | ||||
| 
 | ||||
| #include <cpuid.h> | ||||
| #include <stdint.h> | ||||
| 
 | ||||
| // Need to wrap __get_cpuid_count because it's declared as static.
 | ||||
| // All six arguments are forwarded unchanged and the helper's return value is
 | ||||
 | ||||
| // passed back. NOTE(review): the Go-side extern declaration of this function
 | ||||
 | ||||
| // lists no result, so that return value appears to be unused - confirm.
 | ||||
| int | ||||
| gccgoGetCpuidCount(uint32_t leaf, uint32_t subleaf, | ||||
|                    uint32_t *eax, uint32_t *ebx, | ||||
|                    uint32_t *ecx, uint32_t *edx) | ||||
| { | ||||
| 	return __get_cpuid_count(leaf, subleaf, eax, ebx, ecx, edx); | ||||
| } | ||||
| 
 | ||||
// xgetbv reads the contents of an XCR (Extended Control Register)
// specified in the ECX register into registers EDX:EAX.
// Currently, the only supported value for XCR is 0.
//
// The low 32 bits are stored through eax and the high 32 bits through edx.
// ECX is supplied to the instruction as an input operand holding 0, so the
// compiler knows the register is in use; the instruction itself does not
// modify it.
//
// TODO: Replace with a better alternative:
//
//     #include <xsaveintrin.h>
//
//     #pragma GCC target("xsave")
//
//     void gccgoXgetbv(uint32_t *eax, uint32_t *edx) {
//       unsigned long long x = _xgetbv(0);
//       *eax = x & 0xffffffff;
//       *edx = (x >> 32) & 0xffffffff;
//     }
//
// Note that _xgetbv is defined starting with GCC 8.
void
gccgoXgetbv(uint32_t *eax, uint32_t *edx)
{
	__asm("  xgetbv"
	    : "=a"(*eax), "=d"(*edx)
	    : "c"(0));
}
|  | @ -0,0 +1,26 @@ | |||
| // Copyright 2018 The Go Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style
 | ||||
| // license that can be found in the LICENSE file.
 | ||||
| 
 | ||||
| // +build 386 amd64 amd64p32
 | ||||
| // +build gccgo
 | ||||
| 
 | ||||
| package cpu | ||||
| 
 | ||||
| // gccgoGetCpuidCount is the C wrapper around __get_cpuid_count; it writes the
 | ||||
| // four CPUID result registers through eax, ebx, ecx and edx.
 | ||||
| //extern gccgoGetCpuidCount
 | ||||
| func gccgoGetCpuidCount(eaxArg, ecxArg uint32, eax, ebx, ecx, edx *uint32) | ||||
| 
 | ||||
| func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) { | ||||
| 	var a, b, c, d uint32 | ||||
| 	gccgoGetCpuidCount(eaxArg, ecxArg, &a, &b, &c, &d) | ||||
| 	return a, b, c, d | ||||
| } | ||||
| 
 | ||||
| // gccgoXgetbv is the C helper that executes XGETBV with ECX = 0 and writes
 | ||||
| // the resulting EAX and EDX values through eax and edx.
 | ||||
| //extern gccgoXgetbv
 | ||||
| func gccgoXgetbv(eax, edx *uint32) | ||||
| 
 | ||||
| func xgetbv() (eax, edx uint32) { | ||||
| 	var a, d uint32 | ||||
| 	gccgoXgetbv(&a, &d) | ||||
| 	return a, d | ||||
| } | ||||
|  | @ -0,0 +1,9 @@ | |||
| // Copyright 2018 The Go Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style
 | ||||
| // license that can be found in the LICENSE file.
 | ||||
| 
 | ||||
| // +build mips64 mips64le
 | ||||
| 
 | ||||
| package cpu | ||||
| 
 | ||||
| // cacheLineSize is the byte length of CacheLinePad on mips64 and mips64le.
 | ||||
| const cacheLineSize = 32 | ||||
|  | @ -0,0 +1,9 @@ | |||
| // Copyright 2018 The Go Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style
 | ||||
| // license that can be found in the LICENSE file.
 | ||||
| 
 | ||||
| // +build mips mipsle
 | ||||
| 
 | ||||
| package cpu | ||||
| 
 | ||||
| // cacheLineSize is the byte length of CacheLinePad on mips and mipsle.
 | ||||
| const cacheLineSize = 32 | ||||
|  | @ -0,0 +1,9 @@ | |||
| // Copyright 2018 The Go Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style
 | ||||
| // license that can be found in the LICENSE file.
 | ||||
| 
 | ||||
| // +build ppc64 ppc64le
 | ||||
| 
 | ||||
| package cpu | ||||
| 
 | ||||
| // cacheLineSize is the byte length of CacheLinePad on ppc64 and ppc64le.
 | ||||
| const cacheLineSize = 128 | ||||
|  | @ -0,0 +1,7 @@ | |||
| // Copyright 2018 The Go Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style
 | ||||
| // license that can be found in the LICENSE file.
 | ||||
| 
 | ||||
| package cpu | ||||
| 
 | ||||
| // cacheLineSize is the byte length of CacheLinePad for this build.
 | ||||
| // NOTE(review): this file has no +build line, so it is presumably selected
 | ||||
 | ||||
| // by a GOARCH file-name suffix that is not visible here - confirm.
 | ||||
| const cacheLineSize = 256 | ||||
|  | @ -0,0 +1,55 @@ | |||
| // Copyright 2018 The Go Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style
 | ||||
| // license that can be found in the LICENSE file.
 | ||||
| 
 | ||||
| // +build 386 amd64 amd64p32
 | ||||
| 
 | ||||
| package cpu | ||||
| 
 | ||||
| // cacheLineSize is the byte length of CacheLinePad on 386, amd64 and amd64p32.
 | ||||
| const cacheLineSize = 64 | ||||
| 
 | ||||
| func init() { | ||||
| 	maxID, _, _, _ := cpuid(0, 0) | ||||
| 
 | ||||
| 	if maxID < 1 { | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 	_, _, ecx1, edx1 := cpuid(1, 0) | ||||
| 	X86.HasSSE2 = isSet(26, edx1) | ||||
| 
 | ||||
| 	X86.HasSSE3 = isSet(0, ecx1) | ||||
| 	X86.HasPCLMULQDQ = isSet(1, ecx1) | ||||
| 	X86.HasSSSE3 = isSet(9, ecx1) | ||||
| 	X86.HasFMA = isSet(12, ecx1) | ||||
| 	X86.HasSSE41 = isSet(19, ecx1) | ||||
| 	X86.HasSSE42 = isSet(20, ecx1) | ||||
| 	X86.HasPOPCNT = isSet(23, ecx1) | ||||
| 	X86.HasAES = isSet(25, ecx1) | ||||
| 	X86.HasOSXSAVE = isSet(27, ecx1) | ||||
| 
 | ||||
| 	osSupportsAVX := false | ||||
| 	// For XGETBV, OSXSAVE bit is required and sufficient.
 | ||||
| 	if X86.HasOSXSAVE { | ||||
| 		eax, _ := xgetbv() | ||||
| 		// Check if XMM and YMM registers have OS support.
 | ||||
| 		osSupportsAVX = isSet(1, eax) && isSet(2, eax) | ||||
| 	} | ||||
| 
 | ||||
| 	X86.HasAVX = isSet(28, ecx1) && osSupportsAVX | ||||
| 
 | ||||
| 	if maxID < 7 { | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 	_, ebx7, _, _ := cpuid(7, 0) | ||||
| 	X86.HasBMI1 = isSet(3, ebx7) | ||||
| 	X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX | ||||
| 	X86.HasBMI2 = isSet(8, ebx7) | ||||
| 	X86.HasERMS = isSet(9, ebx7) | ||||
| 	X86.HasADX = isSet(19, ebx7) | ||||
| } | ||||
| 
 | ||||
// isSet reports whether bit number bitpos (0 = least significant) of value is
// 1. Positions of 32 or more always yield false.
func isSet(bitpos uint, value uint32) bool {
	return (value>>bitpos)&1 == 1
}
|  | @ -0,0 +1,27 @@ | |||
| // Copyright 2018 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
| 
 | ||||
| // +build 386 amd64 amd64p32 | ||||
| // +build !gccgo | ||||
| 
 | ||||
| #include "textflag.h" | ||||
| 
 | ||||
| // func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) | ||||
| // Frame $0-24: two 4-byte arguments followed by four 4-byte results. | ||||
| // eaxArg and ecxArg are loaded into AX and CX, CPUID is executed, and | ||||
| // AX, BX, CX and DX are stored to the results in that order. | ||||
| TEXT ·cpuid(SB), NOSPLIT, $0-24 | ||||
| 	MOVL eaxArg+0(FP), AX | ||||
| 	MOVL ecxArg+4(FP), CX | ||||
| 	CPUID | ||||
| 	MOVL AX, eax+8(FP) | ||||
| 	MOVL BX, ebx+12(FP) | ||||
| 	MOVL CX, ecx+16(FP) | ||||
| 	MOVL DX, edx+20(FP) | ||||
| 	RET | ||||
| 
 | ||||
| // func xgetbv() (eax, edx uint32) | ||||
| // Frame $0-8: no arguments, two 4-byte results. CX is cleared before | ||||
| // XGETBV is executed; AX and DX are then stored as eax and edx. | ||||
| TEXT ·xgetbv(SB),NOSPLIT,$0-8 | ||||
| 	MOVL $0, CX | ||||
| 	XGETBV | ||||
| 	MOVL AX, eax+0(FP) | ||||
| 	MOVL DX, edx+4(FP) | ||||
| 	RET | ||||
		Loading…
	
		Reference in New Issue