feat: replace md5 with highwayhash
This commit is contained in:
parent
e048b87222
commit
c87d2dba9c
|
|
@ -620,6 +620,14 @@
|
|||
revision = "c12348ce28de40eed0136aa2b644d0ee0650e56c"
|
||||
version = "v1.0.1"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:56eaee71300a91f7a2f096b5d1d1d5389ebe8e69c068ec7d84c20459f599ddde"
|
||||
name = "github.com/minio/HighwayHash"
|
||||
packages = ["."]
|
||||
pruneopts = "NUT"
|
||||
revision = "02ca4b43caa3297fbb615700d8800acc7933be98"
|
||||
version = "v1.0.0"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:a4df73029d2c42fabcb6b41e327d2f87e685284ec03edf76921c267d9cfc9c23"
|
||||
name = "github.com/mitchellh/go-homedir"
|
||||
|
|
@ -983,9 +991,10 @@
|
|||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:eeb413d109f4b2813de0b5b23645d7a503db926cae8f10dfdcf248d15499314f"
|
||||
digest = "1:2d5f7cd5c2bc42a4d5b18f711d482f14689a30212bbe0e398e151b3e2147cb86"
|
||||
name = "golang.org/x/sys"
|
||||
packages = [
|
||||
"cpu",
|
||||
"unix",
|
||||
"windows",
|
||||
"windows/registry",
|
||||
|
|
@ -1376,6 +1385,7 @@
|
|||
"github.com/google/go-containerregistry/pkg/v1/tarball",
|
||||
"github.com/google/go-github/github",
|
||||
"github.com/karrick/godirwalk",
|
||||
"github.com/minio/HighwayHash",
|
||||
"github.com/moby/buildkit/frontend/dockerfile/instructions",
|
||||
"github.com/moby/buildkit/frontend/dockerfile/parser",
|
||||
"github.com/moby/buildkit/frontend/dockerfile/shell",
|
||||
|
|
|
|||
|
|
@ -46,3 +46,7 @@ required = [
|
|||
[[constraint]]
|
||||
name = "gopkg.in/src-d/go-git.v4"
|
||||
version = "4.6.0"
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/minio/HighwayHash"
|
||||
version = "1.0.0"
|
||||
|
|
|
|||
|
|
@ -23,8 +23,10 @@ import (
|
|||
"io"
|
||||
"os"
|
||||
"strconv"
|
||||
"sync"
|
||||
"syscall"
|
||||
|
||||
highwayhash "github.com/minio/HighwayHash"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
|
@ -44,8 +46,14 @@ func ConfigureLogging(logLevel string) error {
|
|||
|
||||
// Hasher returns a hash function, used in snapshotting to determine if a file has changed
|
||||
func Hasher() func(string) (string, error) {
|
||||
pool := sync.Pool{
|
||||
New: func() interface{} {
|
||||
return make([]byte, highwayhash.Size * 10 * 1024)
|
||||
},
|
||||
}
|
||||
key := make([]byte, highwayhash.Size)
|
||||
hasher := func(p string) (string, error) {
|
||||
h := md5.New()
|
||||
h, _ := highwayhash.New(key)
|
||||
fi, err := os.Lstat(p)
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
|
@ -63,7 +71,9 @@ func Hasher() func(string) (string, error) {
|
|||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
if _, err := io.Copy(h, f); err != nil {
|
||||
buf := pool.Get().([]byte)
|
||||
defer pool.Put(buf)
|
||||
if _, err := io.CopyBuffer(h, f, buf); err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2017 Minio Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
|
@ -0,0 +1,225 @@
|
|||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Package highwayhash implements the pseudo-random-function (PRF) HighwayHash.
|
||||
// HighwayHash is a fast hash function designed to defend against hash-flooding attacks
|
||||
// or to authenticate short-lived messages.
|
||||
//
|
||||
// HighwayHash is not a general purpose cryptographic hash function and does not
|
||||
// provide (strong) collision resistance.
|
||||
package highwayhash
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"hash"
|
||||
)
|
||||
|
||||
const (
|
||||
// Size is the size of HighwayHash-256 checksum in bytes.
|
||||
Size = 32
|
||||
// Size128 is the size of HighwayHash-128 checksum in bytes.
|
||||
Size128 = 16
|
||||
// Size64 is the size of HighwayHash-64 checksum in bytes.
|
||||
Size64 = 8
|
||||
)
|
||||
|
||||
var errKeySize = errors.New("highwayhash: invalid key size")
|
||||
|
||||
// New returns a hash.Hash computing the HighwayHash-256 checksum.
|
||||
// It returns a non-nil error if the key is not 32 bytes long.
|
||||
func New(key []byte) (hash.Hash, error) {
|
||||
if len(key) != Size {
|
||||
return nil, errKeySize
|
||||
}
|
||||
h := &digest{size: Size}
|
||||
copy(h.key[:], key)
|
||||
h.Reset()
|
||||
return h, nil
|
||||
}
|
||||
|
||||
// New128 returns a hash.Hash computing the HighwayHash-128 checksum.
|
||||
// It returns a non-nil error if the key is not 32 bytes long.
|
||||
func New128(key []byte) (hash.Hash, error) {
|
||||
if len(key) != Size {
|
||||
return nil, errKeySize
|
||||
}
|
||||
h := &digest{size: Size128}
|
||||
copy(h.key[:], key)
|
||||
h.Reset()
|
||||
return h, nil
|
||||
}
|
||||
|
||||
// New64 returns a hash.Hash computing the HighwayHash-64 checksum.
|
||||
// It returns a non-nil error if the key is not 32 bytes long.
|
||||
func New64(key []byte) (hash.Hash64, error) {
|
||||
if len(key) != Size {
|
||||
return nil, errKeySize
|
||||
}
|
||||
h := new(digest64)
|
||||
h.size = Size64
|
||||
copy(h.key[:], key)
|
||||
h.Reset()
|
||||
return h, nil
|
||||
}
|
||||
|
||||
// Sum computes the HighwayHash-256 checksum of data.
|
||||
// It panics if the key is not 32 bytes long.
|
||||
func Sum(data, key []byte) [Size]byte {
|
||||
if len(key) != Size {
|
||||
panic(errKeySize)
|
||||
}
|
||||
var state [16]uint64
|
||||
initialize(&state, key)
|
||||
if n := len(data) & (^(Size - 1)); n > 0 {
|
||||
update(&state, data[:n])
|
||||
data = data[n:]
|
||||
}
|
||||
if len(data) > 0 {
|
||||
var block [Size]byte
|
||||
offset := copy(block[:], data)
|
||||
hashBuffer(&state, &block, offset)
|
||||
}
|
||||
var hash [Size]byte
|
||||
finalize(hash[:], &state)
|
||||
return hash
|
||||
}
|
||||
|
||||
// Sum128 computes the HighwayHash-128 checksum of data.
|
||||
// It panics if the key is not 32 bytes long.
|
||||
func Sum128(data, key []byte) [Size128]byte {
|
||||
if len(key) != Size {
|
||||
panic(errKeySize)
|
||||
}
|
||||
var state [16]uint64
|
||||
initialize(&state, key)
|
||||
if n := len(data) & (^(Size - 1)); n > 0 {
|
||||
update(&state, data[:n])
|
||||
data = data[n:]
|
||||
}
|
||||
if len(data) > 0 {
|
||||
var block [Size]byte
|
||||
offset := copy(block[:], data)
|
||||
hashBuffer(&state, &block, offset)
|
||||
}
|
||||
var hash [Size128]byte
|
||||
finalize(hash[:], &state)
|
||||
return hash
|
||||
}
|
||||
|
||||
// Sum64 computes the HighwayHash-64 checksum of data.
|
||||
// It panics if the key is not 32 bytes long.
|
||||
func Sum64(data, key []byte) uint64 {
|
||||
if len(key) != Size {
|
||||
panic(errKeySize)
|
||||
}
|
||||
var state [16]uint64
|
||||
initialize(&state, key)
|
||||
if n := len(data) & (^(Size - 1)); n > 0 {
|
||||
update(&state, data[:n])
|
||||
data = data[n:]
|
||||
}
|
||||
if len(data) > 0 {
|
||||
var block [Size]byte
|
||||
offset := copy(block[:], data)
|
||||
hashBuffer(&state, &block, offset)
|
||||
}
|
||||
var hash [Size64]byte
|
||||
finalize(hash[:], &state)
|
||||
return binary.LittleEndian.Uint64(hash[:])
|
||||
}
|
||||
|
||||
type digest64 struct{ digest }
|
||||
|
||||
func (d *digest64) Sum64() uint64 {
|
||||
state := d.state
|
||||
if d.offset > 0 {
|
||||
hashBuffer(&state, &d.buffer, d.offset)
|
||||
}
|
||||
var hash [8]byte
|
||||
finalize(hash[:], &state)
|
||||
return binary.LittleEndian.Uint64(hash[:])
|
||||
}
|
||||
|
||||
type digest struct {
|
||||
state [16]uint64 // v0 | v1 | mul0 | mul1
|
||||
|
||||
key, buffer [Size]byte
|
||||
offset int
|
||||
|
||||
size int
|
||||
}
|
||||
|
||||
func (d *digest) Size() int { return d.size }
|
||||
|
||||
func (d *digest) BlockSize() int { return Size }
|
||||
|
||||
func (d *digest) Reset() {
|
||||
initialize(&d.state, d.key[:])
|
||||
d.offset = 0
|
||||
}
|
||||
|
||||
func (d *digest) Write(p []byte) (n int, err error) {
|
||||
n = len(p)
|
||||
if d.offset > 0 {
|
||||
remaining := Size - d.offset
|
||||
if n < remaining {
|
||||
d.offset += copy(d.buffer[d.offset:], p)
|
||||
return
|
||||
}
|
||||
copy(d.buffer[d.offset:], p[:remaining])
|
||||
update(&d.state, d.buffer[:])
|
||||
p = p[remaining:]
|
||||
d.offset = 0
|
||||
}
|
||||
if nn := len(p) & (^(Size - 1)); nn > 0 {
|
||||
update(&d.state, p[:nn])
|
||||
p = p[nn:]
|
||||
}
|
||||
if len(p) > 0 {
|
||||
d.offset = copy(d.buffer[d.offset:], p)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (d *digest) Sum(b []byte) []byte {
|
||||
state := d.state
|
||||
if d.offset > 0 {
|
||||
hashBuffer(&state, &d.buffer, d.offset)
|
||||
}
|
||||
var hash [Size]byte
|
||||
finalize(hash[:d.size], &state)
|
||||
return append(b, hash[:d.size]...)
|
||||
}
|
||||
|
||||
func hashBuffer(state *[16]uint64, buffer *[32]byte, offset int) {
|
||||
var block [Size]byte
|
||||
mod32 := (uint64(offset) << 32) + uint64(offset)
|
||||
for i := range state[:4] {
|
||||
state[i] += mod32
|
||||
}
|
||||
for i := range state[4:8] {
|
||||
t0 := uint32(state[i+4])
|
||||
t0 = (t0 << uint(offset)) | (t0 >> uint(32-offset))
|
||||
|
||||
t1 := uint32(state[i+4] >> 32)
|
||||
t1 = (t1 << uint(offset)) | (t1 >> uint(32-offset))
|
||||
|
||||
state[i+4] = (uint64(t1) << 32) | uint64(t0)
|
||||
}
|
||||
|
||||
mod4 := offset & 3
|
||||
remain := offset - mod4
|
||||
|
||||
copy(block[:], buffer[:remain])
|
||||
if offset >= 16 {
|
||||
copy(block[28:], buffer[offset-4:])
|
||||
} else if mod4 != 0 {
|
||||
last := uint32(buffer[remain])
|
||||
last += uint32(buffer[remain+mod4>>1]) << 8
|
||||
last += uint32(buffer[offset-1]) << 16
|
||||
binary.LittleEndian.PutUint32(block[16:], last)
|
||||
}
|
||||
update(state, block[:])
|
||||
}
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build go1.8
|
||||
// +build amd64 !gccgo !appengine !nacl
|
||||
|
||||
package highwayhash
|
||||
|
||||
import "golang.org/x/sys/cpu"
|
||||
|
||||
var (
|
||||
useSSE4 = cpu.X86.HasSSE41
|
||||
useAVX2 = cpu.X86.HasAVX2
|
||||
useNEON = false
|
||||
useVMX = false
|
||||
)
|
||||
|
||||
//go:noescape
|
||||
func initializeSSE4(state *[16]uint64, key []byte)
|
||||
|
||||
//go:noescape
|
||||
func initializeAVX2(state *[16]uint64, key []byte)
|
||||
|
||||
//go:noescape
|
||||
func updateSSE4(state *[16]uint64, msg []byte)
|
||||
|
||||
//go:noescape
|
||||
func updateAVX2(state *[16]uint64, msg []byte)
|
||||
|
||||
//go:noescape
|
||||
func finalizeSSE4(out []byte, state *[16]uint64)
|
||||
|
||||
//go:noescape
|
||||
func finalizeAVX2(out []byte, state *[16]uint64)
|
||||
|
||||
func initialize(state *[16]uint64, key []byte) {
|
||||
switch {
|
||||
case useAVX2:
|
||||
initializeAVX2(state, key)
|
||||
case useSSE4:
|
||||
initializeSSE4(state, key)
|
||||
default:
|
||||
initializeGeneric(state, key)
|
||||
}
|
||||
}
|
||||
|
||||
func update(state *[16]uint64, msg []byte) {
|
||||
switch {
|
||||
case useAVX2:
|
||||
updateAVX2(state, msg)
|
||||
case useSSE4:
|
||||
updateSSE4(state, msg)
|
||||
default:
|
||||
updateGeneric(state, msg)
|
||||
}
|
||||
}
|
||||
|
||||
func finalize(out []byte, state *[16]uint64) {
|
||||
switch {
|
||||
case useAVX2:
|
||||
finalizeAVX2(out, state)
|
||||
case useSSE4:
|
||||
finalizeSSE4(out, state)
|
||||
default:
|
||||
finalizeGeneric(out, state)
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,249 @@
|
|||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build go1.8
|
||||
// +build amd64 !gccgo !appengine !nacl
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
DATA ·consAVX2<>+0x00(SB)/8, $0xdbe6d5d5fe4cce2f
|
||||
DATA ·consAVX2<>+0x08(SB)/8, $0xa4093822299f31d0
|
||||
DATA ·consAVX2<>+0x10(SB)/8, $0x13198a2e03707344
|
||||
DATA ·consAVX2<>+0x18(SB)/8, $0x243f6a8885a308d3
|
||||
DATA ·consAVX2<>+0x20(SB)/8, $0x3bd39e10cb0ef593
|
||||
DATA ·consAVX2<>+0x28(SB)/8, $0xc0acf169b5f18a8c
|
||||
DATA ·consAVX2<>+0x30(SB)/8, $0xbe5466cf34e90c6c
|
||||
DATA ·consAVX2<>+0x38(SB)/8, $0x452821e638d01377
|
||||
GLOBL ·consAVX2<>(SB), (NOPTR+RODATA), $64
|
||||
|
||||
DATA ·zipperMergeAVX2<>+0x00(SB)/8, $0xf010e05020c03
|
||||
DATA ·zipperMergeAVX2<>+0x08(SB)/8, $0x70806090d0a040b
|
||||
DATA ·zipperMergeAVX2<>+0x10(SB)/8, $0xf010e05020c03
|
||||
DATA ·zipperMergeAVX2<>+0x18(SB)/8, $0x70806090d0a040b
|
||||
GLOBL ·zipperMergeAVX2<>(SB), (NOPTR+RODATA), $32
|
||||
|
||||
#define REDUCE_MOD(x0, x1, x2, x3, tmp0, tmp1, y0, y1) \
|
||||
MOVQ $0x3FFFFFFFFFFFFFFF, tmp0 \
|
||||
ANDQ tmp0, x3 \
|
||||
MOVQ x2, y0 \
|
||||
MOVQ x3, y1 \
|
||||
\
|
||||
MOVQ x2, tmp0 \
|
||||
MOVQ x3, tmp1 \
|
||||
SHLQ $1, tmp1 \
|
||||
SHRQ $63, tmp0 \
|
||||
MOVQ tmp1, x3 \
|
||||
ORQ tmp0, x3 \
|
||||
\
|
||||
SHLQ $1, x2 \
|
||||
\
|
||||
MOVQ y0, tmp0 \
|
||||
MOVQ y1, tmp1 \
|
||||
SHLQ $2, tmp1 \
|
||||
SHRQ $62, tmp0 \
|
||||
MOVQ tmp1, y1 \
|
||||
ORQ tmp0, y1 \
|
||||
\
|
||||
SHLQ $2, y0 \
|
||||
\
|
||||
XORQ x0, y0 \
|
||||
XORQ x2, y0 \
|
||||
XORQ x1, y1 \
|
||||
XORQ x3, y1
|
||||
|
||||
#define UPDATE(msg) \
|
||||
VPADDQ msg, Y2, Y2 \
|
||||
VPADDQ Y3, Y2, Y2 \
|
||||
\
|
||||
VPSRLQ $32, Y1, Y0 \
|
||||
BYTE $0xC5; BYTE $0xFD; BYTE $0xF4; BYTE $0xC2 \ // VPMULUDQ Y2, Y0, Y0
|
||||
VPXOR Y0, Y3, Y3 \
|
||||
\
|
||||
VPADDQ Y4, Y1, Y1 \
|
||||
\
|
||||
VPSRLQ $32, Y2, Y0 \
|
||||
BYTE $0xC5; BYTE $0xFD; BYTE $0xF4; BYTE $0xC1 \ // VPMULUDQ Y1, Y0, Y0
|
||||
VPXOR Y0, Y4, Y4 \
|
||||
\
|
||||
VPSHUFB Y5, Y2, Y0 \
|
||||
VPADDQ Y0, Y1, Y1 \
|
||||
\
|
||||
VPSHUFB Y5, Y1, Y0 \
|
||||
VPADDQ Y0, Y2, Y2
|
||||
|
||||
// func initializeAVX2(state *[16]uint64, key []byte)
// Builds the initial 128-byte state from the key and the consAVX2 table:
// state[0:4] = key XOR cons[0:4], state[4:8] = (key with the 32-bit halves of
// each 64-bit word swapped) XOR cons[4:8], state[8:12] = cons[0:4],
// state[12:16] = cons[4:8]. Reads 32 bytes from key without checking its length.
|
||||
TEXT ·initializeAVX2(SB), 4, $0-32
|
||||
MOVQ state+0(FP), AX // AX = state pointer
|
||||
MOVQ key_base+8(FP), BX // BX = key data pointer
|
||||
MOVQ $·consAVX2<>(SB), CX // CX = address of the constant table
|
||||
|
||||
VMOVDQU 0(BX), Y1 // Y1 = the 32 key bytes
|
||||
VPSHUFD $177, Y1, Y2 // Y2 = key with adjacent 32-bit words swapped
|
||||
|
||||
VMOVDQU 0(CX), Y3 // Y3 = first 32 bytes of constants
|
||||
VMOVDQU 32(CX), Y4 // Y4 = second 32 bytes of constants
|
||||
|
||||
VPXOR Y3, Y1, Y1
|
||||
VPXOR Y4, Y2, Y2
|
||||
|
||||
VMOVDQU Y1, 0(AX) // state[0:4]
|
||||
VMOVDQU Y2, 32(AX) // state[4:8]
|
||||
VMOVDQU Y3, 64(AX) // state[8:12]
|
||||
VMOVDQU Y4, 96(AX) // state[12:16]
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
// func updateAVX2(state *[16]uint64, msg []byte)
// Absorbs msg into state 32 bytes at a time using the UPDATE macro.
// Returns without touching state when len(msg) < 32.
// NOTE(review): the loop only terminates cleanly when len(msg) is a multiple
// of 32; the Go callers visible in this package always pass whole blocks.
|
||||
TEXT ·updateAVX2(SB), 4, $0-32
|
||||
MOVQ state+0(FP), AX // AX = state pointer
|
||||
MOVQ msg_base+8(FP), BX // BX = message data pointer
|
||||
MOVQ msg_len+16(FP), CX // CX = remaining message length
|
||||
|
||||
CMPQ CX, $32
|
||||
JB DONE // less than one block: nothing to do
|
||||
|
||||
VMOVDQU 0(AX), Y1 // Y1 = state[0:4]
|
||||
VMOVDQU 32(AX), Y2 // Y2 = state[4:8]
|
||||
VMOVDQU 64(AX), Y3 // Y3 = state[8:12]
|
||||
VMOVDQU 96(AX), Y4 // Y4 = state[12:16]
|
||||
|
||||
VMOVDQU ·zipperMergeAVX2<>(SB), Y5 // Y5 = byte-shuffle mask used by UPDATE
|
||||
|
||||
LOOP:
|
||||
VMOVDQU 0(BX), Y0 // Y0 = next 32-byte message block
|
||||
UPDATE(Y0)
|
||||
|
||||
ADDQ $32, BX
|
||||
SUBQ $32, CX
|
||||
JA LOOP // continue while bytes remain
|
||||
|
||||
VMOVDQU Y1, 0(AX) // write the updated state back
|
||||
VMOVDQU Y2, 32(AX)
|
||||
VMOVDQU Y3, 64(AX)
|
||||
VMOVDQU Y4, 96(AX)
|
||||
VZEROUPPER
|
||||
|
||||
DONE:
|
||||
RET
|
||||
|
||||
// func finalizeAVX2(out []byte, state *[16]uint64)
|
||||
TEXT ·finalizeAVX2(SB), 4, $0-32
|
||||
MOVQ state+24(FP), AX
|
||||
MOVQ out_base+0(FP), BX
|
||||
MOVQ out_len+8(FP), CX
|
||||
|
||||
VMOVDQU 0(AX), Y1
|
||||
VMOVDQU 32(AX), Y2
|
||||
VMOVDQU 64(AX), Y3
|
||||
VMOVDQU 96(AX), Y4
|
||||
|
||||
VMOVDQU ·zipperMergeAVX2<>(SB), Y5
|
||||
|
||||
VPERM2I128 $1, Y1, Y1, Y0
|
||||
VPSHUFD $177, Y0, Y0
|
||||
UPDATE(Y0)
|
||||
|
||||
VPERM2I128 $1, Y1, Y1, Y0
|
||||
VPSHUFD $177, Y0, Y0
|
||||
UPDATE(Y0)
|
||||
|
||||
VPERM2I128 $1, Y1, Y1, Y0
|
||||
VPSHUFD $177, Y0, Y0
|
||||
UPDATE(Y0)
|
||||
|
||||
VPERM2I128 $1, Y1, Y1, Y0
|
||||
VPSHUFD $177, Y0, Y0
|
||||
UPDATE(Y0)
|
||||
|
||||
CMPQ CX, $8
|
||||
JE skipUpdate // Just 4 rounds for 64-bit checksum
|
||||
|
||||
VPERM2I128 $1, Y1, Y1, Y0
|
||||
VPSHUFD $177, Y0, Y0
|
||||
UPDATE(Y0)
|
||||
|
||||
VPERM2I128 $1, Y1, Y1, Y0
|
||||
VPSHUFD $177, Y0, Y0
|
||||
UPDATE(Y0)
|
||||
|
||||
CMPQ CX, $16
|
||||
JE skipUpdate // 6 rounds for 128-bit checksum
|
||||
|
||||
VPERM2I128 $1, Y1, Y1, Y0
|
||||
VPSHUFD $177, Y0, Y0
|
||||
UPDATE(Y0)
|
||||
|
||||
VPERM2I128 $1, Y1, Y1, Y0
|
||||
VPSHUFD $177, Y0, Y0
|
||||
UPDATE(Y0)
|
||||
|
||||
VPERM2I128 $1, Y1, Y1, Y0
|
||||
VPSHUFD $177, Y0, Y0
|
||||
UPDATE(Y0)
|
||||
|
||||
VPERM2I128 $1, Y1, Y1, Y0
|
||||
VPSHUFD $177, Y0, Y0
|
||||
UPDATE(Y0)
|
||||
|
||||
skipUpdate:
|
||||
VMOVDQU Y1, 0(AX)
|
||||
VMOVDQU Y2, 32(AX)
|
||||
VMOVDQU Y3, 64(AX)
|
||||
VMOVDQU Y4, 96(AX)
|
||||
VZEROUPPER
|
||||
|
||||
CMPQ CX, $8
|
||||
JE hash64
|
||||
CMPQ CX, $16
|
||||
JE hash128
|
||||
|
||||
// 256-bit checksum
|
||||
MOVQ 0*8(AX), R8
|
||||
MOVQ 1*8(AX), R9
|
||||
MOVQ 4*8(AX), R10
|
||||
MOVQ 5*8(AX), R11
|
||||
ADDQ 8*8(AX), R8
|
||||
ADDQ 9*8(AX), R9
|
||||
ADDQ 12*8(AX), R10
|
||||
ADDQ 13*8(AX), R11
|
||||
|
||||
REDUCE_MOD(R8, R9, R10, R11, R12, R13, R14, R15)
|
||||
MOVQ R14, 0(BX)
|
||||
MOVQ R15, 8(BX)
|
||||
|
||||
MOVQ 2*8(AX), R8
|
||||
MOVQ 3*8(AX), R9
|
||||
MOVQ 6*8(AX), R10
|
||||
MOVQ 7*8(AX), R11
|
||||
ADDQ 10*8(AX), R8
|
||||
ADDQ 11*8(AX), R9
|
||||
ADDQ 14*8(AX), R10
|
||||
ADDQ 15*8(AX), R11
|
||||
|
||||
REDUCE_MOD(R8, R9, R10, R11, R12, R13, R14, R15)
|
||||
MOVQ R14, 16(BX)
|
||||
MOVQ R15, 24(BX)
|
||||
RET
|
||||
|
||||
hash128:
|
||||
MOVQ 0*8(AX), R8
|
||||
MOVQ 1*8(AX), R9
|
||||
ADDQ 6*8(AX), R8
|
||||
ADDQ 7*8(AX), R9
|
||||
ADDQ 8*8(AX), R8
|
||||
ADDQ 9*8(AX), R9
|
||||
ADDQ 14*8(AX), R8
|
||||
ADDQ 15*8(AX), R9
|
||||
MOVQ R8, 0(BX)
|
||||
MOVQ R9, 8(BX)
|
||||
RET
|
||||
|
||||
hash64:
|
||||
MOVQ 0*8(AX), DX
|
||||
ADDQ 4*8(AX), DX
|
||||
ADDQ 8*8(AX), DX
|
||||
ADDQ 12*8(AX), DX
|
||||
MOVQ DX, 0(BX)
|
||||
RET
|
||||
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build !go1.8
|
||||
// +build amd64 !gccgo !appengine !nacl
|
||||
|
||||
package highwayhash
|
||||
|
||||
import "golang.org/x/sys/cpu"
|
||||
|
||||
var (
|
||||
useSSE4 = cpu.X86.HasSSE41
|
||||
useAVX2 = false
|
||||
useNEON = false
|
||||
useVMX = false
|
||||
)
|
||||
|
||||
//go:noescape
|
||||
func initializeSSE4(state *[16]uint64, key []byte)
|
||||
|
||||
//go:noescape
|
||||
func updateSSE4(state *[16]uint64, msg []byte)
|
||||
|
||||
//go:noescape
|
||||
func finalizeSSE4(out []byte, state *[16]uint64)
|
||||
|
||||
func initialize(state *[16]uint64, key []byte) {
|
||||
if useSSE4 {
|
||||
initializeSSE4(state, key)
|
||||
} else {
|
||||
initializeGeneric(state, key)
|
||||
}
|
||||
}
|
||||
|
||||
func update(state *[16]uint64, msg []byte) {
|
||||
if useSSE4 {
|
||||
updateSSE4(state, msg)
|
||||
} else {
|
||||
updateGeneric(state, msg)
|
||||
}
|
||||
}
|
||||
|
||||
func finalize(out []byte, state *[16]uint64) {
|
||||
if useSSE4 {
|
||||
finalizeSSE4(out, state)
|
||||
} else {
|
||||
finalizeGeneric(out, state)
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,294 @@
|
|||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build amd64 !gccgo !appengine !nacl
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
DATA ·cons<>+0x00(SB)/8, $0xdbe6d5d5fe4cce2f
|
||||
DATA ·cons<>+0x08(SB)/8, $0xa4093822299f31d0
|
||||
DATA ·cons<>+0x10(SB)/8, $0x13198a2e03707344
|
||||
DATA ·cons<>+0x18(SB)/8, $0x243f6a8885a308d3
|
||||
DATA ·cons<>+0x20(SB)/8, $0x3bd39e10cb0ef593
|
||||
DATA ·cons<>+0x28(SB)/8, $0xc0acf169b5f18a8c
|
||||
DATA ·cons<>+0x30(SB)/8, $0xbe5466cf34e90c6c
|
||||
DATA ·cons<>+0x38(SB)/8, $0x452821e638d01377
|
||||
GLOBL ·cons<>(SB), (NOPTR+RODATA), $64
|
||||
|
||||
DATA ·zipperMerge<>+0x00(SB)/8, $0xf010e05020c03
|
||||
DATA ·zipperMerge<>+0x08(SB)/8, $0x70806090d0a040b
|
||||
GLOBL ·zipperMerge<>(SB), (NOPTR+RODATA), $16
|
||||
|
||||
#define v00 X0
|
||||
#define v01 X1
|
||||
#define v10 X2
|
||||
#define v11 X3
|
||||
#define m00 X4
|
||||
#define m01 X5
|
||||
#define m10 X6
|
||||
#define m11 X7
|
||||
|
||||
#define t0 X8
|
||||
#define t1 X9
|
||||
#define t2 X10
|
||||
|
||||
#define REDUCE_MOD(x0, x1, x2, x3, tmp0, tmp1, y0, y1) \
|
||||
MOVQ $0x3FFFFFFFFFFFFFFF, tmp0 \
|
||||
ANDQ tmp0, x3 \
|
||||
MOVQ x2, y0 \
|
||||
MOVQ x3, y1 \
|
||||
\
|
||||
MOVQ x2, tmp0 \
|
||||
MOVQ x3, tmp1 \
|
||||
SHLQ $1, tmp1 \
|
||||
SHRQ $63, tmp0 \
|
||||
MOVQ tmp1, x3 \
|
||||
ORQ tmp0, x3 \
|
||||
\
|
||||
SHLQ $1, x2 \
|
||||
\
|
||||
MOVQ y0, tmp0 \
|
||||
MOVQ y1, tmp1 \
|
||||
SHLQ $2, tmp1 \
|
||||
SHRQ $62, tmp0 \
|
||||
MOVQ tmp1, y1 \
|
||||
ORQ tmp0, y1 \
|
||||
\
|
||||
SHLQ $2, y0 \
|
||||
\
|
||||
XORQ x0, y0 \
|
||||
XORQ x2, y0 \
|
||||
XORQ x1, y1 \
|
||||
XORQ x3, y1
|
||||
|
||||
#define UPDATE(msg0, msg1) \
|
||||
PADDQ msg0, v10 \
|
||||
PADDQ m00, v10 \
|
||||
PADDQ msg1, v11 \
|
||||
PADDQ m01, v11 \
|
||||
\
|
||||
MOVO v00, t0 \
|
||||
MOVO v01, t1 \
|
||||
PSRLQ $32, t0 \
|
||||
PSRLQ $32, t1 \
|
||||
PMULULQ v10, t0 \
|
||||
PMULULQ v11, t1 \
|
||||
PXOR t0, m00 \
|
||||
PXOR t1, m01 \
|
||||
\
|
||||
PADDQ m10, v00 \
|
||||
PADDQ m11, v01 \
|
||||
\
|
||||
MOVO v10, t0 \
|
||||
MOVO v11, t1 \
|
||||
PSRLQ $32, t0 \
|
||||
PSRLQ $32, t1 \
|
||||
PMULULQ v00, t0 \
|
||||
PMULULQ v01, t1 \
|
||||
PXOR t0, m10 \
|
||||
PXOR t1, m11 \
|
||||
\
|
||||
MOVO v10, t0 \
|
||||
PSHUFB t2, t0 \
|
||||
MOVO v11, t1 \
|
||||
PSHUFB t2, t1 \
|
||||
PADDQ t0, v00 \
|
||||
PADDQ t1, v01 \
|
||||
\
|
||||
MOVO v00, t0 \
|
||||
PSHUFB t2, t0 \
|
||||
MOVO v01, t1 \
|
||||
PSHUFB t2, t1 \
|
||||
PADDQ t0, v10 \
|
||||
PADDQ t1, v11
|
||||
|
||||
// func initializeSSE4(state *[16]uint64, key []byte)
// Builds the initial 128-byte state from the key and the cons table:
// state[0:4] = key XOR cons[0:4], state[4:8] = (key with the 32-bit halves of
// each 64-bit word swapped) XOR cons[4:8], state[8:12] = cons[0:4],
// state[12:16] = cons[4:8]. Reads 32 bytes from key without checking its length.
|
||||
TEXT ·initializeSSE4(SB), 4, $0-32
|
||||
MOVQ state+0(FP), AX // AX = state pointer
|
||||
MOVQ key_base+8(FP), BX // BX = key data pointer
|
||||
MOVQ $·cons<>(SB), CX // CX = address of the constant table
|
||||
|
||||
MOVOU 0(BX), v00 // low 16 key bytes
|
||||
MOVOU 16(BX), v01 // high 16 key bytes
|
||||
|
||||
PSHUFD $177, v00, v10 // key with adjacent 32-bit words swapped
|
||||
PSHUFD $177, v01, v11
|
||||
|
||||
MOVOU 0(CX), m00 // load the 64 bytes of constants
|
||||
MOVOU 16(CX), m01
|
||||
MOVOU 32(CX), m10
|
||||
MOVOU 48(CX), m11
|
||||
|
||||
PXOR m00, v00
|
||||
PXOR m01, v01
|
||||
PXOR m10, v10
|
||||
PXOR m11, v11
|
||||
|
||||
MOVOU v00, 0(AX) // state[0:4]
|
||||
MOVOU v01, 16(AX)
|
||||
MOVOU v10, 32(AX) // state[4:8]
|
||||
MOVOU v11, 48(AX)
|
||||
MOVOU m00, 64(AX) // state[8:12]
|
||||
MOVOU m01, 80(AX)
|
||||
MOVOU m10, 96(AX) // state[12:16]
|
||||
MOVOU m11, 112(AX)
|
||||
RET
|
||||
|
||||
// func updateSSE4(state *[16]uint64, msg []byte)
// Absorbs msg into state 32 bytes at a time using the UPDATE macro.
// Returns without touching state when len(msg) < 32.
// NOTE(review): the loop only terminates cleanly when len(msg) is a multiple
// of 32; the Go callers visible in this package always pass whole blocks.
|
||||
TEXT ·updateSSE4(SB), 4, $0-32
|
||||
MOVQ state+0(FP), AX // AX = state pointer
|
||||
MOVQ msg_base+8(FP), BX // BX = message data pointer
|
||||
MOVQ msg_len+16(FP), CX // CX = remaining message length
|
||||
|
||||
CMPQ CX, $32
|
||||
JB DONE // less than one block: nothing to do
|
||||
|
||||
MOVOU 0(AX), v00 // load the full state into X0-X7
|
||||
MOVOU 16(AX), v01
|
||||
MOVOU 32(AX), v10
|
||||
MOVOU 48(AX), v11
|
||||
MOVOU 64(AX), m00
|
||||
MOVOU 80(AX), m01
|
||||
MOVOU 96(AX), m10
|
||||
MOVOU 112(AX), m11
|
||||
|
||||
MOVOU ·zipperMerge<>(SB), t2 // t2 = byte-shuffle mask used by UPDATE
|
||||
|
||||
LOOP:
|
||||
MOVOU 0(BX), t0 // low half of the next 32-byte block
|
||||
MOVOU 16(BX), t1 // high half of the next 32-byte block
|
||||
|
||||
UPDATE(t0, t1)
|
||||
|
||||
ADDQ $32, BX
|
||||
SUBQ $32, CX
|
||||
JA LOOP // continue while bytes remain
|
||||
|
||||
MOVOU v00, 0(AX) // write the updated state back
|
||||
MOVOU v01, 16(AX)
|
||||
MOVOU v10, 32(AX)
|
||||
MOVOU v11, 48(AX)
|
||||
MOVOU m00, 64(AX)
|
||||
MOVOU m01, 80(AX)
|
||||
MOVOU m10, 96(AX)
|
||||
MOVOU m11, 112(AX)
|
||||
|
||||
DONE:
|
||||
RET
|
||||
|
||||
// func finalizeSSE4(out []byte, state *[16]uint64)
|
||||
TEXT ·finalizeSSE4(SB), 4, $0-32
|
||||
MOVQ state+24(FP), AX
|
||||
MOVQ out_base+0(FP), BX
|
||||
MOVQ out_len+8(FP), CX
|
||||
|
||||
MOVOU 0(AX), v00
|
||||
MOVOU 16(AX), v01
|
||||
MOVOU 32(AX), v10
|
||||
MOVOU 48(AX), v11
|
||||
MOVOU 64(AX), m00
|
||||
MOVOU 80(AX), m01
|
||||
MOVOU 96(AX), m10
|
||||
MOVOU 112(AX), m11
|
||||
|
||||
MOVOU ·zipperMerge<>(SB), t2
|
||||
|
||||
PSHUFD $177, v01, t0
|
||||
PSHUFD $177, v00, t1
|
||||
UPDATE(t0, t1)
|
||||
|
||||
PSHUFD $177, v01, t0
|
||||
PSHUFD $177, v00, t1
|
||||
UPDATE(t0, t1)
|
||||
|
||||
PSHUFD $177, v01, t0
|
||||
PSHUFD $177, v00, t1
|
||||
UPDATE(t0, t1)
|
||||
|
||||
PSHUFD $177, v01, t0
|
||||
PSHUFD $177, v00, t1
|
||||
UPDATE(t0, t1)
|
||||
|
||||
CMPQ CX, $8
|
||||
JE skipUpdate // Just 4 rounds for 64-bit checksum
|
||||
|
||||
PSHUFD $177, v01, t0
|
||||
PSHUFD $177, v00, t1
|
||||
UPDATE(t0, t1)
|
||||
|
||||
PSHUFD $177, v01, t0
|
||||
PSHUFD $177, v00, t1
|
||||
UPDATE(t0, t1)
|
||||
|
||||
CMPQ CX, $16
|
||||
JE skipUpdate // 6 rounds for 128-bit checksum
|
||||
|
||||
PSHUFD $177, v01, t0
|
||||
PSHUFD $177, v00, t1
|
||||
UPDATE(t0, t1)
|
||||
|
||||
PSHUFD $177, v01, t0
|
||||
PSHUFD $177, v00, t1
|
||||
UPDATE(t0, t1)
|
||||
|
||||
PSHUFD $177, v01, t0
|
||||
PSHUFD $177, v00, t1
|
||||
UPDATE(t0, t1)
|
||||
|
||||
PSHUFD $177, v01, t0
|
||||
PSHUFD $177, v00, t1
|
||||
UPDATE(t0, t1)
|
||||
|
||||
skipUpdate:
|
||||
MOVOU v00, 0(AX)
|
||||
MOVOU v01, 16(AX)
|
||||
MOVOU v10, 32(AX)
|
||||
MOVOU v11, 48(AX)
|
||||
MOVOU m00, 64(AX)
|
||||
MOVOU m01, 80(AX)
|
||||
MOVOU m10, 96(AX)
|
||||
MOVOU m11, 112(AX)
|
||||
|
||||
CMPQ CX, $8
|
||||
JE hash64
|
||||
CMPQ CX, $16
|
||||
JE hash128
|
||||
|
||||
// 256-bit checksum
|
||||
PADDQ v00, m00
|
||||
PADDQ v10, m10
|
||||
PADDQ v01, m01
|
||||
PADDQ v11, m11
|
||||
|
||||
MOVQ m00, R8
|
||||
PEXTRQ $1, m00, R9
|
||||
MOVQ m10, R10
|
||||
PEXTRQ $1, m10, R11
|
||||
REDUCE_MOD(R8, R9, R10, R11, R12, R13, R14, R15)
|
||||
MOVQ R14, 0(BX)
|
||||
MOVQ R15, 8(BX)
|
||||
|
||||
MOVQ m01, R8
|
||||
PEXTRQ $1, m01, R9
|
||||
MOVQ m11, R10
|
||||
PEXTRQ $1, m11, R11
|
||||
REDUCE_MOD(R8, R9, R10, R11, R12, R13, R14, R15)
|
||||
MOVQ R14, 16(BX)
|
||||
MOVQ R15, 24(BX)
|
||||
RET
|
||||
|
||||
hash128:
|
||||
PADDQ v00, v11
|
||||
PADDQ m00, m11
|
||||
PADDQ v11, m11
|
||||
MOVOU m11, 0(BX)
|
||||
RET
|
||||
|
||||
hash64:
|
||||
PADDQ v00, v10
|
||||
PADDQ m00, m10
|
||||
PADDQ v10, m10
|
||||
MOVQ m10, DX
|
||||
MOVQ DX, 0(BX)
|
||||
RET
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
//+build !noasm
|
||||
|
||||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
package highwayhash
|
||||
|
||||
var (
|
||||
useSSE4 = false
|
||||
useAVX2 = false
|
||||
useNEON = true
|
||||
useVMX = false
|
||||
)
|
||||
|
||||
//go:noescape
|
||||
func updateArm64(state *[16]uint64, msg []byte)
|
||||
|
||||
// initialize sets up state from key by delegating to the pure Go
// implementation, initializeGeneric.
func initialize(state *[16]uint64, key []byte) {
|
||||
initializeGeneric(state, key)
|
||||
}
|
||||
|
||||
func update(state *[16]uint64, msg []byte) {
|
||||
if useNEON {
|
||||
updateArm64(state, msg)
|
||||
} else {
|
||||
updateGeneric(state, msg)
|
||||
}
|
||||
}
|
||||
|
||||
// finalize writes the checksum derived from state into out by delegating to
// the pure Go implementation, finalizeGeneric.
func finalize(out []byte, state *[16]uint64) {
|
||||
finalizeGeneric(out, state)
|
||||
}
|
||||
|
|
@ -0,0 +1,116 @@
|
|||
//+build !noasm !appengine
|
||||
|
||||
//
|
||||
// Minio Cloud Storage, (C) 2017 Minio, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
// Use github.com/minio/asm2plan9s on this file to assemble ARM instructions to
|
||||
// the opcodes of their Plan9 equivalents
|
||||
|
||||
TEXT ·updateArm64(SB), 7, $0
|
||||
MOVD state+0(FP), R0
|
||||
MOVD msg_base+8(FP), R1
|
||||
MOVD msg_len+16(FP), R2 // length of message
|
||||
SUBS $32, R2
|
||||
BMI complete
|
||||
|
||||
// Definition of registers
|
||||
// v0 = v0.lo
|
||||
// v1 = v0.hi
|
||||
// v2 = v1.lo
|
||||
// v3 = v1.hi
|
||||
// v4 = mul0.lo
|
||||
// v5 = mul0.hi
|
||||
// v6 = mul1.lo
|
||||
// v7 = mul1.hi
|
||||
|
||||
// Load constants table pointer
|
||||
MOVD $·constants(SB), R3
|
||||
|
||||
// and load constants into v28, v29, and v30
|
||||
WORD $0x4c40607c // ld1 {v28.16b-v30.16b}, [x3]
|
||||
|
||||
WORD $0x4cdf2c00 // ld1 {v0.2d-v3.2d}, [x0], #64
|
||||
WORD $0x4c402c04 // ld1 {v4.2d-v7.2d}, [x0]
|
||||
SUBS $64, R0
|
||||
|
||||
loop:
|
||||
// Main loop
|
||||
WORD $0x4cdfa83a // ld1 {v26.4s-v27.4s}, [x1], #32
|
||||
|
||||
// Add message
|
||||
WORD $0x4efa8442 // add v2.2d, v2.2d, v26.2d
|
||||
WORD $0x4efb8463 // add v3.2d, v3.2d, v27.2d
|
||||
|
||||
// v1 += mul0
|
||||
WORD $0x4ee48442 // add v2.2d, v2.2d, v4.2d
|
||||
WORD $0x4ee58463 // add v3.2d, v3.2d, v5.2d
|
||||
|
||||
// First pair of multiplies
|
||||
WORD $0x4e1d200a // tbl v10.16b,{v0.16b,v1.16b},v29.16b
|
||||
WORD $0x4e1e204b // tbl v11.16b,{v2.16b,v3.16b},v30.16b
|
||||
WORD $0x2eaac16c // umull v12.2d, v11.2s, v10.2s
|
||||
WORD $0x6eaac16d // umull2 v13.2d, v11.4s, v10.4s
|
||||
|
||||
// v0 += mul1
|
||||
WORD $0x4ee68400 // add v0.2d, v0.2d, v6.2d
|
||||
WORD $0x4ee78421 // add v1.2d, v1.2d, v7.2d
|
||||
|
||||
// Second pair of multiplies
|
||||
WORD $0x4e1d204f // tbl v15.16b,{v2.16b,v3.16b},v29.16b
|
||||
WORD $0x4e1e200e // tbl v14.16b,{v0.16b,v1.16b},v30.16b
|
||||
|
||||
// EOR multiplication result in
|
||||
WORD $0x6e2c1c84 // eor v4.16b,v4.16b,v12.16b
|
||||
WORD $0x6e2d1ca5 // eor v5.16b,v5.16b,v13.16b
|
||||
|
||||
WORD $0x2eaec1f0 // umull v16.2d, v15.2s, v14.2s
|
||||
WORD $0x6eaec1f1 // umull2 v17.2d, v15.4s, v14.4s
|
||||
|
||||
// First pair of zipper-merges
|
||||
WORD $0x4e1c0052 // tbl v18.16b,{v2.16b},v28.16b
|
||||
WORD $0x4ef28400 // add v0.2d, v0.2d, v18.2d
|
||||
WORD $0x4e1c0073 // tbl v19.16b,{v3.16b},v28.16b
|
||||
WORD $0x4ef38421 // add v1.2d, v1.2d, v19.2d
|
||||
|
||||
// Second pair of zipper-merges
|
||||
WORD $0x4e1c0014 // tbl v20.16b,{v0.16b},v28.16b
|
||||
WORD $0x4ef48442 // add v2.2d, v2.2d, v20.2d
|
||||
WORD $0x4e1c0035 // tbl v21.16b,{v1.16b},v28.16b
|
||||
WORD $0x4ef58463 // add v3.2d, v3.2d, v21.2d
|
||||
|
||||
// EOR multiplication result in
|
||||
WORD $0x6e301cc6 // eor v6.16b,v6.16b,v16.16b
|
||||
WORD $0x6e311ce7 // eor v7.16b,v7.16b,v17.16b
|
||||
|
||||
SUBS $32, R2
|
||||
BPL loop
|
||||
|
||||
// Store result
|
||||
WORD $0x4c9f2c00 // st1 {v0.2d-v3.2d}, [x0], #64
|
||||
WORD $0x4c002c04 // st1 {v4.2d-v7.2d}, [x0]
|
||||
|
||||
complete:
|
||||
RET
|
||||
|
||||
// Constants for TBL instructions
|
||||
DATA ·constants+0x0(SB)/8, $0x000f010e05020c03 // zipper merge constant
|
||||
DATA ·constants+0x8(SB)/8, $0x070806090d0a040b
|
||||
DATA ·constants+0x10(SB)/8, $0x0f0e0d0c07060504 // setup first register for multiply
|
||||
DATA ·constants+0x18(SB)/8, $0x1f1e1d1c17161514
|
||||
DATA ·constants+0x20(SB)/8, $0x0b0a090803020100 // setup second register for multiply
|
||||
DATA ·constants+0x28(SB)/8, $0x1b1a191813121110
|
||||
|
||||
GLOBL ·constants(SB), 8, $48
|
||||
|
|
@ -0,0 +1,161 @@
|
|||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
package highwayhash
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
// Start indices of the four 4-word groups that make up the [16]uint64
// hash state: v0 = state[0:4], v1 = state[4:8], mul0 = state[8:12] and
// mul1 = state[12:16].
const (
	v0 = 4 * iota
	v1
	mul0
	mul1
)
|
||||
|
||||
// init0 is the initial value of the mul0 group and, XORed with the key,
// of the v0 group.
var init0 = [...]uint64{
	0xdbe6d5d5fe4cce2f,
	0xa4093822299f31d0,
	0x13198a2e03707344,
	0x243f6a8885a308d3,
}

// init1 is the initial value of the mul1 group and, XORed with the
// half-swapped key, of the v1 group.
var init1 = [...]uint64{
	0x3bd39e10cb0ef593,
	0xc0acf169b5f18a8c,
	0xbe5466cf34e90c6c,
	0x452821e638d01377,
}
|
||||
|
||||
func initializeGeneric(state *[16]uint64, k []byte) {
|
||||
var key [4]uint64
|
||||
|
||||
key[0] = binary.LittleEndian.Uint64(k[0:])
|
||||
key[1] = binary.LittleEndian.Uint64(k[8:])
|
||||
key[2] = binary.LittleEndian.Uint64(k[16:])
|
||||
key[3] = binary.LittleEndian.Uint64(k[24:])
|
||||
|
||||
copy(state[mul0:], init0[:])
|
||||
copy(state[mul1:], init1[:])
|
||||
|
||||
for i, k := range key {
|
||||
state[v0+i] = init0[i] ^ k
|
||||
}
|
||||
|
||||
key[0] = key[0]>>32 | key[0]<<32
|
||||
key[1] = key[1]>>32 | key[1]<<32
|
||||
key[2] = key[2]>>32 | key[2]<<32
|
||||
key[3] = key[3]>>32 | key[3]<<32
|
||||
|
||||
for i, k := range key {
|
||||
state[v1+i] = init1[i] ^ k
|
||||
}
|
||||
}
|
||||
|
||||
func updateGeneric(state *[16]uint64, msg []byte) {
|
||||
for len(msg) > 0 {
|
||||
// add message
|
||||
state[v1+0] += binary.LittleEndian.Uint64(msg)
|
||||
state[v1+1] += binary.LittleEndian.Uint64(msg[8:])
|
||||
state[v1+2] += binary.LittleEndian.Uint64(msg[16:])
|
||||
state[v1+3] += binary.LittleEndian.Uint64(msg[24:])
|
||||
|
||||
// v1 += mul0
|
||||
state[v1+0] += state[mul0+0]
|
||||
state[v1+1] += state[mul0+1]
|
||||
state[v1+2] += state[mul0+2]
|
||||
state[v1+3] += state[mul0+3]
|
||||
|
||||
state[mul0+0] ^= uint64(uint32(state[v1+0])) * (state[v0+0] >> 32)
|
||||
state[mul0+1] ^= uint64(uint32(state[v1+1])) * (state[v0+1] >> 32)
|
||||
state[mul0+2] ^= uint64(uint32(state[v1+2])) * (state[v0+2] >> 32)
|
||||
state[mul0+3] ^= uint64(uint32(state[v1+3])) * (state[v0+3] >> 32)
|
||||
|
||||
// v0 += mul1
|
||||
state[v0+0] += state[mul1+0]
|
||||
state[v0+1] += state[mul1+1]
|
||||
state[v0+2] += state[mul1+2]
|
||||
state[v0+3] += state[mul1+3]
|
||||
|
||||
state[mul1+0] ^= uint64(uint32(state[v0+0])) * (state[v1+0] >> 32)
|
||||
state[mul1+1] ^= uint64(uint32(state[v0+1])) * (state[v1+1] >> 32)
|
||||
state[mul1+2] ^= uint64(uint32(state[v0+2])) * (state[v1+2] >> 32)
|
||||
state[mul1+3] ^= uint64(uint32(state[v0+3])) * (state[v1+3] >> 32)
|
||||
|
||||
zipperMerge(state[v1+0], state[v1+1], &state[v0+0], &state[v0+1])
|
||||
zipperMerge(state[v1+2], state[v1+3], &state[v0+2], &state[v0+3])
|
||||
|
||||
zipperMerge(state[v0+0], state[v0+1], &state[v1+0], &state[v1+1])
|
||||
zipperMerge(state[v0+2], state[v0+3], &state[v1+2], &state[v1+3])
|
||||
msg = msg[32:]
|
||||
}
|
||||
}
|
||||
|
||||
func finalizeGeneric(out []byte, state *[16]uint64) {
|
||||
var perm [4]uint64
|
||||
var tmp [32]byte
|
||||
runs := 4
|
||||
if len(out) == 16 {
|
||||
runs = 6
|
||||
} else if len(out) == 32 {
|
||||
runs = 10
|
||||
}
|
||||
for i := 0; i < runs; i++ {
|
||||
perm[0] = state[v0+2]>>32 | state[v0+2]<<32
|
||||
perm[1] = state[v0+3]>>32 | state[v0+3]<<32
|
||||
perm[2] = state[v0+0]>>32 | state[v0+0]<<32
|
||||
perm[3] = state[v0+1]>>32 | state[v0+1]<<32
|
||||
|
||||
binary.LittleEndian.PutUint64(tmp[0:], perm[0])
|
||||
binary.LittleEndian.PutUint64(tmp[8:], perm[1])
|
||||
binary.LittleEndian.PutUint64(tmp[16:], perm[2])
|
||||
binary.LittleEndian.PutUint64(tmp[24:], perm[3])
|
||||
|
||||
update(state, tmp[:])
|
||||
}
|
||||
|
||||
switch len(out) {
|
||||
case 8:
|
||||
binary.LittleEndian.PutUint64(out, state[v0+0]+state[v1+0]+state[mul0+0]+state[mul1+0])
|
||||
case 16:
|
||||
binary.LittleEndian.PutUint64(out, state[v0+0]+state[v1+2]+state[mul0+0]+state[mul1+2])
|
||||
binary.LittleEndian.PutUint64(out[8:], state[v0+1]+state[v1+3]+state[mul0+1]+state[mul1+3])
|
||||
case 32:
|
||||
h0, h1 := reduceMod(state[v0+0]+state[mul0+0], state[v0+1]+state[mul0+1], state[v1+0]+state[mul1+0], state[v1+1]+state[mul1+1])
|
||||
binary.LittleEndian.PutUint64(out[0:], h0)
|
||||
binary.LittleEndian.PutUint64(out[8:], h1)
|
||||
|
||||
h0, h1 = reduceMod(state[v0+2]+state[mul0+2], state[v0+3]+state[mul0+3], state[v1+2]+state[mul1+2], state[v1+3]+state[mul1+3])
|
||||
binary.LittleEndian.PutUint64(out[16:], h0)
|
||||
binary.LittleEndian.PutUint64(out[24:], h1)
|
||||
}
|
||||
}
|
||||
|
||||
// zipperMerge adds a fixed byte shuffle of the 16-byte pair (v0, v1) to
// *d0 and *d1.
//
// Numbering the bytes of v0 as 0-7 and those of v1 as 8-15 (least
// significant first), *d0 is increased by the word whose bytes are
// 3,12,2,5,14,1,15,0 and *d1 by the word whose bytes are
// 11,4,10,13,9,6,8,7. Both additions wrap modulo 2^64.
func zipperMerge(v0, v1 uint64, d0, d1 *uint64) {
	const b = uint64(0xFF)

	// Every term occupies its own byte, so OR-ing them equals adding them.
	*d0 += ((v0 >> 24) & b) |
		(((v1 >> 32) & b) << 8) |
		(v0 & (b << 16)) |
		(((v0 >> 40) & b) << 24) |
		(((v1 >> 48) & b) << 32) |
		(((v0 >> 8) & b) << 40) |
		((v1 >> 56) << 48) |
		(v0 << 56)

	*d1 += ((v1 >> 24) & b) |
		(((v0 >> 32) & b) << 8) |
		(v1 & (b << 16)) |
		(((v1 >> 40) & b) << 24) |
		(((v1 >> 8) & b) << 32) |
		(((v0 >> 48) & b) << 40) |
		((v1 & b) << 48) |
		(v0 & (b << 56))
}
|
||||
|
||||
// reduceMod reduces the 256-bit value v = [v0, v1, v2, v3] (v0 least
// significant) modulo the irreducible polynomial x^128 + x^2 + x and
// returns the 128-bit result as (r0, r1).
//
// The two top bits of v3 are discarded; the upper half [v2, v3] is then
// folded into the lower half shifted left by one and by two bits.
func reduceMod(v0, v1, v2, v3 uint64) (r0, r1 uint64) {
	v3 &= 0x3FFFFFFFFFFFFFFF

	// 128-bit left shifts of [v2, v3] by 1 and by 2.
	lo1, hi1 := v2<<1, v3<<1|v2>>63
	lo2, hi2 := v2<<2, v3<<2|v2>>62

	r0 = v0 ^ lo1 ^ lo2
	r1 = v1 ^ hi1 ^ hi2
	return r0, r1
}
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
//+build !noasm
|
||||
|
||||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
package highwayhash
|
||||
|
||||
// Implementation selectors. In this build update() takes the
// updatePpc64Le assembly path because useVMX is true; the other flags
// are false.
var (
	useSSE4, useAVX2, useNEON = false, false, false
	useVMX                   = true
)
|
||||
|
||||
// updatePpc64Le mixes msg into state in 32-byte blocks. It has no Go body;
// the implementation is the assembly routine TEXT ·updatePpc64Le.
//go:noescape
|
||||
func updatePpc64Le(state *[16]uint64, msg []byte)
|
||||
|
||||
func initialize(state *[16]uint64, key []byte) {
|
||||
initializeGeneric(state, key)
|
||||
}
|
||||
|
||||
func update(state *[16]uint64, msg []byte) {
|
||||
if useVMX {
|
||||
updatePpc64Le(state, msg)
|
||||
} else {
|
||||
updateGeneric(state, msg)
|
||||
}
|
||||
}
|
||||
|
||||
func finalize(out []byte, state *[16]uint64) {
|
||||
finalizeGeneric(out, state)
|
||||
}
|
||||
|
|
@ -0,0 +1,182 @@
|
|||
//+build !noasm !appengine
|
||||
|
||||
//
|
||||
// Minio Cloud Storage, (C) 2018 Minio, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// Definition of registers
// Most vector registers get two names: NAME (a VSnn register, used below
// with LXVD2X/STXVD2X/XXPERMDI/XXLXOR/XXLNAND) and NAME_ (a Vn register,
// used with VADDUDM/VRLD/VSRD/VPERM/VMULOUW). VS(32+n) and Vn are the same
// physical register in the Power ISA, so both names refer to one value.
// The eight state registers mirror the Go state array two words at a time:
// V0 = state[0:4], V1 = state[4:8], MUL0 = state[8:12], MUL1 = state[12:16].
|
||||
#define V0_LO VS32
|
||||
#define V0_LO_ V0
|
||||
#define V0_HI VS33
|
||||
#define V0_HI_ V1
|
||||
#define V1_LO VS34
|
||||
#define V1_LO_ V2
|
||||
#define V1_HI VS35
|
||||
#define V1_HI_ V3
|
||||
#define MUL0_LO VS36
|
||||
#define MUL0_LO_ V4
|
||||
#define MUL0_HI VS37
|
||||
#define MUL0_HI_ V5
|
||||
#define MUL1_LO VS38
|
||||
#define MUL1_LO_ V6
|
||||
#define MUL1_HI VS39
|
||||
#define MUL1_HI_ V7
|
||||
|
||||
// Message
// The current 32-byte block; MSG_HI is only used as a load target and
// therefore has no Vn alias.
|
||||
#define MSG_LO VS40
|
||||
#define MSG_LO_ V8
|
||||
#define MSG_HI VS41
|
||||
|
||||
// Constants
// Loaded once from ·constants before the loop.
|
||||
#define ROTATE VS42
|
||||
#define ROTATE_ V10
|
||||
#define MASK VS43
|
||||
#define MASK_ V11
|
||||
|
||||
// Temps
// TEMP4_..TEMP7_ are only used with Vn-form instructions, so they have
// no VSnn alias.
|
||||
#define TEMP1 VS44
|
||||
#define TEMP1_ V12
|
||||
#define TEMP2 VS45
|
||||
#define TEMP2_ V13
|
||||
#define TEMP3 VS46
|
||||
#define TEMP3_ V14
|
||||
#define TEMP4_ V15
|
||||
#define TEMP5_ V16
|
||||
#define TEMP6_ V17
|
||||
#define TEMP7_ V18
|
||||
|
||||
// Regular registers
// P1..P7 hold the byte offsets 16, 32, ..., 112 used as index registers
// for the indexed vector loads and stores.
|
||||
#define STATE R3
|
||||
#define MSG_BASE R4
|
||||
#define MSG_LEN R5
|
||||
#define CONSTANTS R6
|
||||
#define P1 R7
|
||||
#define P2 R8
|
||||
#define P3 R9
|
||||
#define P4 R10
|
||||
#define P5 R11
|
||||
#define P6 R12
|
||||
#define P7 R14 // avoid using R13
|
||||
|
||||
// func updatePpc64Le(state *[16]uint64, msg []byte)
//
// Mixes msg into the 128-byte state, 32 bytes per loop iteration. Returns
// without touching state when len(msg) < 32; a trailing partial block is
// ignored. Arguments: state pointer at 0(FP), msg data pointer at 8(FP),
// msg length at 16(FP).
TEXT ·updatePpc64Le(SB), NOFRAME|NOSPLIT, $0-32
|
||||
MOVD state+0(FP), STATE
|
||||
MOVD msg_base+8(FP), MSG_BASE
|
||||
MOVD msg_len+16(FP), MSG_LEN // length of message
|
||||
|
||||
// Sanity check for length
|
||||
CMPU MSG_LEN, $31
|
||||
BLE complete
|
||||
|
||||
// Setup offsets
|
||||
MOVD $16, P1
|
||||
MOVD $32, P2
|
||||
MOVD $48, P3
|
||||
MOVD $64, P4
|
||||
MOVD $80, P5
|
||||
MOVD $96, P6
|
||||
MOVD $112, P7
|
||||
|
||||
// Load state
// Eight 16-byte loads cover all 16 state words.
|
||||
LXVD2X (STATE)(R0), V0_LO
|
||||
LXVD2X (STATE)(P1), V0_HI
|
||||
LXVD2X (STATE)(P2), V1_LO
|
||||
LXVD2X (STATE)(P3), V1_HI
|
||||
LXVD2X (STATE)(P4), MUL0_LO
|
||||
LXVD2X (STATE)(P5), MUL0_HI
|
||||
LXVD2X (STATE)(P6), MUL1_LO
|
||||
LXVD2X (STATE)(P7), MUL1_HI
|
||||
// XXPERMDI x, x, $2 exchanges the two 64-bit halves of a register; it is
// applied after every LXVD2X here and undone before every STXVD2X below.
XXPERMDI V0_LO, V0_LO, $2, V0_LO
|
||||
XXPERMDI V0_HI, V0_HI, $2, V0_HI
|
||||
XXPERMDI V1_LO, V1_LO, $2, V1_LO
|
||||
XXPERMDI V1_HI, V1_HI, $2, V1_HI
|
||||
XXPERMDI MUL0_LO, MUL0_LO, $2, MUL0_LO
|
||||
XXPERMDI MUL0_HI, MUL0_HI, $2, MUL0_HI
|
||||
XXPERMDI MUL1_LO, MUL1_LO, $2, MUL1_LO
|
||||
XXPERMDI MUL1_HI, MUL1_HI, $2, MUL1_HI
|
||||
|
||||
// Load constants table pointer
|
||||
MOVD $·constants(SB), CONSTANTS
|
||||
LXVD2X (CONSTANTS)(R0), ROTATE
|
||||
LXVD2X (CONSTANTS)(P1), MASK
|
||||
// NAND of a value with itself is its bitwise complement, so MASK now holds
// the inverted table entry. NOTE(review): presumably this adapts the byte
// indices to VPERM's element numbering — confirm against the Power ISA.
XXLNAND MASK, MASK, MASK
|
||||
|
||||
loop:
|
||||
// Main highwayhash update loop
// The instructions are interleaved for scheduling. V0_LO is reused as a
// scratch register for the upper message half once its old value has been
// captured in TEMP1_ and TEMP2_.
|
||||
LXVD2X (MSG_BASE)(R0), MSG_LO
|
||||
VADDUDM V0_LO_, MUL1_LO_, TEMP1_
|
||||
VRLD V0_LO_, ROTATE_, TEMP2_
|
||||
VADDUDM MUL1_HI_, V0_HI_, TEMP3_
|
||||
LXVD2X (MSG_BASE)(P1), MSG_HI
|
||||
ADD $32, MSG_BASE, MSG_BASE
|
||||
XXPERMDI MSG_LO, MSG_LO, $2, MSG_LO
|
||||
XXPERMDI MSG_HI, MSG_HI, $2, V0_LO
|
||||
VADDUDM MSG_LO_, MUL0_LO_, MSG_LO_
|
||||
VADDUDM V0_LO_, MUL0_HI_, V0_LO_
|
||||
VADDUDM MSG_LO_, V1_LO_, V1_LO_
|
||||
VSRD V0_HI_, ROTATE_, MSG_LO_
|
||||
VADDUDM V0_LO_, V1_HI_, V1_HI_
|
||||
VPERM V1_LO_, V1_LO_, MASK_, V0_LO_
|
||||
VMULOUW V1_LO_, TEMP2_, TEMP2_
|
||||
VPERM V1_HI_, V1_HI_, MASK_, TEMP7_
|
||||
VADDUDM V0_LO_, TEMP1_, V0_LO_
|
||||
VMULOUW V1_HI_, MSG_LO_, MSG_LO_
|
||||
VADDUDM TEMP7_, TEMP3_, V0_HI_
|
||||
VPERM V0_LO_, V0_LO_, MASK_, TEMP6_
|
||||
VRLD V1_LO_, ROTATE_, TEMP4_
|
||||
VSRD V1_HI_, ROTATE_, TEMP5_
|
||||
VPERM V0_HI_, V0_HI_, MASK_, TEMP7_
|
||||
XXLXOR MUL0_LO, TEMP2, MUL0_LO
|
||||
VMULOUW TEMP1_, TEMP4_, TEMP1_
|
||||
VMULOUW TEMP3_, TEMP5_, TEMP3_
|
||||
XXLXOR MUL0_HI, MSG_LO, MUL0_HI
|
||||
XXLXOR MUL1_LO, TEMP1, MUL1_LO
|
||||
XXLXOR MUL1_HI, TEMP3, MUL1_HI
|
||||
VADDUDM TEMP6_, V1_LO_, V1_LO_
|
||||
VADDUDM TEMP7_, V1_HI_, V1_HI_
|
||||
|
||||
// Continue while at least one more whole 32-byte block remains.
SUB $32, MSG_LEN, MSG_LEN
|
||||
CMPU MSG_LEN, $32
|
||||
BGE loop
|
||||
|
||||
// Save state
|
||||
XXPERMDI V0_LO, V0_LO, $2, V0_LO
|
||||
XXPERMDI V0_HI, V0_HI, $2, V0_HI
|
||||
XXPERMDI V1_LO, V1_LO, $2, V1_LO
|
||||
XXPERMDI V1_HI, V1_HI, $2, V1_HI
|
||||
XXPERMDI MUL0_LO, MUL0_LO, $2, MUL0_LO
|
||||
XXPERMDI MUL0_HI, MUL0_HI, $2, MUL0_HI
|
||||
XXPERMDI MUL1_LO, MUL1_LO, $2, MUL1_LO
|
||||
XXPERMDI MUL1_HI, MUL1_HI, $2, MUL1_HI
|
||||
STXVD2X V0_LO, (STATE)(R0)
|
||||
STXVD2X V0_HI, (STATE)(P1)
|
||||
STXVD2X V1_LO, (STATE)(P2)
|
||||
STXVD2X V1_HI, (STATE)(P3)
|
||||
STXVD2X MUL0_LO, (STATE)(P4)
|
||||
STXVD2X MUL0_HI, (STATE)(P5)
|
||||
STXVD2X MUL1_LO, (STATE)(P6)
|
||||
STXVD2X MUL1_HI, (STATE)(P7)
|
||||
|
||||
complete:
|
||||
RET
|
||||
|
||||
// Constants table
// Bytes 0-15 are loaded into ROTATE and bytes 16-31 into MASK by
// updatePpc64Le.
|
||||
// Shift/rotate count of 32 for each 64-bit lane (operand of VRLD and VSRD).
DATA ·constants+0x0(SB)/8, $0x0000000000000020
|
||||
DATA ·constants+0x8(SB)/8, $0x0000000000000020
|
||||
// Byte-permute indices; the routine complements them (XXLNAND) before
// using them with VPERM.
DATA ·constants+0x10(SB)/8, $0x070806090d0a040b // zipper merge constant
|
||||
DATA ·constants+0x18(SB)/8, $0x000f010e05020c03 // zipper merge constant
|
||||
|
||||
// Flag 8 is RODATA in textflag.h; $32 is the table size in bytes.
GLOBL ·constants(SB), 8, $32
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build !amd64
|
||||
// +build !arm64
|
||||
// +build !ppc64le
|
||||
|
||||
package highwayhash
|
||||
|
||||
// Implementation selectors. This is the portable build (not amd64, arm64
// or ppc64le), so every flag is false.
var (
	useSSE4, useAVX2 = false, false
	useNEON, useVMX  = false, false
)
|
||||
|
||||
func initialize(state *[16]uint64, k []byte) {
|
||||
initializeGeneric(state, k)
|
||||
}
|
||||
|
||||
func update(state *[16]uint64, msg []byte) {
|
||||
updateGeneric(state, msg)
|
||||
}
|
||||
|
||||
func finalize(out []byte, state *[16]uint64) {
|
||||
finalizeGeneric(out, state)
|
||||
}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package cpu implements processor feature detection for
|
||||
// various CPU architectures.
|
||||
package cpu
|
||||
|
||||
// CacheLinePad is used to pad structs to avoid false sharing.
|
||||
type CacheLinePad struct{ _ [cacheLineSize]byte }
|
||||
|
||||
// X86 contains the supported CPU features of the
|
||||
// current X86/AMD64 platform. If the current platform
|
||||
// is not X86/AMD64 then all feature flags are false.
|
||||
//
|
||||
// X86 is padded to avoid false sharing. Further the HasAVX
|
||||
// and HasAVX2 are only set if the OS supports XMM and YMM
|
||||
// registers in addition to the CPUID feature bit being set.
|
||||
var X86 struct {
|
||||
_ CacheLinePad
|
||||
HasAES bool // AES hardware implementation (AES NI)
|
||||
HasADX bool // Multi-precision add-carry instruction extensions
|
||||
HasAVX bool // Advanced vector extension
|
||||
HasAVX2 bool // Advanced vector extension 2
|
||||
HasBMI1 bool // Bit manipulation instruction set 1
|
||||
HasBMI2 bool // Bit manipulation instruction set 2
|
||||
HasERMS bool // Enhanced REP for MOVSB and STOSB
|
||||
HasFMA bool // Fused-multiply-add instructions
|
||||
HasOSXSAVE bool // OS supports XSAVE/XRESTOR for saving/restoring XMM registers.
|
||||
HasPCLMULQDQ bool // PCLMULQDQ instruction - most often used for AES-GCM
|
||||
HasPOPCNT bool // Hamming weight instruction POPCNT.
|
||||
HasSSE2 bool // Streaming SIMD extension 2 (always available on amd64)
|
||||
HasSSE3 bool // Streaming SIMD extension 3
|
||||
HasSSSE3 bool // Supplemental streaming SIMD extension 3
|
||||
HasSSE41 bool // Streaming SIMD extension 4 and 4.1
|
||||
HasSSE42 bool // Streaming SIMD extension 4 and 4.2
|
||||
_ CacheLinePad
|
||||
}
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpu
|
||||
|
||||
// cacheLineSize is the width in bytes of a CacheLinePad on this build
// target.
const cacheLineSize = 32
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpu
|
||||
|
||||
// cacheLineSize is the width in bytes of a CacheLinePad on this build
// target.
const cacheLineSize = 64
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build 386 amd64 amd64p32
|
||||
// +build !gccgo
|
||||
|
||||
package cpu
|
||||
|
||||
// cpuid executes the CPUID instruction with EAX set to eaxArg and ECX set
// to ecxArg and returns the resulting EAX, EBX, ECX and EDX values.
// cpuid is implemented in cpu_x86.s for gc compiler
|
||||
// and in cpu_gccgo.c for gccgo.
|
||||
func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
|
||||
|
||||
// xgetbv returns the low (eax) and high (edx) 32 bits read by the XGETBV
// instruction for extended control register 0.
// xgetbv with ecx = 0 is implemented in cpu_x86.s for gc compiler
|
||||
// and in cpu_gccgo.c for gccgo.
|
||||
func xgetbv() (eax, edx uint32)
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build 386 amd64 amd64p32
|
||||
// +build gccgo
|
||||
|
||||
#include <cpuid.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// gccgoGetCpuidCount is an externally linkable wrapper around
// __get_cpuid_count, which <cpuid.h> declares as static and which
// therefore cannot be referenced from Go directly.
// It queries CPUID for the given leaf and subleaf, stores the register
// values through eax, ebx, ecx and edx, and passes on the wrapped
// function's return value.
int gccgoGetCpuidCount(uint32_t leaf, uint32_t subleaf,
                       uint32_t *eax, uint32_t *ebx,
                       uint32_t *ecx, uint32_t *edx)
{
	int status = __get_cpuid_count(leaf, subleaf, eax, ebx, ecx, edx);
	return status;
}
|
||||
|
||||
// xgetbv reads the contents of an XCR (Extended Control Register)
// specified in the ECX register into registers EDX:EAX.
// Currently, the only supported value for XCR is 0.
//
// The XCR index is handed to the instruction as an input operand bound to
// ECX ("c"(0)). The compiler therefore knows ECX is in use; zeroing ECX
// inside the asm template without declaring it would let the compiler
// wrongly assume the register still holds its previous value.
//
// The caller must make sure XGETBV may be executed (CPUID.1:ECX.OSXSAVE
// is set); otherwise the instruction faults.
//
// TODO: Replace with a better alternative:
//
//     #include <xsaveintrin.h>
//
//     #pragma GCC target("xsave")
//
//     void gccgoXgetbv(uint32_t *eax, uint32_t *edx) {
//       unsigned long long x = _xgetbv(0);
//       *eax = x & 0xffffffff;
//       *edx = (x >> 32) & 0xffffffff;
//     }
//
// Note that _xgetbv is defined starting with GCC 8.
void
gccgoXgetbv(uint32_t *eax, uint32_t *edx)
{
	__asm__ __volatile__("xgetbv"
	                     : "=a"(*eax), "=d"(*edx)
	                     : "c"(0));
}
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build 386 amd64 amd64p32
|
||||
// +build gccgo
|
||||
|
||||
package cpu
|
||||
|
||||
// gccgoGetCpuidCount is bound by the extern directive below to the symbol
// of the same name, which has no Go body in this file. It receives the
// CPUID leaf (eaxArg) and subleaf (ecxArg) and writes the four result
// registers through the pointers.
//extern gccgoGetCpuidCount
|
||||
func gccgoGetCpuidCount(eaxArg, ecxArg uint32, eax, ebx, ecx, edx *uint32)
|
||||
|
||||
func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) {
|
||||
var a, b, c, d uint32
|
||||
gccgoGetCpuidCount(eaxArg, ecxArg, &a, &b, &c, &d)
|
||||
return a, b, c, d
|
||||
}
|
||||
|
||||
// gccgoXgetbv is bound by the extern directive below to the symbol of the
// same name, which has no Go body in this file. It writes its two 32-bit
// results through eax and edx.
//extern gccgoXgetbv
|
||||
func gccgoXgetbv(eax, edx *uint32)
|
||||
|
||||
func xgetbv() (eax, edx uint32) {
|
||||
var a, d uint32
|
||||
gccgoXgetbv(&a, &d)
|
||||
return a, d
|
||||
}
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build mips64 mips64le
|
||||
|
||||
package cpu
|
||||
|
||||
// cacheLineSize is the width in bytes of a CacheLinePad on mips64 and
// mips64le.
const cacheLineSize = 32
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build mips mipsle
|
||||
|
||||
package cpu
|
||||
|
||||
// cacheLineSize is the width in bytes of a CacheLinePad on mips and
// mipsle.
const cacheLineSize = 32
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ppc64 ppc64le
|
||||
|
||||
package cpu
|
||||
|
||||
// cacheLineSize is the width in bytes of a CacheLinePad on ppc64 and
// ppc64le.
const cacheLineSize = 128
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpu
|
||||
|
||||
// cacheLineSize is the width in bytes of a CacheLinePad on this build
// target.
const cacheLineSize = 256
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build 386 amd64 amd64p32
|
||||
|
||||
package cpu
|
||||
|
||||
// cacheLineSize is the width in bytes of a CacheLinePad on 386, amd64
// and amd64p32.
const cacheLineSize = 64
|
||||
|
||||
func init() {
|
||||
maxID, _, _, _ := cpuid(0, 0)
|
||||
|
||||
if maxID < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
_, _, ecx1, edx1 := cpuid(1, 0)
|
||||
X86.HasSSE2 = isSet(26, edx1)
|
||||
|
||||
X86.HasSSE3 = isSet(0, ecx1)
|
||||
X86.HasPCLMULQDQ = isSet(1, ecx1)
|
||||
X86.HasSSSE3 = isSet(9, ecx1)
|
||||
X86.HasFMA = isSet(12, ecx1)
|
||||
X86.HasSSE41 = isSet(19, ecx1)
|
||||
X86.HasSSE42 = isSet(20, ecx1)
|
||||
X86.HasPOPCNT = isSet(23, ecx1)
|
||||
X86.HasAES = isSet(25, ecx1)
|
||||
X86.HasOSXSAVE = isSet(27, ecx1)
|
||||
|
||||
osSupportsAVX := false
|
||||
// For XGETBV, OSXSAVE bit is required and sufficient.
|
||||
if X86.HasOSXSAVE {
|
||||
eax, _ := xgetbv()
|
||||
// Check if XMM and YMM registers have OS support.
|
||||
osSupportsAVX = isSet(1, eax) && isSet(2, eax)
|
||||
}
|
||||
|
||||
X86.HasAVX = isSet(28, ecx1) && osSupportsAVX
|
||||
|
||||
if maxID < 7 {
|
||||
return
|
||||
}
|
||||
|
||||
_, ebx7, _, _ := cpuid(7, 0)
|
||||
X86.HasBMI1 = isSet(3, ebx7)
|
||||
X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX
|
||||
X86.HasBMI2 = isSet(8, ebx7)
|
||||
X86.HasERMS = isSet(9, ebx7)
|
||||
X86.HasADX = isSet(19, ebx7)
|
||||
}
|
||||
|
||||
// isSet reports whether bit number bitpos (0 = least significant) of
// value is 1. Positions of 32 and above are never set.
func isSet(bitpos uint, value uint32) bool {
	return (value>>bitpos)&1 == 1
}
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build 386 amd64 amd64p32
|
||||
// +build !gccgo
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
// Loads the two arguments into AX and CX, executes CPUID and stores
// AX, BX, CX and DX into the four results. $0-24: no local frame, 8 bytes
// of arguments followed by 16 bytes of results.
|
||||
TEXT ·cpuid(SB), NOSPLIT, $0-24
|
||||
MOVL eaxArg+0(FP), AX
|
||||
MOVL ecxArg+4(FP), CX
|
||||
CPUID
|
||||
MOVL AX, eax+8(FP)
|
||||
MOVL BX, ebx+12(FP)
|
||||
MOVL CX, ecx+16(FP)
|
||||
MOVL DX, edx+20(FP)
|
||||
RET
|
||||
|
||||
// func xgetbv() (eax, edx uint32)
// Executes XGETBV with CX = 0 and stores AX and DX into the two results.
// $0-8: no local frame, no arguments, 8 bytes of results.
|
||||
TEXT ·xgetbv(SB),NOSPLIT,$0-8
|
||||
MOVL $0, CX
|
||||
XGETBV
|
||||
MOVL AX, eax+0(FP)
|
||||
MOVL DX, edx+4(FP)
|
||||
RET
|
||||
Loading…
Reference in New Issue