chore(deps): bump github.com/minio/highwayhash from 1.0.2 to 1.0.3 (#3252)

Bumps [github.com/minio/highwayhash](https://github.com/minio/highwayhash) from 1.0.2 to 1.0.3.
- [Release notes](https://github.com/minio/highwayhash/releases)
- [Commits](https://github.com/minio/highwayhash/compare/v1.0.2...v1.0.3)

---
updated-dependencies:
- dependency-name: github.com/minio/highwayhash
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
This commit is contained in:
dependabot[bot] 2024-07-15 09:28:31 -07:00 committed by GitHub
parent 7825b6bf4f
commit 725c6dacdc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 411 additions and 62 deletions

2
go.mod
View File

@ -21,7 +21,7 @@ require (
github.com/google/go-github v17.0.0+incompatible
github.com/google/slowjam v1.1.1
github.com/karrick/godirwalk v1.16.1
github.com/minio/highwayhash v1.0.2
github.com/minio/highwayhash v1.0.3
github.com/moby/buildkit v0.14.1
github.com/otiai10/copy v1.14.0
github.com/pkg/errors v0.9.1

5
go.sum
View File

@ -342,8 +342,8 @@ github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3v
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
github.com/minio/highwayhash v1.0.2 h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA/g=
github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY=
github.com/minio/highwayhash v1.0.3 h1:kbnuUMoHYyVl7szWjSxJnxw11k2U709jqFPPmIUyD6Q=
github.com/minio/highwayhash v1.0.3/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
@ -570,7 +570,6 @@ golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190130150945-aca44879d564/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=

View File

@ -12,13 +12,11 @@ linters:
- goimports
- misspell
- govet
- golint
- revive
- ineffassign
- gosimple
- deadcode
- unparam
- unused
- structcheck
issues:
exclude-use-default: false
@ -27,4 +25,4 @@ issues:
- error strings should not be capitalized or end with punctuation or a newline
- should have comment # TODO(aead): Remove once all exported ident. have comments!
service:
golangci-lint-version: 1.20.0 # use the fixed version to not introduce new linters unexpectedly
golangci-lint-version: 1.51.2 # use the fixed version to not introduce new linters unexpectedly

View File

@ -42,17 +42,17 @@ So for moderately sized messages it tops out at about 15 GB/sec. Also for small
### ARM Performance
Below are the single core results on an EC2 m6g.4xlarge (Graviton2) instance for 256 bit outputs:
Below are the single core results on an EC2 c7g.4xlarge (Graviton3) instance for 256 bit outputs:
```
BenchmarkSum256_16 96.82 MB/s
BenchmarkSum256_64 445.35 MB/s
BenchmarkSum256_1K 2782.46 MB/s
BenchmarkSum256_8K 4083.58 MB/s
BenchmarkSum256_1M 4986.41 MB/s
BenchmarkSum256_5M 4992.72 MB/s
BenchmarkSum256_10M 4993.32 MB/s
BenchmarkSum256_25M 4992.55 MB/s
BenchmarkSum256_16 143.66 MB/s
BenchmarkSum256_64 628.75 MB/s
BenchmarkSum256_1K 3621.71 MB/s
BenchmarkSum256_8K 5039.64 MB/s
BenchmarkSum256_1M 5279.79 MB/s
BenchmarkSum256_5M 5474.60 MB/s
BenchmarkSum256_10M 5621.73 MB/s
BenchmarkSum256_25M 5250.47 MB/s
```
### ppc64le Performance

View File

@ -0,0 +1,132 @@
//
// Copyright (c) 2024 Minio Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//+build !noasm,!appengine
#include "textflag.h"
// getVectorLength reports the SVE register sizes of the running CPU.
//
// Go declaration: func getVectorLength() (vl, pl uint64)
//
// Returns:
//   vl - vector register length in bits (ADDVL yields the size in bytes,
//        the following LSL #3 multiplies it by 8)
//   pl - predicate register length in bits (ADDPL yields the size in bytes,
//        scaled by 8 in the same way)
//
// The SVE instructions are emitted as raw WORD encodings; the mnemonic each
// word stands for is given in its trailing comment.
TEXT ·getVectorLength(SB), NOSPLIT, $0
WORD $0xd2800002 // mov x2, #0
WORD $0x04225022 // addvl x2, x2, #1
WORD $0xd37df042 // lsl x2, x2, #3
WORD $0xd2800003 // mov x3, #0
WORD $0x04635023 // addpl x3, x3, #1
WORD $0xd37df063 // lsl x3, x3, #3
// Store both results into the Go return slots.
MOVD R2, vl+0(FP)
MOVD R3, pl+8(FP)
RET
// updateArm64Sve mixes msg into the hash state using SVE instructions.
//
// Go declaration: func updateArm64Sve(state *[16]uint64, msg []byte)
//
// The message is consumed 32 bytes per loop iteration. If len(msg) < 32 the
// routine returns without reading or writing state; a tail shorter than 32
// bytes is left unprocessed.
//
// The state is loaded and stored as four consecutive vector-sized chunks
// (offsets #0..#3 "MUL VL"). That matches the 16 x uint64 state exactly only
// when the vector length is 256 bits, which is why the Go-side init() turns
// the SVE paths off for any other vector length.
//
// Register use:
//   R0 = state pointer, R1 = message cursor, R2 = remaining length minus 32
//   z1..z4 = the four state vectors. Judging by the arithmetic they line up
//            with v0, v1, mul0 and mul1 of updateGeneric -- NOTE(review):
//            confirm against the state index constants.
//   z5 = zipperMergeSve byte-shuffle table, z6 = all zero, z0/z7 = scratch
//   p1 = all-true byte predicate, p2 = all-true doubleword predicate
TEXT ·updateArm64Sve(SB), NOSPLIT, $0
MOVD state+0(FP), R0
MOVD msg_base+8(FP), R1
MOVD msg_len+16(FP), R2 // length of message
// Bail out before touching state when fewer than 32 bytes are available.
SUBS $32, R2
BMI completeSve
WORD $0x2518e3e1 // ptrue p1.b
WORD $0xa5e0a401 // ld1d z1.d, p1/z, [x0]
WORD $0xa5e1a402 // ld1d z2.d, p1/z, [x0, #1, MUL VL]
WORD $0xa5e2a403 // ld1d z3.d, p1/z, [x0, #2, MUL VL]
WORD $0xa5e3a404 // ld1d z4.d, p1/z, [x0, #3, MUL VL]
// Load zipper merge constants table pointer
MOVD $·zipperMergeSve(SB), R3
WORD $0xa5e0a465 // ld1d z5.d, p1/z, [x3]
WORD $0x25b8c006 // mov z6.s, #0
WORD $0x25d8e3e2 // ptrue p2.d /* set every other lane for "s" type */
loopSve:
// Fetch the next 32-byte message block and advance the cursor.
WORD $0xa5e0a420 // ld1d z0.d, p1/z, [x1]
ADD $32, R1
// z2 += message block; z2 += z3
WORD $0x04e00042 // add z2.d, z2.d, z0.d
WORD $0x04e30042 // add z2.d, z2.d, z3.d
// z3 ^= (z1 >> 32) * (low 32 bits of z2); the sel against the zero vector
// z6 under p2 clears the upper half of every 64-bit lane before the multiply.
WORD $0x04e09420 // lsr z0.d, z1.d, #32
WORD $0x05a6c847 // sel z7.s, p2, z2.s, z6.s
WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d
WORD $0x04a33003 // eor z3.d, z0.d, z3.d
// z1 += z4
WORD $0x04e10081 // add z1.d, z4.d, z1.d
// z4 ^= (z2 >> 32) * (low 32 bits of z1)
WORD $0x04e09440 // lsr z0.d, z2.d, #32
WORD $0x05a6c827 // sel z7.s, p2, z1.s, z6.s
WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d
WORD $0x04a43004 // eor z4.d, z0.d, z4.d
// Byte-shuffle z2 through the table and add into z1, then shuffle the
// updated z1 and add into z2.
WORD $0x05253040 // tbl z0.b, z2.b, z5.b
WORD $0x04e00021 // add z1.d, z1.d, z0.d
WORD $0x05253020 // tbl z0.b, z1.b, z5.b
WORD $0x04e00042 // add z2.d, z2.d, z0.d
// Keep looping while at least one more full 32-byte block remains.
SUBS $32, R2
BPL loopSve
// Write the four state vectors back.
WORD $0xe5e0e401 // st1d z1.d, p1, [x0]
WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL]
WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL]
WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL]
completeSve:
RET
// updateArm64Sve2 mixes msg into the hash state using SVE2 instructions.
//
// Go declaration: func updateArm64Sve2(state *[16]uint64, msg []byte)
//
// It follows the same structure as updateArm64Sve, but forms each
// 32x32 -> 64 bit product with a single widening UMULLB (multiply of the
// even-numbered 32-bit elements) instead of a SEL + MUL pair, so no zero
// vector or second predicate is needed.
//
// The message is consumed 32 bytes per loop iteration. If len(msg) < 32 the
// routine returns without reading or writing state; a tail shorter than 32
// bytes is left unprocessed. As with the SVE variant, the "MUL VL" offsets
// cover the 16 x uint64 state exactly only for a 256-bit vector length.
//
// NOTE(review): the arm64 Go file currently hard-codes useSVE2 = false
// ("disable until tested on real hardware"), so update() does not select
// this routine as things stand.
//
// Register use:
//   R0 = state pointer, R1 = message cursor, R2 = remaining length minus 32
//   z1..z4 = the four state vectors, z5 = zipperMergeSve byte-shuffle table,
//   z0 = scratch, p1 = all-true byte predicate
TEXT ·updateArm64Sve2(SB), NOSPLIT, $0
MOVD state+0(FP), R0
MOVD msg_base+8(FP), R1
MOVD msg_len+16(FP), R2 // length of message
// Bail out before touching state when fewer than 32 bytes are available.
SUBS $32, R2
BMI completeSve2
WORD $0x2518e3e1 // ptrue p1.b
WORD $0xa5e0a401 // ld1d z1.d, p1/z, [x0]
WORD $0xa5e1a402 // ld1d z2.d, p1/z, [x0, #1, MUL VL]
WORD $0xa5e2a403 // ld1d z3.d, p1/z, [x0, #2, MUL VL]
WORD $0xa5e3a404 // ld1d z4.d, p1/z, [x0, #3, MUL VL]
// Load zipper merge constants table pointer
MOVD $·zipperMergeSve(SB), R3
WORD $0xa5e0a465 // ld1d z5.d, p1/z, [x3]
loopSve2:
// Fetch the next 32-byte message block and advance the cursor.
WORD $0xa5e0a420 // ld1d z0.d, p1/z, [x1]
ADD $32, R1
// z2 += message block; z2 += z3
WORD $0x04e00042 // add z2.d, z2.d, z0.d
WORD $0x04e30042 // add z2.d, z2.d, z3.d
// z3 ^= (z1 >> 32) * (low 32 bits of z2)
WORD $0x04e09420 // lsr z0.d, z1.d, #32
WORD $0x45c27800 // umullb z0.d, z0.s, z2.s
WORD $0x04a33003 // eor z3.d, z0.d, z3.d
// z1 += z4
WORD $0x04e10081 // add z1.d, z4.d, z1.d
// z4 ^= (z2 >> 32) * (low 32 bits of z1)
WORD $0x04e09440 // lsr z0.d, z2.d, #32
WORD $0x45c17800 // umullb z0.d, z0.s, z1.s
WORD $0x04a43004 // eor z4.d, z0.d, z4.d
// Byte-shuffle z2 through the table and add into z1, then shuffle the
// updated z1 and add into z2.
WORD $0x05253040 // tbl z0.b, z2.b, z5.b
WORD $0x04e00021 // add z1.d, z1.d, z0.d
WORD $0x05253020 // tbl z0.b, z1.b, z5.b
WORD $0x04e00042 // add z2.d, z2.d, z0.d
// Keep looping while at least one more full 32-byte block remains.
SUBS $32, R2
BPL loopSve2
// Write the four state vectors back.
WORD $0xe5e0e401 // st1d z1.d, p1, [x0]
WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL]
WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL]
WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL]
completeSve2:
RET
// zipperMergeSve is the 32-byte, read-only index table that updateArm64Sve
// and updateArm64Sve2 load into z5 and pass as the index operand of their
// TBL byte shuffles. Every byte is a source-byte index: the first 16 bytes
// only reference indices 0x00-0x0f and the last 16 only 0x10-0x1f, so each
// 128-bit half of the vector is permuted within itself.
// NOTE(review): presumably this encodes the same permutation as the
// zipperMerge step of the generic Go implementation -- confirm.
DATA ·zipperMergeSve+0x00(SB)/8, $0x000f010e05020c03
DATA ·zipperMergeSve+0x08(SB)/8, $0x070806090d0a040b
DATA ·zipperMergeSve+0x10(SB)/8, $0x101f111e15121c13
DATA ·zipperMergeSve+0x18(SB)/8, $0x171816191d1a141b
GLOBL ·zipperMergeSve(SB), (NOPTR+RODATA), $32

View File

@ -2,6 +2,7 @@
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
//go:build amd64 && !gccgo && !appengine && !nacl && !noasm
// +build amd64,!gccgo,!appengine,!nacl,!noasm
package highwayhash
@ -12,6 +13,8 @@ var (
useSSE4 = cpu.X86.HasSSE41
useAVX2 = cpu.X86.HasAVX2
useNEON = false
useSVE = false
useSVE2 = false
useVMX = false
)

View File

@ -1,24 +1,54 @@
// Copyright (c) 2017 Minio Inc. All rights reserved.
// Copyright (c) 2017-2024 Minio Inc. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
//+build !noasm,!appengine
//go:build !noasm && !appengine
// +build !noasm,!appengine
package highwayhash
import (
"golang.org/x/sys/cpu"
)
var (
useSSE4 = false
useAVX2 = false
useNEON = true
useNEON = cpu.ARM64.HasASIMD
useSVE = cpu.ARM64.HasSVE
useSVE2 = false // cpu.ARM64.HasSVE2 -- disable until tested on real hardware
useVMX = false
)
// init turns the SVE and SVE2 code paths off when the CPU's SVE vector
// length is anything other than 256 bits.
func init() {
	if !useSVE {
		// No SVE support detected; nothing to validate.
		return
	}

	vl, _ := getVectorLength()
	if vl == 256 {
		return
	}

	// Since HighwayHash is designed for AVX2, the SVE/SVE2
	// instructions only run correctly for a vector length of 256.
	useSVE = false
	useSVE2 = false
}
// initializeArm64 has no Go body; it is implemented outside this file.
// NOTE(review): presumably sets up state from key -- confirm against the
// assembly implementation.
//
//go:noescape
func initializeArm64(state *[16]uint64, key []byte)
// updateArm64 has no Go body; it is implemented outside this file.
// update() calls it when useNEON is set and no SVE path was selected.
//
//go:noescape
func updateArm64(state *[16]uint64, msg []byte)
// getVectorLength is implemented in assembly and returns the SVE vector
// length (vl) and predicate length (pl). init() compares vl against 256 to
// decide whether the SVE code paths may be used.
//
//go:noescape
func getVectorLength() (vl, pl uint64)
// updateArm64Sve is the SVE assembly implementation of the state update.
// update() calls it when useSVE is set and useSVE2 is not.
//
//go:noescape
func updateArm64Sve(state *[16]uint64, msg []byte)
// updateArm64Sve2 is the SVE2 assembly implementation of the state update.
// update() calls it when useSVE2 is set.
//
//go:noescape
func updateArm64Sve2(state *[16]uint64, msg []byte)
// finalizeArm64 has no Go body; it is implemented outside this file.
// NOTE(review): presumably writes the final hash derived from state into
// out -- confirm against the assembly implementation.
//
//go:noescape
func finalizeArm64(out []byte, state *[16]uint64)
@ -31,7 +61,11 @@ func initialize(state *[16]uint64, key []byte) {
}
func update(state *[16]uint64, msg []byte) {
if useNEON {
if useSVE2 {
updateArm64Sve2(state, msg)
} else if useSVE {
updateArm64Sve(state, msg)
} else if useNEON {
updateArm64(state, msg)
} else {
updateGeneric(state, msg)

View File

@ -46,40 +46,113 @@ func initializeGeneric(state *[16]uint64, k []byte) {
}
func updateGeneric(state *[16]uint64, msg []byte) {
for len(msg) > 0 {
// add message
state[v1+0] += binary.LittleEndian.Uint64(msg)
state[v1+1] += binary.LittleEndian.Uint64(msg[8:])
state[v1+2] += binary.LittleEndian.Uint64(msg[16:])
state[v1+3] += binary.LittleEndian.Uint64(msg[24:])
// v1 += mul0
state[v1+0] += state[mul0+0]
state[v1+1] += state[mul0+1]
state[v1+2] += state[mul0+2]
state[v1+3] += state[mul0+3]
for len(msg) >= 32 {
m := msg[:32]
// add message + mul0
// Interleave operations to hide multiplication
state[v1+0] += binary.LittleEndian.Uint64(m) + state[mul0+0]
state[mul0+0] ^= uint64(uint32(state[v1+0])) * (state[v0+0] >> 32)
state[mul0+1] ^= uint64(uint32(state[v1+1])) * (state[v0+1] >> 32)
state[mul0+2] ^= uint64(uint32(state[v1+2])) * (state[v0+2] >> 32)
state[mul0+3] ^= uint64(uint32(state[v1+3])) * (state[v0+3] >> 32)
// v0 += mul1
state[v0+0] += state[mul1+0]
state[v0+1] += state[mul1+1]
state[v0+2] += state[mul1+2]
state[v0+3] += state[mul1+3]
state[mul1+0] ^= uint64(uint32(state[v0+0])) * (state[v1+0] >> 32)
state[v1+1] += binary.LittleEndian.Uint64(m[8:]) + state[mul0+1]
state[mul0+1] ^= uint64(uint32(state[v1+1])) * (state[v0+1] >> 32)
state[v0+1] += state[mul1+1]
state[mul1+1] ^= uint64(uint32(state[v0+1])) * (state[v1+1] >> 32)
state[v1+2] += binary.LittleEndian.Uint64(m[16:]) + state[mul0+2]
state[mul0+2] ^= uint64(uint32(state[v1+2])) * (state[v0+2] >> 32)
state[v0+2] += state[mul1+2]
state[mul1+2] ^= uint64(uint32(state[v0+2])) * (state[v1+2] >> 32)
state[v1+3] += binary.LittleEndian.Uint64(m[24:]) + state[mul0+3]
state[mul0+3] ^= uint64(uint32(state[v1+3])) * (state[v0+3] >> 32)
state[v0+3] += state[mul1+3]
state[mul1+3] ^= uint64(uint32(state[v0+3])) * (state[v1+3] >> 32)
zipperMerge(state[v1+0], state[v1+1], &state[v0+0], &state[v0+1])
zipperMerge(state[v1+2], state[v1+3], &state[v0+2], &state[v0+3])
// inlined: zipperMerge(state[v1+0], state[v1+1], &state[v0+0], &state[v0+1])
{
val0 := state[v1+0]
val1 := state[v1+1]
res := val0 & (0xff << (2 * 8))
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
res += (val1 & (0xff << (7 * 8))) >> 8
res2 += (val0 & (0xff << (6 * 8))) >> 8
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
res2 += (val1 & (0xff << (5 * 8))) >> 16
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
res += (val0 & (0xff << (1 * 8))) << 32
res2 += (val1 & 0xff) << 48
res += val0 << 56
res2 += (val1 & (0xff << (1 * 8))) << 24
zipperMerge(state[v0+0], state[v0+1], &state[v1+0], &state[v1+1])
zipperMerge(state[v0+2], state[v0+3], &state[v1+2], &state[v1+3])
state[v0+0] += res
state[v0+1] += res2
}
// zipperMerge(state[v1+2], state[v1+3], &state[v0+2], &state[v0+3])
{
val0 := state[v1+2]
val1 := state[v1+3]
res := val0 & (0xff << (2 * 8))
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
res += (val1 & (0xff << (7 * 8))) >> 8
res2 += (val0 & (0xff << (6 * 8))) >> 8
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
res2 += (val1 & (0xff << (5 * 8))) >> 16
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
res += (val0 & (0xff << (1 * 8))) << 32
res2 += (val1 & 0xff) << 48
res += val0 << 56
res2 += (val1 & (0xff << (1 * 8))) << 24
state[v0+2] += res
state[v0+3] += res2
}
// inlined: zipperMerge(state[v0+0], state[v0+1], &state[v1+0], &state[v1+1])
{
val0 := state[v0+0]
val1 := state[v0+1]
res := val0 & (0xff << (2 * 8))
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
res += (val1 & (0xff << (7 * 8))) >> 8
res2 += (val0 & (0xff << (6 * 8))) >> 8
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
res2 += (val1 & (0xff << (5 * 8))) >> 16
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
res += (val0 & (0xff << (1 * 8))) << 32
res2 += (val1 & 0xff) << 48
res += val0 << 56
res2 += (val1 & (0xff << (1 * 8))) << 24
state[v1+0] += res
state[v1+1] += res2
}
//inlined: zipperMerge(state[v0+2], state[v0+3], &state[v1+2], &state[v1+3])
{
val0 := state[v0+2]
val1 := state[v0+3]
res := val0 & (0xff << (2 * 8))
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
res += (val1 & (0xff << (7 * 8))) >> 8
res2 += (val0 & (0xff << (6 * 8))) >> 8
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
res2 += (val1 & (0xff << (5 * 8))) >> 16
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
res += (val0 & (0xff << (1 * 8))) << 32
res2 += (val1 & 0xff) << 48
res += val0 << 56
res2 += (val1 & (0xff << (1 * 8))) << 24
state[v1+2] += res
state[v1+3] += res2
}
msg = msg[32:]
}
}
@ -124,25 +197,129 @@ func finalizeGeneric(out []byte, state *[16]uint64) {
}
}
// Experiments on variations left for future reference...
/*
func zipperMerge(v0, v1 uint64, d0, d1 *uint64) {
m0 := v0 & (0xFF << (2 * 8))
m1 := (v1 & (0xFF << (7 * 8))) >> 8
m2 := ((v0 & (0xFF << (5 * 8))) + (v1 & (0xFF << (6 * 8)))) >> 16
m3 := ((v0 & (0xFF << (3 * 8))) + (v1 & (0xFF << (4 * 8)))) >> 24
m4 := (v0 & (0xFF << (1 * 8))) << 32
m5 := v0 << 56
if true {
// fastest. original interleaved...
res := v0 & (0xff << (2 * 8))
res2 := (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8)))
res += (v1 & (0xff << (7 * 8))) >> 8
res2 += (v0 & (0xff << (6 * 8))) >> 8
res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16
res2 += (v1 & (0xff << (5 * 8))) >> 16
res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24
res2 += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24
res += (v0 & (0xff << (1 * 8))) << 32
res2 += (v1 & 0xff) << 48
res += v0 << 56
res2 += (v1 & (0xff << (1 * 8))) << 24
*d0 += m0 + m1 + m2 + m3 + m4 + m5
*d0 += res
*d1 += res2
} else if false {
// Reading bytes and combining into uint64
var v0b [8]byte
binary.LittleEndian.PutUint64(v0b[:], v0)
var v1b [8]byte
binary.LittleEndian.PutUint64(v1b[:], v1)
var res, res2 uint64
m0 = (v0 & (0xFF << (7 * 8))) + (v1 & (0xFF << (2 * 8)))
m1 = (v0 & (0xFF << (6 * 8))) >> 8
m2 = (v1 & (0xFF << (5 * 8))) >> 16
m3 = ((v1 & (0xFF << (3 * 8))) + (v0 & (0xFF << (4 * 8)))) >> 24
m4 = (v1 & 0xFF) << 48
m5 = (v1 & (0xFF << (1 * 8))) << 24
res = uint64(v0b[0]) << (7 * 8)
res2 = uint64(v1b[0]) << (6 * 8)
res |= uint64(v0b[1]) << (5 * 8)
res2 |= uint64(v1b[1]) << (4 * 8)
res |= uint64(v0b[2]) << (2 * 8)
res2 |= uint64(v1b[2]) << (2 * 8)
res |= uint64(v0b[3])
res2 |= uint64(v0b[4]) << (1 * 8)
res |= uint64(v0b[5]) << (3 * 8)
res2 |= uint64(v0b[6]) << (5 * 8)
res |= uint64(v1b[4]) << (1 * 8)
res2 |= uint64(v0b[7]) << (7 * 8)
res |= uint64(v1b[6]) << (4 * 8)
res2 |= uint64(v1b[3])
res |= uint64(v1b[7]) << (6 * 8)
res2 |= uint64(v1b[5]) << (3 * 8)
*d1 += m3 + m2 + m5 + m1 + m4 + m0
*d0 += res
*d1 += res2
} else if false {
// bytes to bytes shuffle
var v0b [8]byte
binary.LittleEndian.PutUint64(v0b[:], v0)
var v1b [8]byte
binary.LittleEndian.PutUint64(v1b[:], v1)
var res [8]byte
//res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24
res[0] = v0b[3]
res[1] = v1b[4]
// res := v0 & (0xff << (2 * 8))
res[2] = v0b[2]
//res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16
res[3] = v0b[5]
res[4] = v1b[6]
//res += (v0 & (0xff << (1 * 8))) << 32
res[5] = v0b[1]
//res += (v1 & (0xff << (7 * 8))) >> 8
res[6] += v1b[7]
//res += v0 << 56
res[7] = v0b[0]
v0 = binary.LittleEndian.Uint64(res[:])
*d0 += v0
//res += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24
res[0] = v1b[3]
res[1] = v0b[4]
res[2] = v1b[2]
// res += (v1 & (0xff << (5 * 8))) >> 16
res[3] = v1b[5]
//res += (v1 & (0xff << (1 * 8))) << 24
res[4] = v1b[1]
// res += (v0 & (0xff << (6 * 8))) >> 8
res[5] = v0b[6]
//res := (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8)))
res[7] = v0b[7]
//res += (v1 & 0xff) << 48
res[6] = v1b[0]
v0 = binary.LittleEndian.Uint64(res[:])
*d1 += v0
} else {
// original.
res := v0 & (0xff << (2 * 8))
res += (v1 & (0xff << (7 * 8))) >> 8
res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16
res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24
res += (v0 & (0xff << (1 * 8))) << 32
res += v0 << 56
*d0 += res
res = (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8)))
res += (v0 & (0xff << (6 * 8))) >> 8
res += (v1 & (0xff << (5 * 8))) >> 16
res += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24
res += (v1 & 0xff) << 48
res += (v1 & (0xff << (1 * 8))) << 24
*d1 += res
}
}
*/
// reduce v = [v0, v1, v2, v3] mod the irreducible polynomial x^128 + x^2 + x
func reduceMod(v0, v1, v2, v3 uint64) (r0, r1 uint64) {

View File

@ -2,7 +2,8 @@
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
//+build !noasm,!appengine
//go:build !noasm && !appengine
// +build !noasm,!appengine
package highwayhash
@ -10,6 +11,8 @@ var (
useSSE4 = false
useAVX2 = false
useNEON = false
useSVE = false
useSVE2 = false
useVMX = true
)

View File

@ -2,6 +2,7 @@
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
//go:build noasm || (!amd64 && !arm64 && !ppc64le)
// +build noasm !amd64,!arm64,!ppc64le
package highwayhash
@ -10,6 +11,8 @@ var (
useSSE4 = false
useAVX2 = false
useNEON = false
useSVE = false
useSVE2 = false
useVMX = false
)

2
vendor/modules.txt vendored
View File

@ -842,7 +842,7 @@ github.com/magiconair/properties
# github.com/matttproud/golang_protobuf_extensions v1.0.4
## explicit; go 1.9
github.com/matttproud/golang_protobuf_extensions/pbutil
# github.com/minio/highwayhash v1.0.2
# github.com/minio/highwayhash v1.0.3
## explicit; go 1.15
github.com/minio/highwayhash
# github.com/mitchellh/go-homedir v1.1.0