chore(deps): bump github.com/minio/highwayhash from 1.0.2 to 1.0.3 (#3252)
Bumps [github.com/minio/highwayhash](https://github.com/minio/highwayhash) from 1.0.2 to 1.0.3. - [Release notes](https://github.com/minio/highwayhash/releases) - [Commits](https://github.com/minio/highwayhash/compare/v1.0.2...v1.0.3) --- updated-dependencies: - dependency-name: github.com/minio/highwayhash dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
This commit is contained in:
parent
7825b6bf4f
commit
725c6dacdc
2
go.mod
2
go.mod
|
|
@ -21,7 +21,7 @@ require (
|
|||
github.com/google/go-github v17.0.0+incompatible
|
||||
github.com/google/slowjam v1.1.1
|
||||
github.com/karrick/godirwalk v1.16.1
|
||||
github.com/minio/highwayhash v1.0.2
|
||||
github.com/minio/highwayhash v1.0.3
|
||||
github.com/moby/buildkit v0.14.1
|
||||
github.com/otiai10/copy v1.14.0
|
||||
github.com/pkg/errors v0.9.1
|
||||
|
|
|
|||
5
go.sum
5
go.sum
|
|
@ -342,8 +342,8 @@ github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3v
|
|||
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
|
||||
github.com/minio/highwayhash v1.0.2 h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA/g=
|
||||
github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY=
|
||||
github.com/minio/highwayhash v1.0.3 h1:kbnuUMoHYyVl7szWjSxJnxw11k2U709jqFPPmIUyD6Q=
|
||||
github.com/minio/highwayhash v1.0.3/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ=
|
||||
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
|
||||
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
|
||||
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
|
||||
|
|
@ -570,7 +570,6 @@ golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
|||
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190130150945-aca44879d564/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
|
|
|
|||
|
|
@ -12,13 +12,11 @@ linters:
|
|||
- goimports
|
||||
- misspell
|
||||
- govet
|
||||
- golint
|
||||
- revive
|
||||
- ineffassign
|
||||
- gosimple
|
||||
- deadcode
|
||||
- unparam
|
||||
- unused
|
||||
- structcheck
|
||||
|
||||
issues:
|
||||
exclude-use-default: false
|
||||
|
|
@ -27,4 +25,4 @@ issues:
|
|||
- error strings should not be capitalized or end with punctuation or a newline
|
||||
- should have comment # TODO(aead): Remove once all exported ident. have comments!
|
||||
service:
|
||||
golangci-lint-version: 1.20.0 # use the fixed version to not introduce new linters unexpectedly
|
||||
golangci-lint-version: 1.51.2 # use the fixed version to not introduce new linters unexpectedly
|
||||
|
|
|
|||
|
|
@ -42,17 +42,17 @@ So for moderately sized messages it tops out at about 15 GB/sec. Also for small
|
|||
|
||||
### ARM Performance
|
||||
|
||||
Below are the single core results on an EC2 m6g.4xlarge (Graviton2) instance for 256 bit outputs:
|
||||
Below are the single core results on an EC2 c7g.4xlarge (Graviton3) instance for 256 bit outputs:
|
||||
|
||||
```
|
||||
BenchmarkSum256_16 96.82 MB/s
|
||||
BenchmarkSum256_64 445.35 MB/s
|
||||
BenchmarkSum256_1K 2782.46 MB/s
|
||||
BenchmarkSum256_8K 4083.58 MB/s
|
||||
BenchmarkSum256_1M 4986.41 MB/s
|
||||
BenchmarkSum256_5M 4992.72 MB/s
|
||||
BenchmarkSum256_10M 4993.32 MB/s
|
||||
BenchmarkSum256_25M 4992.55 MB/s
|
||||
BenchmarkSum256_16 143.66 MB/s
|
||||
BenchmarkSum256_64 628.75 MB/s
|
||||
BenchmarkSum256_1K 3621.71 MB/s
|
||||
BenchmarkSum256_8K 5039.64 MB/s
|
||||
BenchmarkSum256_1M 5279.79 MB/s
|
||||
BenchmarkSum256_5M 5474.60 MB/s
|
||||
BenchmarkSum256_10M 5621.73 MB/s
|
||||
BenchmarkSum256_25M 5250.47 MB/s
|
||||
```
|
||||
|
||||
### ppc64le Performance
|
||||
|
|
|
|||
|
|
@ -0,0 +1,132 @@
|
|||
//
|
||||
// Copyright (c) 2024 Minio Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
TEXT ·getVectorLength(SB), NOSPLIT, $0
|
||||
WORD $0xd2800002 // mov x2, #0
|
||||
WORD $0x04225022 // addvl x2, x2, #1
|
||||
WORD $0xd37df042 // lsl x2, x2, #3
|
||||
WORD $0xd2800003 // mov x3, #0
|
||||
WORD $0x04635023 // addpl x3, x3, #1
|
||||
WORD $0xd37df063 // lsl x3, x3, #3
|
||||
MOVD R2, vl+0(FP)
|
||||
MOVD R3, pl+8(FP)
|
||||
RET
|
||||
|
||||
TEXT ·updateArm64Sve(SB), NOSPLIT, $0
|
||||
MOVD state+0(FP), R0
|
||||
MOVD msg_base+8(FP), R1
|
||||
MOVD msg_len+16(FP), R2 // length of message
|
||||
SUBS $32, R2
|
||||
BMI completeSve
|
||||
|
||||
WORD $0x2518e3e1 // ptrue p1.b
|
||||
WORD $0xa5e0a401 // ld1d z1.d, p1/z, [x0]
|
||||
WORD $0xa5e1a402 // ld1d z2.d, p1/z, [x0, #1, MUL VL]
|
||||
WORD $0xa5e2a403 // ld1d z3.d, p1/z, [x0, #2, MUL VL]
|
||||
WORD $0xa5e3a404 // ld1d z4.d, p1/z, [x0, #3, MUL VL]
|
||||
|
||||
// Load zipper merge constants table pointer
|
||||
MOVD $·zipperMergeSve(SB), R3
|
||||
WORD $0xa5e0a465 // ld1d z5.d, p1/z, [x3]
|
||||
WORD $0x25b8c006 // mov z6.s, #0
|
||||
WORD $0x25d8e3e2 // ptrue p2.d /* set every other lane for "s" type */
|
||||
|
||||
loopSve:
|
||||
WORD $0xa5e0a420 // ld1d z0.d, p1/z, [x1]
|
||||
ADD $32, R1
|
||||
|
||||
WORD $0x04e00042 // add z2.d, z2.d, z0.d
|
||||
WORD $0x04e30042 // add z2.d, z2.d, z3.d
|
||||
WORD $0x04e09420 // lsr z0.d, z1.d, #32
|
||||
WORD $0x05a6c847 // sel z7.s, p2, z2.s, z6.s
|
||||
WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d
|
||||
WORD $0x04a33003 // eor z3.d, z0.d, z3.d
|
||||
WORD $0x04e10081 // add z1.d, z4.d, z1.d
|
||||
WORD $0x04e09440 // lsr z0.d, z2.d, #32
|
||||
WORD $0x05a6c827 // sel z7.s, p2, z1.s, z6.s
|
||||
WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d
|
||||
WORD $0x04a43004 // eor z4.d, z0.d, z4.d
|
||||
WORD $0x05253040 // tbl z0.b, z2.b, z5.b
|
||||
WORD $0x04e00021 // add z1.d, z1.d, z0.d
|
||||
WORD $0x05253020 // tbl z0.b, z1.b, z5.b
|
||||
WORD $0x04e00042 // add z2.d, z2.d, z0.d
|
||||
|
||||
SUBS $32, R2
|
||||
BPL loopSve
|
||||
|
||||
WORD $0xe5e0e401 // st1d z1.d, p1, [x0]
|
||||
WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL]
|
||||
WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL]
|
||||
WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL]
|
||||
|
||||
completeSve:
|
||||
RET
|
||||
|
||||
TEXT ·updateArm64Sve2(SB), NOSPLIT, $0
|
||||
MOVD state+0(FP), R0
|
||||
MOVD msg_base+8(FP), R1
|
||||
MOVD msg_len+16(FP), R2 // length of message
|
||||
SUBS $32, R2
|
||||
BMI completeSve2
|
||||
|
||||
WORD $0x2518e3e1 // ptrue p1.b
|
||||
WORD $0xa5e0a401 // ld1d z1.d, p1/z, [x0]
|
||||
WORD $0xa5e1a402 // ld1d z2.d, p1/z, [x0, #1, MUL VL]
|
||||
WORD $0xa5e2a403 // ld1d z3.d, p1/z, [x0, #2, MUL VL]
|
||||
WORD $0xa5e3a404 // ld1d z4.d, p1/z, [x0, #3, MUL VL]
|
||||
|
||||
// Load zipper merge constants table pointer
|
||||
MOVD $·zipperMergeSve(SB), R3
|
||||
WORD $0xa5e0a465 // ld1d z5.d, p1/z, [x3]
|
||||
|
||||
loopSve2:
|
||||
WORD $0xa5e0a420 // ld1d z0.d, p1/z, [x1]
|
||||
ADD $32, R1
|
||||
|
||||
WORD $0x04e00042 // add z2.d, z2.d, z0.d
|
||||
WORD $0x04e30042 // add z2.d, z2.d, z3.d
|
||||
WORD $0x04e09420 // lsr z0.d, z1.d, #32
|
||||
WORD $0x45c27800 // umullb z0.d, z0.s, z2.s
|
||||
WORD $0x04a33003 // eor z3.d, z0.d, z3.d
|
||||
WORD $0x04e10081 // add z1.d, z4.d, z1.d
|
||||
WORD $0x04e09440 // lsr z0.d, z2.d, #32
|
||||
WORD $0x45c17800 // umullb z0.d, z0.s, z1.s
|
||||
WORD $0x04a43004 // eor z4.d, z0.d, z4.d
|
||||
WORD $0x05253040 // tbl z0.b, z2.b, z5.b
|
||||
WORD $0x04e00021 // add z1.d, z1.d, z0.d
|
||||
WORD $0x05253020 // tbl z0.b, z1.b, z5.b
|
||||
WORD $0x04e00042 // add z2.d, z2.d, z0.d
|
||||
|
||||
SUBS $32, R2
|
||||
BPL loopSve2
|
||||
|
||||
WORD $0xe5e0e401 // st1d z1.d, p1, [x0]
|
||||
WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL]
|
||||
WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL]
|
||||
WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL]
|
||||
|
||||
completeSve2:
|
||||
RET
|
||||
|
||||
DATA ·zipperMergeSve+0x00(SB)/8, $0x000f010e05020c03
|
||||
DATA ·zipperMergeSve+0x08(SB)/8, $0x070806090d0a040b
|
||||
DATA ·zipperMergeSve+0x10(SB)/8, $0x101f111e15121c13
|
||||
DATA ·zipperMergeSve+0x18(SB)/8, $0x171816191d1a141b
|
||||
GLOBL ·zipperMergeSve(SB), (NOPTR+RODATA), $32
|
||||
|
|
@ -2,6 +2,7 @@
|
|||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
//go:build amd64 && !gccgo && !appengine && !nacl && !noasm
|
||||
// +build amd64,!gccgo,!appengine,!nacl,!noasm
|
||||
|
||||
package highwayhash
|
||||
|
|
@ -12,6 +13,8 @@ var (
|
|||
useSSE4 = cpu.X86.HasSSE41
|
||||
useAVX2 = cpu.X86.HasAVX2
|
||||
useNEON = false
|
||||
useSVE = false
|
||||
useSVE2 = false
|
||||
useVMX = false
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,24 +1,54 @@
|
|||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
||||
// Copyright (c) 2017-2024 Minio Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
//go:build !noasm && !appengine
|
||||
// +build !noasm,!appengine
|
||||
|
||||
package highwayhash
|
||||
|
||||
import (
|
||||
"golang.org/x/sys/cpu"
|
||||
)
|
||||
|
||||
var (
|
||||
useSSE4 = false
|
||||
useAVX2 = false
|
||||
useNEON = true
|
||||
useNEON = cpu.ARM64.HasASIMD
|
||||
useSVE = cpu.ARM64.HasSVE
|
||||
useSVE2 = false // cpu.ARM64.HasSVE2 -- disable until tested on real hardware
|
||||
useVMX = false
|
||||
)
|
||||
|
||||
func init() {
|
||||
if useSVE {
|
||||
if vl, _ := getVectorLength(); vl != 256 {
|
||||
//
|
||||
// Since HighwayHash is designed for AVX2,
|
||||
// SVE/SVE2 instructions only run correctly
|
||||
// for vector length of 256
|
||||
//
|
||||
useSVE2 = false
|
||||
useSVE = false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//go:noescape
|
||||
func initializeArm64(state *[16]uint64, key []byte)
|
||||
|
||||
//go:noescape
|
||||
func updateArm64(state *[16]uint64, msg []byte)
|
||||
|
||||
//go:noescape
|
||||
func getVectorLength() (vl, pl uint64)
|
||||
|
||||
//go:noescape
|
||||
func updateArm64Sve(state *[16]uint64, msg []byte)
|
||||
|
||||
//go:noescape
|
||||
func updateArm64Sve2(state *[16]uint64, msg []byte)
|
||||
|
||||
//go:noescape
|
||||
func finalizeArm64(out []byte, state *[16]uint64)
|
||||
|
||||
|
|
@ -31,7 +61,11 @@ func initialize(state *[16]uint64, key []byte) {
|
|||
}
|
||||
|
||||
func update(state *[16]uint64, msg []byte) {
|
||||
if useNEON {
|
||||
if useSVE2 {
|
||||
updateArm64Sve2(state, msg)
|
||||
} else if useSVE {
|
||||
updateArm64Sve(state, msg)
|
||||
} else if useNEON {
|
||||
updateArm64(state, msg)
|
||||
} else {
|
||||
updateGeneric(state, msg)
|
||||
|
|
|
|||
|
|
@ -46,40 +46,113 @@ func initializeGeneric(state *[16]uint64, k []byte) {
|
|||
}
|
||||
|
||||
func updateGeneric(state *[16]uint64, msg []byte) {
|
||||
for len(msg) > 0 {
|
||||
// add message
|
||||
state[v1+0] += binary.LittleEndian.Uint64(msg)
|
||||
state[v1+1] += binary.LittleEndian.Uint64(msg[8:])
|
||||
state[v1+2] += binary.LittleEndian.Uint64(msg[16:])
|
||||
state[v1+3] += binary.LittleEndian.Uint64(msg[24:])
|
||||
|
||||
// v1 += mul0
|
||||
state[v1+0] += state[mul0+0]
|
||||
state[v1+1] += state[mul0+1]
|
||||
state[v1+2] += state[mul0+2]
|
||||
state[v1+3] += state[mul0+3]
|
||||
for len(msg) >= 32 {
|
||||
m := msg[:32]
|
||||
|
||||
// add message + mul0
|
||||
// Interleave operations to hide multiplication
|
||||
state[v1+0] += binary.LittleEndian.Uint64(m) + state[mul0+0]
|
||||
state[mul0+0] ^= uint64(uint32(state[v1+0])) * (state[v0+0] >> 32)
|
||||
state[mul0+1] ^= uint64(uint32(state[v1+1])) * (state[v0+1] >> 32)
|
||||
state[mul0+2] ^= uint64(uint32(state[v1+2])) * (state[v0+2] >> 32)
|
||||
state[mul0+3] ^= uint64(uint32(state[v1+3])) * (state[v0+3] >> 32)
|
||||
|
||||
// v0 += mul1
|
||||
state[v0+0] += state[mul1+0]
|
||||
state[v0+1] += state[mul1+1]
|
||||
state[v0+2] += state[mul1+2]
|
||||
state[v0+3] += state[mul1+3]
|
||||
|
||||
state[mul1+0] ^= uint64(uint32(state[v0+0])) * (state[v1+0] >> 32)
|
||||
|
||||
state[v1+1] += binary.LittleEndian.Uint64(m[8:]) + state[mul0+1]
|
||||
state[mul0+1] ^= uint64(uint32(state[v1+1])) * (state[v0+1] >> 32)
|
||||
state[v0+1] += state[mul1+1]
|
||||
state[mul1+1] ^= uint64(uint32(state[v0+1])) * (state[v1+1] >> 32)
|
||||
|
||||
state[v1+2] += binary.LittleEndian.Uint64(m[16:]) + state[mul0+2]
|
||||
state[mul0+2] ^= uint64(uint32(state[v1+2])) * (state[v0+2] >> 32)
|
||||
state[v0+2] += state[mul1+2]
|
||||
state[mul1+2] ^= uint64(uint32(state[v0+2])) * (state[v1+2] >> 32)
|
||||
|
||||
state[v1+3] += binary.LittleEndian.Uint64(m[24:]) + state[mul0+3]
|
||||
state[mul0+3] ^= uint64(uint32(state[v1+3])) * (state[v0+3] >> 32)
|
||||
state[v0+3] += state[mul1+3]
|
||||
state[mul1+3] ^= uint64(uint32(state[v0+3])) * (state[v1+3] >> 32)
|
||||
|
||||
zipperMerge(state[v1+0], state[v1+1], &state[v0+0], &state[v0+1])
|
||||
zipperMerge(state[v1+2], state[v1+3], &state[v0+2], &state[v0+3])
|
||||
// inlined: zipperMerge(state[v1+0], state[v1+1], &state[v0+0], &state[v0+1])
|
||||
{
|
||||
val0 := state[v1+0]
|
||||
val1 := state[v1+1]
|
||||
res := val0 & (0xff << (2 * 8))
|
||||
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
|
||||
res += (val1 & (0xff << (7 * 8))) >> 8
|
||||
res2 += (val0 & (0xff << (6 * 8))) >> 8
|
||||
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
|
||||
res2 += (val1 & (0xff << (5 * 8))) >> 16
|
||||
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
|
||||
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
|
||||
res += (val0 & (0xff << (1 * 8))) << 32
|
||||
res2 += (val1 & 0xff) << 48
|
||||
res += val0 << 56
|
||||
res2 += (val1 & (0xff << (1 * 8))) << 24
|
||||
|
||||
zipperMerge(state[v0+0], state[v0+1], &state[v1+0], &state[v1+1])
|
||||
zipperMerge(state[v0+2], state[v0+3], &state[v1+2], &state[v1+3])
|
||||
state[v0+0] += res
|
||||
state[v0+1] += res2
|
||||
}
|
||||
// inlined: zipperMerge(state[v1+2], state[v1+3], &state[v0+2], &state[v0+3])
|
||||
{
|
||||
val0 := state[v1+2]
|
||||
val1 := state[v1+3]
|
||||
res := val0 & (0xff << (2 * 8))
|
||||
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
|
||||
res += (val1 & (0xff << (7 * 8))) >> 8
|
||||
res2 += (val0 & (0xff << (6 * 8))) >> 8
|
||||
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
|
||||
res2 += (val1 & (0xff << (5 * 8))) >> 16
|
||||
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
|
||||
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
|
||||
res += (val0 & (0xff << (1 * 8))) << 32
|
||||
res2 += (val1 & 0xff) << 48
|
||||
res += val0 << 56
|
||||
res2 += (val1 & (0xff << (1 * 8))) << 24
|
||||
|
||||
state[v0+2] += res
|
||||
state[v0+3] += res2
|
||||
}
|
||||
|
||||
// inlined: zipperMerge(state[v0+0], state[v0+1], &state[v1+0], &state[v1+1])
|
||||
{
|
||||
val0 := state[v0+0]
|
||||
val1 := state[v0+1]
|
||||
res := val0 & (0xff << (2 * 8))
|
||||
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
|
||||
res += (val1 & (0xff << (7 * 8))) >> 8
|
||||
res2 += (val0 & (0xff << (6 * 8))) >> 8
|
||||
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
|
||||
res2 += (val1 & (0xff << (5 * 8))) >> 16
|
||||
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
|
||||
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
|
||||
res += (val0 & (0xff << (1 * 8))) << 32
|
||||
res2 += (val1 & 0xff) << 48
|
||||
res += val0 << 56
|
||||
res2 += (val1 & (0xff << (1 * 8))) << 24
|
||||
|
||||
state[v1+0] += res
|
||||
state[v1+1] += res2
|
||||
}
|
||||
|
||||
// inlined: zipperMerge(state[v0+2], state[v0+3], &state[v1+2], &state[v1+3])
|
||||
{
|
||||
val0 := state[v0+2]
|
||||
val1 := state[v0+3]
|
||||
res := val0 & (0xff << (2 * 8))
|
||||
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
|
||||
res += (val1 & (0xff << (7 * 8))) >> 8
|
||||
res2 += (val0 & (0xff << (6 * 8))) >> 8
|
||||
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
|
||||
res2 += (val1 & (0xff << (5 * 8))) >> 16
|
||||
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
|
||||
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
|
||||
res += (val0 & (0xff << (1 * 8))) << 32
|
||||
res2 += (val1 & 0xff) << 48
|
||||
res += val0 << 56
|
||||
res2 += (val1 & (0xff << (1 * 8))) << 24
|
||||
|
||||
state[v1+2] += res
|
||||
state[v1+3] += res2
|
||||
}
|
||||
msg = msg[32:]
|
||||
}
|
||||
}
|
||||
|
|
@ -124,25 +197,129 @@ func finalizeGeneric(out []byte, state *[16]uint64) {
|
|||
}
|
||||
}
|
||||
|
||||
// Experiments on variations left for future reference...
|
||||
/*
|
||||
func zipperMerge(v0, v1 uint64, d0, d1 *uint64) {
|
||||
m0 := v0 & (0xFF << (2 * 8))
|
||||
m1 := (v1 & (0xFF << (7 * 8))) >> 8
|
||||
m2 := ((v0 & (0xFF << (5 * 8))) + (v1 & (0xFF << (6 * 8)))) >> 16
|
||||
m3 := ((v0 & (0xFF << (3 * 8))) + (v1 & (0xFF << (4 * 8)))) >> 24
|
||||
m4 := (v0 & (0xFF << (1 * 8))) << 32
|
||||
m5 := v0 << 56
|
||||
if true {
|
||||
// fastest. original interleaved...
|
||||
res := v0 & (0xff << (2 * 8))
|
||||
res2 := (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8)))
|
||||
res += (v1 & (0xff << (7 * 8))) >> 8
|
||||
res2 += (v0 & (0xff << (6 * 8))) >> 8
|
||||
res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16
|
||||
res2 += (v1 & (0xff << (5 * 8))) >> 16
|
||||
res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24
|
||||
res2 += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24
|
||||
res += (v0 & (0xff << (1 * 8))) << 32
|
||||
res2 += (v1 & 0xff) << 48
|
||||
res += v0 << 56
|
||||
res2 += (v1 & (0xff << (1 * 8))) << 24
|
||||
|
||||
*d0 += m0 + m1 + m2 + m3 + m4 + m5
|
||||
*d0 += res
|
||||
*d1 += res2
|
||||
} else if false {
|
||||
// Reading bytes and combining into uint64
|
||||
var v0b [8]byte
|
||||
binary.LittleEndian.PutUint64(v0b[:], v0)
|
||||
var v1b [8]byte
|
||||
binary.LittleEndian.PutUint64(v1b[:], v1)
|
||||
var res, res2 uint64
|
||||
|
||||
m0 = (v0 & (0xFF << (7 * 8))) + (v1 & (0xFF << (2 * 8)))
|
||||
m1 = (v0 & (0xFF << (6 * 8))) >> 8
|
||||
m2 = (v1 & (0xFF << (5 * 8))) >> 16
|
||||
m3 = ((v1 & (0xFF << (3 * 8))) + (v0 & (0xFF << (4 * 8)))) >> 24
|
||||
m4 = (v1 & 0xFF) << 48
|
||||
m5 = (v1 & (0xFF << (1 * 8))) << 24
|
||||
res = uint64(v0b[0]) << (7 * 8)
|
||||
res2 = uint64(v1b[0]) << (6 * 8)
|
||||
res |= uint64(v0b[1]) << (5 * 8)
|
||||
res2 |= uint64(v1b[1]) << (4 * 8)
|
||||
res |= uint64(v0b[2]) << (2 * 8)
|
||||
res2 |= uint64(v1b[2]) << (2 * 8)
|
||||
res |= uint64(v0b[3])
|
||||
res2 |= uint64(v0b[4]) << (1 * 8)
|
||||
res |= uint64(v0b[5]) << (3 * 8)
|
||||
res2 |= uint64(v0b[6]) << (5 * 8)
|
||||
res |= uint64(v1b[4]) << (1 * 8)
|
||||
res2 |= uint64(v0b[7]) << (7 * 8)
|
||||
res |= uint64(v1b[6]) << (4 * 8)
|
||||
res2 |= uint64(v1b[3])
|
||||
res |= uint64(v1b[7]) << (6 * 8)
|
||||
res2 |= uint64(v1b[5]) << (3 * 8)
|
||||
|
||||
*d1 += m3 + m2 + m5 + m1 + m4 + m0
|
||||
*d0 += res
|
||||
*d1 += res2
|
||||
|
||||
} else if false {
|
||||
// bytes to bytes shuffle
|
||||
var v0b [8]byte
|
||||
binary.LittleEndian.PutUint64(v0b[:], v0)
|
||||
var v1b [8]byte
|
||||
binary.LittleEndian.PutUint64(v1b[:], v1)
|
||||
var res [8]byte
|
||||
|
||||
//res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24
|
||||
res[0] = v0b[3]
|
||||
res[1] = v1b[4]
|
||||
|
||||
// res := v0 & (0xff << (2 * 8))
|
||||
res[2] = v0b[2]
|
||||
|
||||
//res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16
|
||||
res[3] = v0b[5]
|
||||
res[4] = v1b[6]
|
||||
|
||||
//res += (v0 & (0xff << (1 * 8))) << 32
|
||||
res[5] = v0b[1]
|
||||
|
||||
//res += (v1 & (0xff << (7 * 8))) >> 8
|
||||
res[6] += v1b[7]
|
||||
|
||||
//res += v0 << 56
|
||||
res[7] = v0b[0]
|
||||
v0 = binary.LittleEndian.Uint64(res[:])
|
||||
*d0 += v0
|
||||
|
||||
//res += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24
|
||||
res[0] = v1b[3]
|
||||
res[1] = v0b[4]
|
||||
|
||||
res[2] = v1b[2]
|
||||
|
||||
// res += (v1 & (0xff << (5 * 8))) >> 16
|
||||
res[3] = v1b[5]
|
||||
|
||||
//res += (v1 & (0xff << (1 * 8))) << 24
|
||||
res[4] = v1b[1]
|
||||
|
||||
// res += (v0 & (0xff << (6 * 8))) >> 8
|
||||
res[5] = v0b[6]
|
||||
|
||||
//res := (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8)))
|
||||
res[7] = v0b[7]
|
||||
|
||||
//res += (v1 & 0xff) << 48
|
||||
res[6] = v1b[0]
|
||||
|
||||
v0 = binary.LittleEndian.Uint64(res[:])
|
||||
*d1 += v0
|
||||
} else {
|
||||
// original.
|
||||
res := v0 & (0xff << (2 * 8))
|
||||
res += (v1 & (0xff << (7 * 8))) >> 8
|
||||
res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16
|
||||
res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24
|
||||
res += (v0 & (0xff << (1 * 8))) << 32
|
||||
res += v0 << 56
|
||||
|
||||
*d0 += res
|
||||
|
||||
res = (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8)))
|
||||
res += (v0 & (0xff << (6 * 8))) >> 8
|
||||
res += (v1 & (0xff << (5 * 8))) >> 16
|
||||
res += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24
|
||||
res += (v1 & 0xff) << 48
|
||||
res += (v1 & (0xff << (1 * 8))) << 24
|
||||
|
||||
*d1 += res
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// reduce v = [v0, v1, v2, v3] mod the irreducible polynomial x^128 + x^2 + x
|
||||
func reduceMod(v0, v1, v2, v3 uint64) (r0, r1 uint64) {
|
||||
|
|
|
|||
|
|
@ -2,7 +2,8 @@
|
|||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
//go:build !noasm && !appengine
|
||||
// +build !noasm,!appengine
|
||||
|
||||
package highwayhash
|
||||
|
||||
|
|
@ -10,6 +11,8 @@ var (
|
|||
useSSE4 = false
|
||||
useAVX2 = false
|
||||
useNEON = false
|
||||
useSVE = false
|
||||
useSVE2 = false
|
||||
useVMX = true
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
//go:build noasm || (!amd64 && !arm64 && !ppc64le)
|
||||
// +build noasm !amd64,!arm64,!ppc64le
|
||||
|
||||
package highwayhash
|
||||
|
|
@ -10,6 +11,8 @@ var (
|
|||
useSSE4 = false
|
||||
useAVX2 = false
|
||||
useNEON = false
|
||||
useSVE = false
|
||||
useSVE2 = false
|
||||
useVMX = false
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -842,7 +842,7 @@ github.com/magiconair/properties
|
|||
# github.com/matttproud/golang_protobuf_extensions v1.0.4
|
||||
## explicit; go 1.9
|
||||
github.com/matttproud/golang_protobuf_extensions/pbutil
|
||||
# github.com/minio/highwayhash v1.0.2
|
||||
# github.com/minio/highwayhash v1.0.3
|
||||
## explicit; go 1.15
|
||||
github.com/minio/highwayhash
|
||||
# github.com/mitchellh/go-homedir v1.1.0
|
||||
|
|
|
|||
Loading…
Reference in New Issue