chore(deps): bump github.com/minio/highwayhash from 1.0.2 to 1.0.3 (#3252)
Bumps [github.com/minio/highwayhash](https://github.com/minio/highwayhash) from 1.0.2 to 1.0.3. - [Release notes](https://github.com/minio/highwayhash/releases) - [Commits](https://github.com/minio/highwayhash/compare/v1.0.2...v1.0.3) --- updated-dependencies: - dependency-name: github.com/minio/highwayhash dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
This commit is contained in:
parent
7825b6bf4f
commit
725c6dacdc
2
go.mod
2
go.mod
|
|
@ -21,7 +21,7 @@ require (
|
||||||
github.com/google/go-github v17.0.0+incompatible
|
github.com/google/go-github v17.0.0+incompatible
|
||||||
github.com/google/slowjam v1.1.1
|
github.com/google/slowjam v1.1.1
|
||||||
github.com/karrick/godirwalk v1.16.1
|
github.com/karrick/godirwalk v1.16.1
|
||||||
github.com/minio/highwayhash v1.0.2
|
github.com/minio/highwayhash v1.0.3
|
||||||
github.com/moby/buildkit v0.14.1
|
github.com/moby/buildkit v0.14.1
|
||||||
github.com/otiai10/copy v1.14.0
|
github.com/otiai10/copy v1.14.0
|
||||||
github.com/pkg/errors v0.9.1
|
github.com/pkg/errors v0.9.1
|
||||||
|
|
|
||||||
5
go.sum
5
go.sum
|
|
@ -342,8 +342,8 @@ github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3v
|
||||||
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
|
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
|
||||||
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
|
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
|
||||||
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
|
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
|
||||||
github.com/minio/highwayhash v1.0.2 h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA/g=
|
github.com/minio/highwayhash v1.0.3 h1:kbnuUMoHYyVl7szWjSxJnxw11k2U709jqFPPmIUyD6Q=
|
||||||
github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY=
|
github.com/minio/highwayhash v1.0.3/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ=
|
||||||
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
|
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
|
||||||
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
|
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
|
||||||
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
|
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
|
||||||
|
|
@ -570,7 +570,6 @@ golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||||
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
golang.org/x/sys v0.0.0-20190130150945-aca44879d564/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
|
||||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
|
|
||||||
|
|
@ -12,13 +12,11 @@ linters:
|
||||||
- goimports
|
- goimports
|
||||||
- misspell
|
- misspell
|
||||||
- govet
|
- govet
|
||||||
- golint
|
- revive
|
||||||
- ineffassign
|
- ineffassign
|
||||||
- gosimple
|
- gosimple
|
||||||
- deadcode
|
|
||||||
- unparam
|
- unparam
|
||||||
- unused
|
- unused
|
||||||
- structcheck
|
|
||||||
|
|
||||||
issues:
|
issues:
|
||||||
exclude-use-default: false
|
exclude-use-default: false
|
||||||
|
|
@ -27,4 +25,4 @@ issues:
|
||||||
- error strings should not be capitalized or end with punctuation or a newline
|
- error strings should not be capitalized or end with punctuation or a newline
|
||||||
- should have comment # TODO(aead): Remove once all exported ident. have comments!
|
- should have comment # TODO(aead): Remove once all exported ident. have comments!
|
||||||
service:
|
service:
|
||||||
golangci-lint-version: 1.20.0 # use the fixed version to not introduce new linters unexpectedly
|
golangci-lint-version: 1.51.2 # use the fixed version to not introduce new linters unexpectedly
|
||||||
|
|
|
||||||
|
|
@ -42,17 +42,17 @@ So for moderately sized messages it tops out at about 15 GB/sec. Also for small
|
||||||
|
|
||||||
### ARM Performance
|
### ARM Performance
|
||||||
|
|
||||||
Below are the single core results on an EC2 m6g.4xlarge (Graviton2) instance for 256 bit outputs:
|
Below are the single core results on an EC2 c7g.4xlarge (Graviton3) instance for 256 bit outputs:
|
||||||
|
|
||||||
```
|
```
|
||||||
BenchmarkSum256_16 96.82 MB/s
|
BenchmarkSum256_16 143.66 MB/s
|
||||||
BenchmarkSum256_64 445.35 MB/s
|
BenchmarkSum256_64 628.75 MB/s
|
||||||
BenchmarkSum256_1K 2782.46 MB/s
|
BenchmarkSum256_1K 3621.71 MB/s
|
||||||
BenchmarkSum256_8K 4083.58 MB/s
|
BenchmarkSum256_8K 5039.64 MB/s
|
||||||
BenchmarkSum256_1M 4986.41 MB/s
|
BenchmarkSum256_1M 5279.79 MB/s
|
||||||
BenchmarkSum256_5M 4992.72 MB/s
|
BenchmarkSum256_5M 5474.60 MB/s
|
||||||
BenchmarkSum256_10M 4993.32 MB/s
|
BenchmarkSum256_10M 5621.73 MB/s
|
||||||
BenchmarkSum256_25M 4992.55 MB/s
|
BenchmarkSum256_25M 5250.47 MB/s
|
||||||
```
|
```
|
||||||
|
|
||||||
### ppc64le Performance
|
### ppc64le Performance
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,132 @@
|
||||||
|
//
|
||||||
|
// Copyright (c) 2024 Minio Inc. All rights reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
//+build !noasm,!appengine
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
TEXT ·getVectorLength(SB), NOSPLIT, $0
|
||||||
|
WORD $0xd2800002 // mov x2, #0
|
||||||
|
WORD $0x04225022 // addvl x2, x2, #1
|
||||||
|
WORD $0xd37df042 // lsl x2, x2, #3
|
||||||
|
WORD $0xd2800003 // mov x3, #0
|
||||||
|
WORD $0x04635023 // addpl x3, x3, #1
|
||||||
|
WORD $0xd37df063 // lsl x3, x3, #3
|
||||||
|
MOVD R2, vl+0(FP)
|
||||||
|
MOVD R3, pl+8(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
TEXT ·updateArm64Sve(SB), NOSPLIT, $0
|
||||||
|
MOVD state+0(FP), R0
|
||||||
|
MOVD msg_base+8(FP), R1
|
||||||
|
MOVD msg_len+16(FP), R2 // length of message
|
||||||
|
SUBS $32, R2
|
||||||
|
BMI completeSve
|
||||||
|
|
||||||
|
WORD $0x2518e3e1 // ptrue p1.b
|
||||||
|
WORD $0xa5e0a401 // ld1d z1.d, p1/z, [x0]
|
||||||
|
WORD $0xa5e1a402 // ld1d z2.d, p1/z, [x0, #1, MUL VL]
|
||||||
|
WORD $0xa5e2a403 // ld1d z3.d, p1/z, [x0, #2, MUL VL]
|
||||||
|
WORD $0xa5e3a404 // ld1d z4.d, p1/z, [x0, #3, MUL VL]
|
||||||
|
|
||||||
|
// Load zipper merge constants table pointer
|
||||||
|
MOVD $·zipperMergeSve(SB), R3
|
||||||
|
WORD $0xa5e0a465 // ld1d z5.d, p1/z, [x3]
|
||||||
|
WORD $0x25b8c006 // mov z6.s, #0
|
||||||
|
WORD $0x25d8e3e2 // ptrue p2.d /* set every other lane for "s" type */
|
||||||
|
|
||||||
|
loopSve:
|
||||||
|
WORD $0xa5e0a420 // ld1d z0.d, p1/z, [x1]
|
||||||
|
ADD $32, R1
|
||||||
|
|
||||||
|
WORD $0x04e00042 // add z2.d, z2.d, z0.d
|
||||||
|
WORD $0x04e30042 // add z2.d, z2.d, z3.d
|
||||||
|
WORD $0x04e09420 // lsr z0.d, z1.d, #32
|
||||||
|
WORD $0x05a6c847 // sel z7.s, p2, z2.s, z6.s
|
||||||
|
WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d
|
||||||
|
WORD $0x04a33003 // eor z3.d, z0.d, z3.d
|
||||||
|
WORD $0x04e10081 // add z1.d, z4.d, z1.d
|
||||||
|
WORD $0x04e09440 // lsr z0.d, z2.d, #32
|
||||||
|
WORD $0x05a6c827 // sel z7.s, p2, z1.s, z6.s
|
||||||
|
WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d
|
||||||
|
WORD $0x04a43004 // eor z4.d, z0.d, z4.d
|
||||||
|
WORD $0x05253040 // tbl z0.b, z2.b, z5.b
|
||||||
|
WORD $0x04e00021 // add z1.d, z1.d, z0.d
|
||||||
|
WORD $0x05253020 // tbl z0.b, z1.b, z5.b
|
||||||
|
WORD $0x04e00042 // add z2.d, z2.d, z0.d
|
||||||
|
|
||||||
|
SUBS $32, R2
|
||||||
|
BPL loopSve
|
||||||
|
|
||||||
|
WORD $0xe5e0e401 // st1d z1.d, p1, [x0]
|
||||||
|
WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL]
|
||||||
|
WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL]
|
||||||
|
WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL]
|
||||||
|
|
||||||
|
completeSve:
|
||||||
|
RET
|
||||||
|
|
||||||
|
TEXT ·updateArm64Sve2(SB), NOSPLIT, $0
|
||||||
|
MOVD state+0(FP), R0
|
||||||
|
MOVD msg_base+8(FP), R1
|
||||||
|
MOVD msg_len+16(FP), R2 // length of message
|
||||||
|
SUBS $32, R2
|
||||||
|
BMI completeSve2
|
||||||
|
|
||||||
|
WORD $0x2518e3e1 // ptrue p1.b
|
||||||
|
WORD $0xa5e0a401 // ld1d z1.d, p1/z, [x0]
|
||||||
|
WORD $0xa5e1a402 // ld1d z2.d, p1/z, [x0, #1, MUL VL]
|
||||||
|
WORD $0xa5e2a403 // ld1d z3.d, p1/z, [x0, #2, MUL VL]
|
||||||
|
WORD $0xa5e3a404 // ld1d z4.d, p1/z, [x0, #3, MUL VL]
|
||||||
|
|
||||||
|
// Load zipper merge constants table pointer
|
||||||
|
MOVD $·zipperMergeSve(SB), R3
|
||||||
|
WORD $0xa5e0a465 // ld1d z5.d, p1/z, [x3]
|
||||||
|
|
||||||
|
loopSve2:
|
||||||
|
WORD $0xa5e0a420 // ld1d z0.d, p1/z, [x1]
|
||||||
|
ADD $32, R1
|
||||||
|
|
||||||
|
WORD $0x04e00042 // add z2.d, z2.d, z0.d
|
||||||
|
WORD $0x04e30042 // add z2.d, z2.d, z3.d
|
||||||
|
WORD $0x04e09420 // lsr z0.d, z1.d, #32
|
||||||
|
WORD $0x45c27800 // umullb z0.d, z0.s, z2.s
|
||||||
|
WORD $0x04a33003 // eor z3.d, z0.d, z3.d
|
||||||
|
WORD $0x04e10081 // add z1.d, z4.d, z1.d
|
||||||
|
WORD $0x04e09440 // lsr z0.d, z2.d, #32
|
||||||
|
WORD $0x45c17800 // umullb z0.d, z0.s, z1.s
|
||||||
|
WORD $0x04a43004 // eor z4.d, z0.d, z4.d
|
||||||
|
WORD $0x05253040 // tbl z0.b, z2.b, z5.b
|
||||||
|
WORD $0x04e00021 // add z1.d, z1.d, z0.d
|
||||||
|
WORD $0x05253020 // tbl z0.b, z1.b, z5.b
|
||||||
|
WORD $0x04e00042 // add z2.d, z2.d, z0.d
|
||||||
|
|
||||||
|
SUBS $32, R2
|
||||||
|
BPL loopSve2
|
||||||
|
|
||||||
|
WORD $0xe5e0e401 // st1d z1.d, p1, [x0]
|
||||||
|
WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL]
|
||||||
|
WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL]
|
||||||
|
WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL]
|
||||||
|
|
||||||
|
completeSve2:
|
||||||
|
RET
|
||||||
|
|
||||||
|
DATA ·zipperMergeSve+0x00(SB)/8, $0x000f010e05020c03
|
||||||
|
DATA ·zipperMergeSve+0x08(SB)/8, $0x070806090d0a040b
|
||||||
|
DATA ·zipperMergeSve+0x10(SB)/8, $0x101f111e15121c13
|
||||||
|
DATA ·zipperMergeSve+0x18(SB)/8, $0x171816191d1a141b
|
||||||
|
GLOBL ·zipperMergeSve(SB), (NOPTR+RODATA), $32
|
||||||
|
|
@ -2,6 +2,7 @@
|
||||||
// Use of this source code is governed by a license that can be
|
// Use of this source code is governed by a license that can be
|
||||||
// found in the LICENSE file.
|
// found in the LICENSE file.
|
||||||
|
|
||||||
|
//go:build amd64 && !gccgo && !appengine && !nacl && !noasm
|
||||||
// +build amd64,!gccgo,!appengine,!nacl,!noasm
|
// +build amd64,!gccgo,!appengine,!nacl,!noasm
|
||||||
|
|
||||||
package highwayhash
|
package highwayhash
|
||||||
|
|
@ -12,6 +13,8 @@ var (
|
||||||
useSSE4 = cpu.X86.HasSSE41
|
useSSE4 = cpu.X86.HasSSE41
|
||||||
useAVX2 = cpu.X86.HasAVX2
|
useAVX2 = cpu.X86.HasAVX2
|
||||||
useNEON = false
|
useNEON = false
|
||||||
|
useSVE = false
|
||||||
|
useSVE2 = false
|
||||||
useVMX = false
|
useVMX = false
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,24 +1,54 @@
|
||||||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
// Copyright (c) 2017-2024 Minio Inc. All rights reserved.
|
||||||
// Use of this source code is governed by a license that can be
|
// Use of this source code is governed by a license that can be
|
||||||
// found in the LICENSE file.
|
// found in the LICENSE file.
|
||||||
|
|
||||||
//+build !noasm,!appengine
|
//go:build !noasm && !appengine
|
||||||
|
// +build !noasm,!appengine
|
||||||
|
|
||||||
package highwayhash
|
package highwayhash
|
||||||
|
|
||||||
|
import (
|
||||||
|
"golang.org/x/sys/cpu"
|
||||||
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
useSSE4 = false
|
useSSE4 = false
|
||||||
useAVX2 = false
|
useAVX2 = false
|
||||||
useNEON = true
|
useNEON = cpu.ARM64.HasASIMD
|
||||||
|
useSVE = cpu.ARM64.HasSVE
|
||||||
|
useSVE2 = false // cpu.ARM64.HasSVE2 -- disabled until tested on real hardware
|
||||||
useVMX = false
|
useVMX = false
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
if useSVE {
|
||||||
|
if vl, _ := getVectorLength(); vl != 256 {
|
||||||
|
//
|
||||||
|
// Since HighwayHash is designed for AVX2,
|
||||||
|
// SVE/SVE2 instructions only run correctly
|
||||||
|
// for a vector length of 256 bits
|
||||||
|
//
|
||||||
|
useSVE2 = false
|
||||||
|
useSVE = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//go:noescape
|
//go:noescape
|
||||||
func initializeArm64(state *[16]uint64, key []byte)
|
func initializeArm64(state *[16]uint64, key []byte)
|
||||||
|
|
||||||
//go:noescape
|
//go:noescape
|
||||||
func updateArm64(state *[16]uint64, msg []byte)
|
func updateArm64(state *[16]uint64, msg []byte)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func getVectorLength() (vl, pl uint64)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func updateArm64Sve(state *[16]uint64, msg []byte)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func updateArm64Sve2(state *[16]uint64, msg []byte)
|
||||||
|
|
||||||
//go:noescape
|
//go:noescape
|
||||||
func finalizeArm64(out []byte, state *[16]uint64)
|
func finalizeArm64(out []byte, state *[16]uint64)
|
||||||
|
|
||||||
|
|
@ -31,7 +61,11 @@ func initialize(state *[16]uint64, key []byte) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func update(state *[16]uint64, msg []byte) {
|
func update(state *[16]uint64, msg []byte) {
|
||||||
if useNEON {
|
if useSVE2 {
|
||||||
|
updateArm64Sve2(state, msg)
|
||||||
|
} else if useSVE {
|
||||||
|
updateArm64Sve(state, msg)
|
||||||
|
} else if useNEON {
|
||||||
updateArm64(state, msg)
|
updateArm64(state, msg)
|
||||||
} else {
|
} else {
|
||||||
updateGeneric(state, msg)
|
updateGeneric(state, msg)
|
||||||
|
|
|
||||||
|
|
@ -46,40 +46,113 @@ func initializeGeneric(state *[16]uint64, k []byte) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func updateGeneric(state *[16]uint64, msg []byte) {
|
func updateGeneric(state *[16]uint64, msg []byte) {
|
||||||
for len(msg) > 0 {
|
for len(msg) >= 32 {
|
||||||
// add message
|
m := msg[:32]
|
||||||
state[v1+0] += binary.LittleEndian.Uint64(msg)
|
|
||||||
state[v1+1] += binary.LittleEndian.Uint64(msg[8:])
|
|
||||||
state[v1+2] += binary.LittleEndian.Uint64(msg[16:])
|
|
||||||
state[v1+3] += binary.LittleEndian.Uint64(msg[24:])
|
|
||||||
|
|
||||||
// v1 += mul0
|
|
||||||
state[v1+0] += state[mul0+0]
|
|
||||||
state[v1+1] += state[mul0+1]
|
|
||||||
state[v1+2] += state[mul0+2]
|
|
||||||
state[v1+3] += state[mul0+3]
|
|
||||||
|
|
||||||
|
// add message + mul0
|
||||||
|
// Interleave operations to hide multiplication
|
||||||
|
state[v1+0] += binary.LittleEndian.Uint64(m) + state[mul0+0]
|
||||||
state[mul0+0] ^= uint64(uint32(state[v1+0])) * (state[v0+0] >> 32)
|
state[mul0+0] ^= uint64(uint32(state[v1+0])) * (state[v0+0] >> 32)
|
||||||
state[mul0+1] ^= uint64(uint32(state[v1+1])) * (state[v0+1] >> 32)
|
|
||||||
state[mul0+2] ^= uint64(uint32(state[v1+2])) * (state[v0+2] >> 32)
|
|
||||||
state[mul0+3] ^= uint64(uint32(state[v1+3])) * (state[v0+3] >> 32)
|
|
||||||
|
|
||||||
// v0 += mul1
|
|
||||||
state[v0+0] += state[mul1+0]
|
state[v0+0] += state[mul1+0]
|
||||||
state[v0+1] += state[mul1+1]
|
|
||||||
state[v0+2] += state[mul1+2]
|
|
||||||
state[v0+3] += state[mul1+3]
|
|
||||||
|
|
||||||
state[mul1+0] ^= uint64(uint32(state[v0+0])) * (state[v1+0] >> 32)
|
state[mul1+0] ^= uint64(uint32(state[v0+0])) * (state[v1+0] >> 32)
|
||||||
|
|
||||||
|
state[v1+1] += binary.LittleEndian.Uint64(m[8:]) + state[mul0+1]
|
||||||
|
state[mul0+1] ^= uint64(uint32(state[v1+1])) * (state[v0+1] >> 32)
|
||||||
|
state[v0+1] += state[mul1+1]
|
||||||
state[mul1+1] ^= uint64(uint32(state[v0+1])) * (state[v1+1] >> 32)
|
state[mul1+1] ^= uint64(uint32(state[v0+1])) * (state[v1+1] >> 32)
|
||||||
|
|
||||||
|
state[v1+2] += binary.LittleEndian.Uint64(m[16:]) + state[mul0+2]
|
||||||
|
state[mul0+2] ^= uint64(uint32(state[v1+2])) * (state[v0+2] >> 32)
|
||||||
|
state[v0+2] += state[mul1+2]
|
||||||
state[mul1+2] ^= uint64(uint32(state[v0+2])) * (state[v1+2] >> 32)
|
state[mul1+2] ^= uint64(uint32(state[v0+2])) * (state[v1+2] >> 32)
|
||||||
|
|
||||||
|
state[v1+3] += binary.LittleEndian.Uint64(m[24:]) + state[mul0+3]
|
||||||
|
state[mul0+3] ^= uint64(uint32(state[v1+3])) * (state[v0+3] >> 32)
|
||||||
|
state[v0+3] += state[mul1+3]
|
||||||
state[mul1+3] ^= uint64(uint32(state[v0+3])) * (state[v1+3] >> 32)
|
state[mul1+3] ^= uint64(uint32(state[v0+3])) * (state[v1+3] >> 32)
|
||||||
|
|
||||||
zipperMerge(state[v1+0], state[v1+1], &state[v0+0], &state[v0+1])
|
// inlined: zipperMerge(state[v1+0], state[v1+1], &state[v0+0], &state[v0+1])
|
||||||
zipperMerge(state[v1+2], state[v1+3], &state[v0+2], &state[v0+3])
|
{
|
||||||
|
val0 := state[v1+0]
|
||||||
|
val1 := state[v1+1]
|
||||||
|
res := val0 & (0xff << (2 * 8))
|
||||||
|
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
|
||||||
|
res += (val1 & (0xff << (7 * 8))) >> 8
|
||||||
|
res2 += (val0 & (0xff << (6 * 8))) >> 8
|
||||||
|
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
|
||||||
|
res2 += (val1 & (0xff << (5 * 8))) >> 16
|
||||||
|
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
|
||||||
|
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
|
||||||
|
res += (val0 & (0xff << (1 * 8))) << 32
|
||||||
|
res2 += (val1 & 0xff) << 48
|
||||||
|
res += val0 << 56
|
||||||
|
res2 += (val1 & (0xff << (1 * 8))) << 24
|
||||||
|
|
||||||
zipperMerge(state[v0+0], state[v0+1], &state[v1+0], &state[v1+1])
|
state[v0+0] += res
|
||||||
zipperMerge(state[v0+2], state[v0+3], &state[v1+2], &state[v1+3])
|
state[v0+1] += res2
|
||||||
|
}
|
||||||
|
// inlined: zipperMerge(state[v1+2], state[v1+3], &state[v0+2], &state[v0+3])
|
||||||
|
{
|
||||||
|
val0 := state[v1+2]
|
||||||
|
val1 := state[v1+3]
|
||||||
|
res := val0 & (0xff << (2 * 8))
|
||||||
|
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
|
||||||
|
res += (val1 & (0xff << (7 * 8))) >> 8
|
||||||
|
res2 += (val0 & (0xff << (6 * 8))) >> 8
|
||||||
|
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
|
||||||
|
res2 += (val1 & (0xff << (5 * 8))) >> 16
|
||||||
|
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
|
||||||
|
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
|
||||||
|
res += (val0 & (0xff << (1 * 8))) << 32
|
||||||
|
res2 += (val1 & 0xff) << 48
|
||||||
|
res += val0 << 56
|
||||||
|
res2 += (val1 & (0xff << (1 * 8))) << 24
|
||||||
|
|
||||||
|
state[v0+2] += res
|
||||||
|
state[v0+3] += res2
|
||||||
|
}
|
||||||
|
|
||||||
|
// inlined: zipperMerge(state[v0+0], state[v0+1], &state[v1+0], &state[v1+1])
|
||||||
|
{
|
||||||
|
val0 := state[v0+0]
|
||||||
|
val1 := state[v0+1]
|
||||||
|
res := val0 & (0xff << (2 * 8))
|
||||||
|
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
|
||||||
|
res += (val1 & (0xff << (7 * 8))) >> 8
|
||||||
|
res2 += (val0 & (0xff << (6 * 8))) >> 8
|
||||||
|
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
|
||||||
|
res2 += (val1 & (0xff << (5 * 8))) >> 16
|
||||||
|
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
|
||||||
|
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
|
||||||
|
res += (val0 & (0xff << (1 * 8))) << 32
|
||||||
|
res2 += (val1 & 0xff) << 48
|
||||||
|
res += val0 << 56
|
||||||
|
res2 += (val1 & (0xff << (1 * 8))) << 24
|
||||||
|
|
||||||
|
state[v1+0] += res
|
||||||
|
state[v1+1] += res2
|
||||||
|
}
|
||||||
|
|
||||||
|
// inlined: zipperMerge(state[v0+2], state[v0+3], &state[v1+2], &state[v1+3])
|
||||||
|
{
|
||||||
|
val0 := state[v0+2]
|
||||||
|
val1 := state[v0+3]
|
||||||
|
res := val0 & (0xff << (2 * 8))
|
||||||
|
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
|
||||||
|
res += (val1 & (0xff << (7 * 8))) >> 8
|
||||||
|
res2 += (val0 & (0xff << (6 * 8))) >> 8
|
||||||
|
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
|
||||||
|
res2 += (val1 & (0xff << (5 * 8))) >> 16
|
||||||
|
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
|
||||||
|
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
|
||||||
|
res += (val0 & (0xff << (1 * 8))) << 32
|
||||||
|
res2 += (val1 & 0xff) << 48
|
||||||
|
res += val0 << 56
|
||||||
|
res2 += (val1 & (0xff << (1 * 8))) << 24
|
||||||
|
|
||||||
|
state[v1+2] += res
|
||||||
|
state[v1+3] += res2
|
||||||
|
}
|
||||||
msg = msg[32:]
|
msg = msg[32:]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -124,25 +197,129 @@ func finalizeGeneric(out []byte, state *[16]uint64) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Experiments on variations left for future reference...
|
||||||
|
/*
|
||||||
func zipperMerge(v0, v1 uint64, d0, d1 *uint64) {
|
func zipperMerge(v0, v1 uint64, d0, d1 *uint64) {
|
||||||
m0 := v0 & (0xFF << (2 * 8))
|
if true {
|
||||||
m1 := (v1 & (0xFF << (7 * 8))) >> 8
|
// fastest. original interleaved...
|
||||||
m2 := ((v0 & (0xFF << (5 * 8))) + (v1 & (0xFF << (6 * 8)))) >> 16
|
res := v0 & (0xff << (2 * 8))
|
||||||
m3 := ((v0 & (0xFF << (3 * 8))) + (v1 & (0xFF << (4 * 8)))) >> 24
|
res2 := (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8)))
|
||||||
m4 := (v0 & (0xFF << (1 * 8))) << 32
|
res += (v1 & (0xff << (7 * 8))) >> 8
|
||||||
m5 := v0 << 56
|
res2 += (v0 & (0xff << (6 * 8))) >> 8
|
||||||
|
res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16
|
||||||
|
res2 += (v1 & (0xff << (5 * 8))) >> 16
|
||||||
|
res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24
|
||||||
|
res2 += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24
|
||||||
|
res += (v0 & (0xff << (1 * 8))) << 32
|
||||||
|
res2 += (v1 & 0xff) << 48
|
||||||
|
res += v0 << 56
|
||||||
|
res2 += (v1 & (0xff << (1 * 8))) << 24
|
||||||
|
|
||||||
*d0 += m0 + m1 + m2 + m3 + m4 + m5
|
*d0 += res
|
||||||
|
*d1 += res2
|
||||||
|
} else if false {
|
||||||
|
// Reading bytes and combining into uint64
|
||||||
|
var v0b [8]byte
|
||||||
|
binary.LittleEndian.PutUint64(v0b[:], v0)
|
||||||
|
var v1b [8]byte
|
||||||
|
binary.LittleEndian.PutUint64(v1b[:], v1)
|
||||||
|
var res, res2 uint64
|
||||||
|
|
||||||
m0 = (v0 & (0xFF << (7 * 8))) + (v1 & (0xFF << (2 * 8)))
|
res = uint64(v0b[0]) << (7 * 8)
|
||||||
m1 = (v0 & (0xFF << (6 * 8))) >> 8
|
res2 = uint64(v1b[0]) << (6 * 8)
|
||||||
m2 = (v1 & (0xFF << (5 * 8))) >> 16
|
res |= uint64(v0b[1]) << (5 * 8)
|
||||||
m3 = ((v1 & (0xFF << (3 * 8))) + (v0 & (0xFF << (4 * 8)))) >> 24
|
res2 |= uint64(v1b[1]) << (4 * 8)
|
||||||
m4 = (v1 & 0xFF) << 48
|
res |= uint64(v0b[2]) << (2 * 8)
|
||||||
m5 = (v1 & (0xFF << (1 * 8))) << 24
|
res2 |= uint64(v1b[2]) << (2 * 8)
|
||||||
|
res |= uint64(v0b[3])
|
||||||
|
res2 |= uint64(v0b[4]) << (1 * 8)
|
||||||
|
res |= uint64(v0b[5]) << (3 * 8)
|
||||||
|
res2 |= uint64(v0b[6]) << (5 * 8)
|
||||||
|
res |= uint64(v1b[4]) << (1 * 8)
|
||||||
|
res2 |= uint64(v0b[7]) << (7 * 8)
|
||||||
|
res |= uint64(v1b[6]) << (4 * 8)
|
||||||
|
res2 |= uint64(v1b[3])
|
||||||
|
res |= uint64(v1b[7]) << (6 * 8)
|
||||||
|
res2 |= uint64(v1b[5]) << (3 * 8)
|
||||||
|
|
||||||
*d1 += m3 + m2 + m5 + m1 + m4 + m0
|
*d0 += res
|
||||||
|
*d1 += res2
|
||||||
|
|
||||||
|
} else if false {
|
||||||
|
// bytes to bytes shuffle
|
||||||
|
var v0b [8]byte
|
||||||
|
binary.LittleEndian.PutUint64(v0b[:], v0)
|
||||||
|
var v1b [8]byte
|
||||||
|
binary.LittleEndian.PutUint64(v1b[:], v1)
|
||||||
|
var res [8]byte
|
||||||
|
|
||||||
|
//res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24
|
||||||
|
res[0] = v0b[3]
|
||||||
|
res[1] = v1b[4]
|
||||||
|
|
||||||
|
// res := v0 & (0xff << (2 * 8))
|
||||||
|
res[2] = v0b[2]
|
||||||
|
|
||||||
|
//res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16
|
||||||
|
res[3] = v0b[5]
|
||||||
|
res[4] = v1b[6]
|
||||||
|
|
||||||
|
//res += (v0 & (0xff << (1 * 8))) << 32
|
||||||
|
res[5] = v0b[1]
|
||||||
|
|
||||||
|
//res += (v1 & (0xff << (7 * 8))) >> 8
|
||||||
|
res[6] += v1b[7]
|
||||||
|
|
||||||
|
//res += v0 << 56
|
||||||
|
res[7] = v0b[0]
|
||||||
|
v0 = binary.LittleEndian.Uint64(res[:])
|
||||||
|
*d0 += v0
|
||||||
|
|
||||||
|
//res += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24
|
||||||
|
res[0] = v1b[3]
|
||||||
|
res[1] = v0b[4]
|
||||||
|
|
||||||
|
res[2] = v1b[2]
|
||||||
|
|
||||||
|
// res += (v1 & (0xff << (5 * 8))) >> 16
|
||||||
|
res[3] = v1b[5]
|
||||||
|
|
||||||
|
//res += (v1 & (0xff << (1 * 8))) << 24
|
||||||
|
res[4] = v1b[1]
|
||||||
|
|
||||||
|
// res += (v0 & (0xff << (6 * 8))) >> 8
|
||||||
|
res[5] = v0b[6]
|
||||||
|
|
||||||
|
//res := (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8)))
|
||||||
|
res[7] = v0b[7]
|
||||||
|
|
||||||
|
//res += (v1 & 0xff) << 48
|
||||||
|
res[6] = v1b[0]
|
||||||
|
|
||||||
|
v0 = binary.LittleEndian.Uint64(res[:])
|
||||||
|
*d1 += v0
|
||||||
|
} else {
|
||||||
|
// original.
|
||||||
|
res := v0 & (0xff << (2 * 8))
|
||||||
|
res += (v1 & (0xff << (7 * 8))) >> 8
|
||||||
|
res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16
|
||||||
|
res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24
|
||||||
|
res += (v0 & (0xff << (1 * 8))) << 32
|
||||||
|
res += v0 << 56
|
||||||
|
|
||||||
|
*d0 += res
|
||||||
|
|
||||||
|
res = (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8)))
|
||||||
|
res += (v0 & (0xff << (6 * 8))) >> 8
|
||||||
|
res += (v1 & (0xff << (5 * 8))) >> 16
|
||||||
|
res += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24
|
||||||
|
res += (v1 & 0xff) << 48
|
||||||
|
res += (v1 & (0xff << (1 * 8))) << 24
|
||||||
|
|
||||||
|
*d1 += res
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
// reduce v = [v0, v1, v2, v3] mod the irreducible polynomial x^128 + x^2 + x
|
// reduce v = [v0, v1, v2, v3] mod the irreducible polynomial x^128 + x^2 + x
|
||||||
func reduceMod(v0, v1, v2, v3 uint64) (r0, r1 uint64) {
|
func reduceMod(v0, v1, v2, v3 uint64) (r0, r1 uint64) {
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,8 @@
|
||||||
// Use of this source code is governed by a license that can be
|
// Use of this source code is governed by a license that can be
|
||||||
// found in the LICENSE file.
|
// found in the LICENSE file.
|
||||||
|
|
||||||
//+build !noasm,!appengine
|
//go:build !noasm && !appengine
|
||||||
|
// +build !noasm,!appengine
|
||||||
|
|
||||||
package highwayhash
|
package highwayhash
|
||||||
|
|
||||||
|
|
@ -10,6 +11,8 @@ var (
|
||||||
useSSE4 = false
|
useSSE4 = false
|
||||||
useAVX2 = false
|
useAVX2 = false
|
||||||
useNEON = false
|
useNEON = false
|
||||||
|
useSVE = false
|
||||||
|
useSVE2 = false
|
||||||
useVMX = true
|
useVMX = true
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@
|
||||||
// Use of this source code is governed by a license that can be
|
// Use of this source code is governed by a license that can be
|
||||||
// found in the LICENSE file.
|
// found in the LICENSE file.
|
||||||
|
|
||||||
|
//go:build noasm || (!amd64 && !arm64 && !ppc64le)
|
||||||
// +build noasm !amd64,!arm64,!ppc64le
|
// +build noasm !amd64,!arm64,!ppc64le
|
||||||
|
|
||||||
package highwayhash
|
package highwayhash
|
||||||
|
|
@ -10,6 +11,8 @@ var (
|
||||||
useSSE4 = false
|
useSSE4 = false
|
||||||
useAVX2 = false
|
useAVX2 = false
|
||||||
useNEON = false
|
useNEON = false
|
||||||
|
useSVE = false
|
||||||
|
useSVE2 = false
|
||||||
useVMX = false
|
useVMX = false
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -842,7 +842,7 @@ github.com/magiconair/properties
|
||||||
# github.com/matttproud/golang_protobuf_extensions v1.0.4
|
# github.com/matttproud/golang_protobuf_extensions v1.0.4
|
||||||
## explicit; go 1.9
|
## explicit; go 1.9
|
||||||
github.com/matttproud/golang_protobuf_extensions/pbutil
|
github.com/matttproud/golang_protobuf_extensions/pbutil
|
||||||
# github.com/minio/highwayhash v1.0.2
|
# github.com/minio/highwayhash v1.0.3
|
||||||
## explicit; go 1.15
|
## explicit; go 1.15
|
||||||
github.com/minio/highwayhash
|
github.com/minio/highwayhash
|
||||||
# github.com/mitchellh/go-homedir v1.1.0
|
# github.com/mitchellh/go-homedir v1.1.0
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue