chore(deps): bump github.com/minio/highwayhash from 1.0.2 to 1.0.3 (#3252)
Bumps [github.com/minio/highwayhash](https://github.com/minio/highwayhash) from 1.0.2 to 1.0.3. - [Release notes](https://github.com/minio/highwayhash/releases) - [Commits](https://github.com/minio/highwayhash/compare/v1.0.2...v1.0.3) --- updated-dependencies: - dependency-name: github.com/minio/highwayhash dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
This commit is contained in:
		
							parent
							
								
									7825b6bf4f
								
							
						
					
					
						commit
						725c6dacdc
					
				
							
								
								
									
										2
									
								
								go.mod
								
								
								
								
							
							
						
						
									
										2
									
								
								go.mod
								
								
								
								
							|  | @ -21,7 +21,7 @@ require ( | |||
| 	github.com/google/go-github v17.0.0+incompatible | ||||
| 	github.com/google/slowjam v1.1.1 | ||||
| 	github.com/karrick/godirwalk v1.16.1 | ||||
| 	github.com/minio/highwayhash v1.0.2 | ||||
| 	github.com/minio/highwayhash v1.0.3 | ||||
| 	github.com/moby/buildkit v0.14.1 | ||||
| 	github.com/otiai10/copy v1.14.0 | ||||
| 	github.com/pkg/errors v0.9.1 | ||||
|  |  | |||
							
								
								
									
										5
									
								
								go.sum
								
								
								
								
							
							
						
						
									
										5
									
								
								go.sum
								
								
								
								
							|  | @ -342,8 +342,8 @@ github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3v | |||
| github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= | ||||
| github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= | ||||
| github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= | ||||
| github.com/minio/highwayhash v1.0.2 h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA/g= | ||||
| github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY= | ||||
| github.com/minio/highwayhash v1.0.3 h1:kbnuUMoHYyVl7szWjSxJnxw11k2U709jqFPPmIUyD6Q= | ||||
| github.com/minio/highwayhash v1.0.3/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ= | ||||
| github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= | ||||
| github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= | ||||
| github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= | ||||
|  | @ -570,7 +570,6 @@ golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= | |||
| golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | ||||
| golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | ||||
| golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | ||||
| golang.org/x/sys v0.0.0-20190130150945-aca44879d564/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | ||||
| golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | ||||
| golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | ||||
| golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | ||||
|  |  | |||
|  | @ -12,13 +12,11 @@ linters: | |||
|     - goimports | ||||
|     - misspell | ||||
|     - govet | ||||
|     - golint | ||||
|     - revive | ||||
|     - ineffassign | ||||
|     - gosimple | ||||
|     - deadcode | ||||
|     - unparam | ||||
|     - unused | ||||
|     - structcheck | ||||
| 
 | ||||
| issues: | ||||
|   exclude-use-default: false | ||||
|  | @ -27,4 +25,4 @@ issues: | |||
|       - error strings should not be capitalized or end with punctuation or a newline | ||||
|       - should have comment           # TODO(aead): Remove once all exported ident. have comments! | ||||
| service: | ||||
|   golangci-lint-version: 1.20.0 # use the fixed version to not introduce new linters unexpectedly | ||||
|   golangci-lint-version: 1.51.2 # use the fixed version to not introduce new linters unexpectedly | ||||
|  |  | |||
|  | @ -42,17 +42,17 @@ So for moderately sized messages it tops out at about 15 GB/sec. Also for small | |||
| 
 | ||||
| ### ARM Performance | ||||
| 
 | ||||
| Below are the single core results on an EC2 m6g.4xlarge (Graviton2) instance for 256 bit outputs: | ||||
| Below are the single core results on an EC2 c7g.4xlarge (Graviton3) instance for 256 bit outputs: | ||||
| 
 | ||||
| ``` | ||||
| BenchmarkSum256_16                 96.82 MB/s | ||||
| BenchmarkSum256_64                445.35 MB/s | ||||
| BenchmarkSum256_1K               2782.46 MB/s | ||||
| BenchmarkSum256_8K               4083.58 MB/s | ||||
| BenchmarkSum256_1M               4986.41 MB/s | ||||
| BenchmarkSum256_5M               4992.72 MB/s | ||||
| BenchmarkSum256_10M              4993.32 MB/s | ||||
| BenchmarkSum256_25M              4992.55 MB/s | ||||
| BenchmarkSum256_16                143.66 MB/s | ||||
| BenchmarkSum256_64                628.75 MB/s | ||||
| BenchmarkSum256_1K               3621.71 MB/s | ||||
| BenchmarkSum256_8K               5039.64 MB/s | ||||
| BenchmarkSum256_1M               5279.79 MB/s | ||||
| BenchmarkSum256_5M               5474.60 MB/s | ||||
| BenchmarkSum256_10M              5621.73 MB/s | ||||
| BenchmarkSum256_25M              5250.47 MB/s | ||||
| ``` | ||||
| 
 | ||||
| ### ppc64le Performance | ||||
|  |  | |||
|  | @ -0,0 +1,132 @@ | |||
| // | ||||
| // Copyright (c) 2024 Minio Inc. All rights reserved. | ||||
| // | ||||
| // Licensed under the Apache License, Version 2.0 (the "License");
 | ||||
| // you may not use this file except in compliance with the License. | ||||
| // You may obtain a copy of the License at | ||||
| // | ||||
| //     http://www.apache.org/licenses/LICENSE-2.0 | ||||
| // | ||||
| // Unless required by applicable law or agreed to in writing, software | ||||
| // distributed under the License is distributed on an "AS IS" BASIS, | ||||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| // See the License for the specific language governing permissions and | ||||
| // limitations under the License. | ||||
| // | ||||
| 
 | ||||
| //+build !noasm,!appengine | ||||
| 
 | ||||
| #include "textflag.h" | ||||
| 
 | ||||
| TEXT ·getVectorLength(SB), NOSPLIT, $0 | ||||
|     WORD $0xd2800002 // mov   x2, #0 | ||||
|     WORD $0x04225022 // addvl x2, x2, #1 | ||||
|     WORD $0xd37df042 // lsl   x2, x2, #3 | ||||
|     WORD $0xd2800003 // mov   x3, #0 | ||||
|     WORD $0x04635023 // addpl x3, x3, #1 | ||||
|     WORD $0xd37df063 // lsl   x3, x3, #3 | ||||
|     MOVD R2, vl+0(FP) | ||||
|     MOVD R3, pl+8(FP) | ||||
|     RET | ||||
| 
 | ||||
| TEXT ·updateArm64Sve(SB), NOSPLIT, $0 | ||||
|     MOVD state+0(FP), R0 | ||||
|     MOVD msg_base+8(FP), R1 | ||||
|     MOVD msg_len+16(FP), R2 // length of message | ||||
|     SUBS $32, R2 | ||||
|     BMI  completeSve | ||||
| 
 | ||||
|     WORD $0x2518e3e1 // ptrue p1.b | ||||
|     WORD $0xa5e0a401 // ld1d  z1.d, p1/z, [x0] | ||||
|     WORD $0xa5e1a402 // ld1d  z2.d, p1/z, [x0, #1, MUL VL] | ||||
|     WORD $0xa5e2a403 // ld1d  z3.d, p1/z, [x0, #2, MUL VL] | ||||
|     WORD $0xa5e3a404 // ld1d  z4.d, p1/z, [x0, #3, MUL VL] | ||||
| 
 | ||||
|     // Load zipper merge constants table pointer | ||||
|     MOVD $·zipperMergeSve(SB), R3 | ||||
|     WORD $0xa5e0a465 // ld1d  z5.d, p1/z, [x3] | ||||
|     WORD $0x25b8c006 // mov   z6.s, #0 | ||||
|     WORD $0x25d8e3e2 // ptrue p2.d              /* set every other lane for "s" type */ | ||||
| 
 | ||||
| loopSve: | ||||
|     WORD $0xa5e0a420 // ld1d  z0.d, p1/z, [x1] | ||||
|     ADD  $32, R1 | ||||
| 
 | ||||
|     WORD $0x04e00042 // add z2.d, z2.d, z0.d | ||||
|     WORD $0x04e30042 // add z2.d, z2.d, z3.d | ||||
|     WORD $0x04e09420 // lsr z0.d, z1.d, #32 | ||||
|     WORD $0x05a6c847 // sel z7.s, p2, z2.s, z6.s | ||||
|     WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d | ||||
|     WORD $0x04a33003 // eor z3.d, z0.d, z3.d | ||||
|     WORD $0x04e10081 // add z1.d, z4.d, z1.d | ||||
|     WORD $0x04e09440 // lsr z0.d, z2.d, #32 | ||||
|     WORD $0x05a6c827 // sel z7.s, p2, z1.s, z6.s | ||||
|     WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d | ||||
|     WORD $0x04a43004 // eor z4.d, z0.d, z4.d | ||||
|     WORD $0x05253040 // tbl z0.b, z2.b, z5.b | ||||
|     WORD $0x04e00021 // add z1.d, z1.d, z0.d | ||||
|     WORD $0x05253020 // tbl z0.b, z1.b, z5.b | ||||
|     WORD $0x04e00042 // add z2.d, z2.d, z0.d | ||||
| 
 | ||||
|     SUBS $32, R2 | ||||
|     BPL  loopSve | ||||
| 
 | ||||
|     WORD $0xe5e0e401 // st1d z1.d, p1, [x0] | ||||
|     WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL] | ||||
|     WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL] | ||||
|     WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL] | ||||
| 
 | ||||
| completeSve: | ||||
|     RET | ||||
| 
 | ||||
| TEXT ·updateArm64Sve2(SB), NOSPLIT, $0 | ||||
|     MOVD state+0(FP), R0 | ||||
|     MOVD msg_base+8(FP), R1 | ||||
|     MOVD msg_len+16(FP), R2 // length of message | ||||
|     SUBS $32, R2 | ||||
|     BMI  completeSve2 | ||||
| 
 | ||||
|     WORD $0x2518e3e1 // ptrue p1.b | ||||
|     WORD $0xa5e0a401 // ld1d  z1.d, p1/z, [x0] | ||||
|     WORD $0xa5e1a402 // ld1d  z2.d, p1/z, [x0, #1, MUL VL] | ||||
|     WORD $0xa5e2a403 // ld1d  z3.d, p1/z, [x0, #2, MUL VL] | ||||
|     WORD $0xa5e3a404 // ld1d  z4.d, p1/z, [x0, #3, MUL VL] | ||||
| 
 | ||||
|     // Load zipper merge constants table pointer | ||||
|     MOVD $·zipperMergeSve(SB), R3 | ||||
|     WORD $0xa5e0a465 // ld1d  z5.d, p1/z, [x3] | ||||
| 
 | ||||
| loopSve2: | ||||
|     WORD $0xa5e0a420 // ld1d  z0.d, p1/z, [x1] | ||||
|     ADD  $32, R1 | ||||
| 
 | ||||
|     WORD $0x04e00042 // add z2.d, z2.d, z0.d | ||||
|     WORD $0x04e30042 // add z2.d, z2.d, z3.d | ||||
|     WORD $0x04e09420 // lsr z0.d, z1.d, #32 | ||||
|     WORD $0x45c27800 // umullb z0.d, z0.s, z2.s | ||||
|     WORD $0x04a33003 // eor z3.d, z0.d, z3.d | ||||
|     WORD $0x04e10081 // add z1.d, z4.d, z1.d | ||||
|     WORD $0x04e09440 // lsr z0.d, z2.d, #32 | ||||
|     WORD $0x45c17800 // umullb z0.d, z0.s, z1.s | ||||
|     WORD $0x04a43004 // eor z4.d, z0.d, z4.d | ||||
|     WORD $0x05253040 // tbl z0.b, z2.b, z5.b | ||||
|     WORD $0x04e00021 // add z1.d, z1.d, z0.d | ||||
|     WORD $0x05253020 // tbl z0.b, z1.b, z5.b | ||||
|     WORD $0x04e00042 // add z2.d, z2.d, z0.d | ||||
| 
 | ||||
|     SUBS $32, R2 | ||||
|     BPL  loopSve2 | ||||
| 
 | ||||
|     WORD $0xe5e0e401 // st1d z1.d, p1, [x0] | ||||
|     WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL] | ||||
|     WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL] | ||||
|     WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL] | ||||
| 
 | ||||
| completeSve2: | ||||
|     RET | ||||
| 
 | ||||
| DATA ·zipperMergeSve+0x00(SB)/8, $0x000f010e05020c03 | ||||
| DATA ·zipperMergeSve+0x08(SB)/8, $0x070806090d0a040b | ||||
| DATA ·zipperMergeSve+0x10(SB)/8, $0x101f111e15121c13 | ||||
| DATA ·zipperMergeSve+0x18(SB)/8, $0x171816191d1a141b | ||||
| GLOBL ·zipperMergeSve(SB), (NOPTR+RODATA), $32 | ||||
|  | @ -2,6 +2,7 @@ | |||
| // Use of this source code is governed by a license that can be
 | ||||
| // found in the LICENSE file.
 | ||||
| 
 | ||||
| //go:build amd64 && !gccgo && !appengine && !nacl && !noasm
 | ||||
| // +build amd64,!gccgo,!appengine,!nacl,!noasm
 | ||||
| 
 | ||||
| package highwayhash | ||||
|  | @ -12,6 +13,8 @@ var ( | |||
| 	useSSE4 = cpu.X86.HasSSE41 | ||||
| 	useAVX2 = cpu.X86.HasAVX2 | ||||
| 	useNEON = false | ||||
| 	useSVE  = false | ||||
| 	useSVE2 = false | ||||
| 	useVMX  = false | ||||
| ) | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,24 +1,54 @@ | |||
| // Copyright (c) 2017 Minio Inc. All rights reserved.
 | ||||
| // Copyright (c) 2017-2024 Minio Inc. All rights reserved.
 | ||||
| // Use of this source code is governed by a license that can be
 | ||||
| // found in the LICENSE file.
 | ||||
| 
 | ||||
| //+build !noasm,!appengine
 | ||||
| //go:build !noasm && !appengine
 | ||||
| // +build !noasm,!appengine
 | ||||
| 
 | ||||
| package highwayhash | ||||
| 
 | ||||
| import ( | ||||
| 	"golang.org/x/sys/cpu" | ||||
| ) | ||||
| 
 | ||||
| var ( | ||||
| 	useSSE4 = false | ||||
| 	useAVX2 = false | ||||
| 	useNEON = true | ||||
| 	useNEON = cpu.ARM64.HasASIMD | ||||
| 	useSVE  = cpu.ARM64.HasSVE | ||||
| 	useSVE2 = false // cpu.ARM64.HasSVE2 -- disable until tested on real hardware
 | ||||
| 	useVMX  = false | ||||
| ) | ||||
| 
 | ||||
| func init() { | ||||
| 	if useSVE { | ||||
| 		if vl, _ := getVectorLength(); vl != 256 { | ||||
| 			//
 | ||||
| 			// Since HighwayHash is designed for AVX2,
 | ||||
| 			// SVE/SVE2 instructions only run correctly
 | ||||
| 			// for a vector length of 256
 | ||||
| 			//
 | ||||
| 			useSVE2 = false | ||||
| 			useSVE = false | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| //go:noescape
 | ||||
| func initializeArm64(state *[16]uint64, key []byte) | ||||
| 
 | ||||
| //go:noescape
 | ||||
| func updateArm64(state *[16]uint64, msg []byte) | ||||
| 
 | ||||
| //go:noescape
 | ||||
| func getVectorLength() (vl, pl uint64) | ||||
| 
 | ||||
| //go:noescape
 | ||||
| func updateArm64Sve(state *[16]uint64, msg []byte) | ||||
| 
 | ||||
| //go:noescape
 | ||||
| func updateArm64Sve2(state *[16]uint64, msg []byte) | ||||
| 
 | ||||
| //go:noescape
 | ||||
| func finalizeArm64(out []byte, state *[16]uint64) | ||||
| 
 | ||||
|  | @ -31,7 +61,11 @@ func initialize(state *[16]uint64, key []byte) { | |||
| } | ||||
| 
 | ||||
| func update(state *[16]uint64, msg []byte) { | ||||
| 	if useNEON { | ||||
| 	if useSVE2 { | ||||
| 		updateArm64Sve2(state, msg) | ||||
| 	} else if useSVE { | ||||
| 		updateArm64Sve(state, msg) | ||||
| 	} else if useNEON { | ||||
| 		updateArm64(state, msg) | ||||
| 	} else { | ||||
| 		updateGeneric(state, msg) | ||||
|  |  | |||
|  | @ -46,40 +46,113 @@ func initializeGeneric(state *[16]uint64, k []byte) { | |||
| } | ||||
| 
 | ||||
| func updateGeneric(state *[16]uint64, msg []byte) { | ||||
| 	for len(msg) > 0 { | ||||
| 		// add message
 | ||||
| 		state[v1+0] += binary.LittleEndian.Uint64(msg) | ||||
| 		state[v1+1] += binary.LittleEndian.Uint64(msg[8:]) | ||||
| 		state[v1+2] += binary.LittleEndian.Uint64(msg[16:]) | ||||
| 		state[v1+3] += binary.LittleEndian.Uint64(msg[24:]) | ||||
| 
 | ||||
| 		// v1 += mul0
 | ||||
| 		state[v1+0] += state[mul0+0] | ||||
| 		state[v1+1] += state[mul0+1] | ||||
| 		state[v1+2] += state[mul0+2] | ||||
| 		state[v1+3] += state[mul0+3] | ||||
| 	for len(msg) >= 32 { | ||||
| 		m := msg[:32] | ||||
| 
 | ||||
| 		// add message + mul0
 | ||||
| 		// Interleave operations to hide multiplication
 | ||||
| 		state[v1+0] += binary.LittleEndian.Uint64(m) + state[mul0+0] | ||||
| 		state[mul0+0] ^= uint64(uint32(state[v1+0])) * (state[v0+0] >> 32) | ||||
| 		state[mul0+1] ^= uint64(uint32(state[v1+1])) * (state[v0+1] >> 32) | ||||
| 		state[mul0+2] ^= uint64(uint32(state[v1+2])) * (state[v0+2] >> 32) | ||||
| 		state[mul0+3] ^= uint64(uint32(state[v1+3])) * (state[v0+3] >> 32) | ||||
| 
 | ||||
| 		// v0 += mul1
 | ||||
| 		state[v0+0] += state[mul1+0] | ||||
| 		state[v0+1] += state[mul1+1] | ||||
| 		state[v0+2] += state[mul1+2] | ||||
| 		state[v0+3] += state[mul1+3] | ||||
| 
 | ||||
| 		state[mul1+0] ^= uint64(uint32(state[v0+0])) * (state[v1+0] >> 32) | ||||
| 
 | ||||
| 		state[v1+1] += binary.LittleEndian.Uint64(m[8:]) + state[mul0+1] | ||||
| 		state[mul0+1] ^= uint64(uint32(state[v1+1])) * (state[v0+1] >> 32) | ||||
| 		state[v0+1] += state[mul1+1] | ||||
| 		state[mul1+1] ^= uint64(uint32(state[v0+1])) * (state[v1+1] >> 32) | ||||
| 
 | ||||
| 		state[v1+2] += binary.LittleEndian.Uint64(m[16:]) + state[mul0+2] | ||||
| 		state[mul0+2] ^= uint64(uint32(state[v1+2])) * (state[v0+2] >> 32) | ||||
| 		state[v0+2] += state[mul1+2] | ||||
| 		state[mul1+2] ^= uint64(uint32(state[v0+2])) * (state[v1+2] >> 32) | ||||
| 
 | ||||
| 		state[v1+3] += binary.LittleEndian.Uint64(m[24:]) + state[mul0+3] | ||||
| 		state[mul0+3] ^= uint64(uint32(state[v1+3])) * (state[v0+3] >> 32) | ||||
| 		state[v0+3] += state[mul1+3] | ||||
| 		state[mul1+3] ^= uint64(uint32(state[v0+3])) * (state[v1+3] >> 32) | ||||
| 
 | ||||
| 		zipperMerge(state[v1+0], state[v1+1], &state[v0+0], &state[v0+1]) | ||||
| 		zipperMerge(state[v1+2], state[v1+3], &state[v0+2], &state[v0+3]) | ||||
| 		// inlined: zipperMerge(state[v1+0], state[v1+1], &state[v0+0], &state[v0+1])
 | ||||
| 		{ | ||||
| 			val0 := state[v1+0] | ||||
| 			val1 := state[v1+1] | ||||
| 			res := val0 & (0xff << (2 * 8)) | ||||
| 			res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8))) | ||||
| 			res += (val1 & (0xff << (7 * 8))) >> 8 | ||||
| 			res2 += (val0 & (0xff << (6 * 8))) >> 8 | ||||
| 			res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16 | ||||
| 			res2 += (val1 & (0xff << (5 * 8))) >> 16 | ||||
| 			res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24 | ||||
| 			res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24 | ||||
| 			res += (val0 & (0xff << (1 * 8))) << 32 | ||||
| 			res2 += (val1 & 0xff) << 48 | ||||
| 			res += val0 << 56 | ||||
| 			res2 += (val1 & (0xff << (1 * 8))) << 24 | ||||
| 
 | ||||
| 		zipperMerge(state[v0+0], state[v0+1], &state[v1+0], &state[v1+1]) | ||||
| 		zipperMerge(state[v0+2], state[v0+3], &state[v1+2], &state[v1+3]) | ||||
| 			state[v0+0] += res | ||||
| 			state[v0+1] += res2 | ||||
| 		} | ||||
| 		// zipperMerge(state[v1+2], state[v1+3], &state[v0+2], &state[v0+3])
 | ||||
| 		{ | ||||
| 			val0 := state[v1+2] | ||||
| 			val1 := state[v1+3] | ||||
| 			res := val0 & (0xff << (2 * 8)) | ||||
| 			res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8))) | ||||
| 			res += (val1 & (0xff << (7 * 8))) >> 8 | ||||
| 			res2 += (val0 & (0xff << (6 * 8))) >> 8 | ||||
| 			res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16 | ||||
| 			res2 += (val1 & (0xff << (5 * 8))) >> 16 | ||||
| 			res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24 | ||||
| 			res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24 | ||||
| 			res += (val0 & (0xff << (1 * 8))) << 32 | ||||
| 			res2 += (val1 & 0xff) << 48 | ||||
| 			res += val0 << 56 | ||||
| 			res2 += (val1 & (0xff << (1 * 8))) << 24 | ||||
| 
 | ||||
| 			state[v0+2] += res | ||||
| 			state[v0+3] += res2 | ||||
| 		} | ||||
| 
 | ||||
| 		// inlined: zipperMerge(state[v0+0], state[v0+1], &state[v1+0], &state[v1+1])
 | ||||
| 		{ | ||||
| 			val0 := state[v0+0] | ||||
| 			val1 := state[v0+1] | ||||
| 			res := val0 & (0xff << (2 * 8)) | ||||
| 			res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8))) | ||||
| 			res += (val1 & (0xff << (7 * 8))) >> 8 | ||||
| 			res2 += (val0 & (0xff << (6 * 8))) >> 8 | ||||
| 			res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16 | ||||
| 			res2 += (val1 & (0xff << (5 * 8))) >> 16 | ||||
| 			res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24 | ||||
| 			res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24 | ||||
| 			res += (val0 & (0xff << (1 * 8))) << 32 | ||||
| 			res2 += (val1 & 0xff) << 48 | ||||
| 			res += val0 << 56 | ||||
| 			res2 += (val1 & (0xff << (1 * 8))) << 24 | ||||
| 
 | ||||
| 			state[v1+0] += res | ||||
| 			state[v1+1] += res2 | ||||
| 		} | ||||
| 
 | ||||
| 		//inlined: zipperMerge(state[v0+2], state[v0+3], &state[v1+2], &state[v1+3])
 | ||||
| 		{ | ||||
| 			val0 := state[v0+2] | ||||
| 			val1 := state[v0+3] | ||||
| 			res := val0 & (0xff << (2 * 8)) | ||||
| 			res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8))) | ||||
| 			res += (val1 & (0xff << (7 * 8))) >> 8 | ||||
| 			res2 += (val0 & (0xff << (6 * 8))) >> 8 | ||||
| 			res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16 | ||||
| 			res2 += (val1 & (0xff << (5 * 8))) >> 16 | ||||
| 			res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24 | ||||
| 			res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24 | ||||
| 			res += (val0 & (0xff << (1 * 8))) << 32 | ||||
| 			res2 += (val1 & 0xff) << 48 | ||||
| 			res += val0 << 56 | ||||
| 			res2 += (val1 & (0xff << (1 * 8))) << 24 | ||||
| 
 | ||||
| 			state[v1+2] += res | ||||
| 			state[v1+3] += res2 | ||||
| 		} | ||||
| 		msg = msg[32:] | ||||
| 	} | ||||
| } | ||||
|  | @ -124,25 +197,129 @@ func finalizeGeneric(out []byte, state *[16]uint64) { | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| // Experiments on variations left for future reference...
 | ||||
| /* | ||||
| func zipperMerge(v0, v1 uint64, d0, d1 *uint64) { | ||||
| 	m0 := v0 & (0xFF << (2 * 8)) | ||||
| 	m1 := (v1 & (0xFF << (7 * 8))) >> 8 | ||||
| 	m2 := ((v0 & (0xFF << (5 * 8))) + (v1 & (0xFF << (6 * 8)))) >> 16 | ||||
| 	m3 := ((v0 & (0xFF << (3 * 8))) + (v1 & (0xFF << (4 * 8)))) >> 24 | ||||
| 	m4 := (v0 & (0xFF << (1 * 8))) << 32 | ||||
| 	m5 := v0 << 56 | ||||
| 	if true { | ||||
| 		// fastest. original interleaved...
 | ||||
| 		res := v0 & (0xff << (2 * 8)) | ||||
| 		res2 := (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8))) | ||||
| 		res += (v1 & (0xff << (7 * 8))) >> 8 | ||||
| 		res2 += (v0 & (0xff << (6 * 8))) >> 8 | ||||
| 		res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16 | ||||
| 		res2 += (v1 & (0xff << (5 * 8))) >> 16 | ||||
| 		res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24 | ||||
| 		res2 += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24 | ||||
| 		res += (v0 & (0xff << (1 * 8))) << 32 | ||||
| 		res2 += (v1 & 0xff) << 48 | ||||
| 		res += v0 << 56 | ||||
| 		res2 += (v1 & (0xff << (1 * 8))) << 24 | ||||
| 
 | ||||
| 	*d0 += m0 + m1 + m2 + m3 + m4 + m5 | ||||
| 		*d0 += res | ||||
| 		*d1 += res2 | ||||
| 	} else if false { | ||||
| 		// Reading bytes and combining into uint64
 | ||||
| 		var v0b [8]byte | ||||
| 		binary.LittleEndian.PutUint64(v0b[:], v0) | ||||
| 		var v1b [8]byte | ||||
| 		binary.LittleEndian.PutUint64(v1b[:], v1) | ||||
| 		var res, res2 uint64 | ||||
| 
 | ||||
| 	m0 = (v0 & (0xFF << (7 * 8))) + (v1 & (0xFF << (2 * 8))) | ||||
| 	m1 = (v0 & (0xFF << (6 * 8))) >> 8 | ||||
| 	m2 = (v1 & (0xFF << (5 * 8))) >> 16 | ||||
| 	m3 = ((v1 & (0xFF << (3 * 8))) + (v0 & (0xFF << (4 * 8)))) >> 24 | ||||
| 	m4 = (v1 & 0xFF) << 48 | ||||
| 	m5 = (v1 & (0xFF << (1 * 8))) << 24 | ||||
| 		res = uint64(v0b[0]) << (7 * 8) | ||||
| 		res2 = uint64(v1b[0]) << (6 * 8) | ||||
| 		res |= uint64(v0b[1]) << (5 * 8) | ||||
| 		res2 |= uint64(v1b[1]) << (4 * 8) | ||||
| 		res |= uint64(v0b[2]) << (2 * 8) | ||||
| 		res2 |= uint64(v1b[2]) << (2 * 8) | ||||
| 		res |= uint64(v0b[3]) | ||||
| 		res2 |= uint64(v0b[4]) << (1 * 8) | ||||
| 		res |= uint64(v0b[5]) << (3 * 8) | ||||
| 		res2 |= uint64(v0b[6]) << (5 * 8) | ||||
| 		res |= uint64(v1b[4]) << (1 * 8) | ||||
| 		res2 |= uint64(v0b[7]) << (7 * 8) | ||||
| 		res |= uint64(v1b[6]) << (4 * 8) | ||||
| 		res2 |= uint64(v1b[3]) | ||||
| 		res |= uint64(v1b[7]) << (6 * 8) | ||||
| 		res2 |= uint64(v1b[5]) << (3 * 8) | ||||
| 
 | ||||
| 	*d1 += m3 + m2 + m5 + m1 + m4 + m0 | ||||
| 		*d0 += res | ||||
| 		*d1 += res2 | ||||
| 
 | ||||
| 	} else if false { | ||||
| 		// bytes to bytes shuffle
 | ||||
| 		var v0b [8]byte | ||||
| 		binary.LittleEndian.PutUint64(v0b[:], v0) | ||||
| 		var v1b [8]byte | ||||
| 		binary.LittleEndian.PutUint64(v1b[:], v1) | ||||
| 		var res [8]byte | ||||
| 
 | ||||
| 		//res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24
 | ||||
| 		res[0] = v0b[3] | ||||
| 		res[1] = v1b[4] | ||||
| 
 | ||||
| 		// res := v0 & (0xff << (2 * 8))
 | ||||
| 		res[2] = v0b[2] | ||||
| 
 | ||||
| 		//res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16
 | ||||
| 		res[3] = v0b[5] | ||||
| 		res[4] = v1b[6] | ||||
| 
 | ||||
| 		//res += (v0 & (0xff << (1 * 8))) << 32
 | ||||
| 		res[5] = v0b[1] | ||||
| 
 | ||||
| 		//res += (v1 & (0xff << (7 * 8))) >> 8
 | ||||
| 		res[6] += v1b[7] | ||||
| 
 | ||||
| 		//res += v0 << 56
 | ||||
| 		res[7] = v0b[0] | ||||
| 		v0 = binary.LittleEndian.Uint64(res[:]) | ||||
| 		*d0 += v0 | ||||
| 
 | ||||
| 		//res += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24
 | ||||
| 		res[0] = v1b[3] | ||||
| 		res[1] = v0b[4] | ||||
| 
 | ||||
| 		res[2] = v1b[2] | ||||
| 
 | ||||
| 		// res += (v1 & (0xff << (5 * 8))) >> 16
 | ||||
| 		res[3] = v1b[5] | ||||
| 
 | ||||
| 		//res += (v1 & (0xff << (1 * 8))) << 24
 | ||||
| 		res[4] = v1b[1] | ||||
| 
 | ||||
| 		// res += (v0 & (0xff << (6 * 8))) >> 8
 | ||||
| 		res[5] = v0b[6] | ||||
| 
 | ||||
| 		//res := (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8)))
 | ||||
| 		res[7] = v0b[7] | ||||
| 
 | ||||
| 		//res += (v1 & 0xff) << 48
 | ||||
| 		res[6] = v1b[0] | ||||
| 
 | ||||
| 		v0 = binary.LittleEndian.Uint64(res[:]) | ||||
| 		*d1 += v0 | ||||
| 	} else { | ||||
| 		// original.
 | ||||
| 		res := v0 & (0xff << (2 * 8)) | ||||
| 		res += (v1 & (0xff << (7 * 8))) >> 8 | ||||
| 		res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16 | ||||
| 		res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24 | ||||
| 		res += (v0 & (0xff << (1 * 8))) << 32 | ||||
| 		res += v0 << 56 | ||||
| 
 | ||||
| 		*d0 += res | ||||
| 
 | ||||
| 		res = (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8))) | ||||
| 		res += (v0 & (0xff << (6 * 8))) >> 8 | ||||
| 		res += (v1 & (0xff << (5 * 8))) >> 16 | ||||
| 		res += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24 | ||||
| 		res += (v1 & 0xff) << 48 | ||||
| 		res += (v1 & (0xff << (1 * 8))) << 24 | ||||
| 
 | ||||
| 		*d1 += res | ||||
| 	} | ||||
| } | ||||
| */ | ||||
| 
 | ||||
| // reduce v = [v0, v1, v2, v3] mod the irreducible polynomial x^128 + x^2 + x
 | ||||
| func reduceMod(v0, v1, v2, v3 uint64) (r0, r1 uint64) { | ||||
|  |  | |||
|  | @ -2,7 +2,8 @@ | |||
| // Use of this source code is governed by a license that can be
 | ||||
| // found in the LICENSE file.
 | ||||
| 
 | ||||
| //+build !noasm,!appengine
 | ||||
| //go:build !noasm && !appengine
 | ||||
| // +build !noasm,!appengine
 | ||||
| 
 | ||||
| package highwayhash | ||||
| 
 | ||||
|  | @ -10,6 +11,8 @@ var ( | |||
| 	useSSE4 = false | ||||
| 	useAVX2 = false | ||||
| 	useNEON = false | ||||
| 	useSVE  = false | ||||
| 	useSVE2 = false | ||||
| 	useVMX  = true | ||||
| ) | ||||
| 
 | ||||
|  |  | |||
|  | @ -2,6 +2,7 @@ | |||
| // Use of this source code is governed by a license that can be
 | ||||
| // found in the LICENSE file.
 | ||||
| 
 | ||||
| //go:build noasm || (!amd64 && !arm64 && !ppc64le)
 | ||||
| // +build noasm !amd64,!arm64,!ppc64le
 | ||||
| 
 | ||||
| package highwayhash | ||||
|  | @ -10,6 +11,8 @@ var ( | |||
| 	useSSE4 = false | ||||
| 	useAVX2 = false | ||||
| 	useNEON = false | ||||
| 	useSVE  = false | ||||
| 	useSVE2 = false | ||||
| 	useVMX  = false | ||||
| ) | ||||
| 
 | ||||
|  |  | |||
|  | @ -842,7 +842,7 @@ github.com/magiconair/properties | |||
| # github.com/matttproud/golang_protobuf_extensions v1.0.4 | ||||
| ## explicit; go 1.9 | ||||
| github.com/matttproud/golang_protobuf_extensions/pbutil | ||||
| # github.com/minio/highwayhash v1.0.2 | ||||
| # github.com/minio/highwayhash v1.0.3 | ||||
| ## explicit; go 1.15 | ||||
| github.com/minio/highwayhash | ||||
| # github.com/mitchellh/go-homedir v1.1.0 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue