kaniko/vendor/github.com/minio/highwayhash/highwayhashSVE_arm64.s

133 lines
4.4 KiB
ArmAsm

//
// Copyright (c) 2024 Minio Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//+build !noasm,!appengine
#include "textflag.h"
// getVectorLength reports the SVE vector-register and predicate-register
// sizes of the running CPU, both expressed in bits.
//
// Results are written to the frame slots vl+0(FP) and pl+8(FP).
// NOTE(review): presumably declared in Go as
//     func getVectorLength() (vl, pl uint64)
// -- confirm against the Go-side prototype.
//
// Scratch registers: R2 (x2), R3 (x3). No stack frame, no memory reads.
// The SVE instructions are emitted as raw WORDs with their disassembly in
// the trailing comment.
TEXT ·getVectorLength(SB), NOSPLIT, $0
// x2 = 0 + 1 * (vector register length in bytes), then *8 to get bits.
WORD $0xd2800002 // mov x2, #0
WORD $0x04225022 // addvl x2, x2, #1
WORD $0xd37df042 // lsl x2, x2, #3
// x3 = 0 + 1 * (predicate register length in bytes), then *8 to get bits.
WORD $0xd2800003 // mov x3, #0
WORD $0x04635023 // addpl x3, x3, #1
WORD $0xd37df063 // lsl x3, x3, #3
MOVD R2, vl+0(FP)
MOVD R3, pl+8(FP)
RET
// updateArm64Sve mixes whole 32-byte message blocks into the hash state
// using base SVE instructions only.
//
// Arguments (frame offsets as used below):
//   state+0(FP)     pointer to the state; four consecutive vectors are
//                   loaded from it and stored back at the end
//   msg_base+8(FP)  pointer to the message bytes
//   msg_len+16(FP)  message length in bytes
//
// Only floor(msg_len / 32) blocks are consumed; a trailing partial block is
// ignored. If msg_len < 32 the function returns without touching the state.
//
// NOTE: the state vectors are addressed as [x0, #k, MUL VL] and every
// load/store uses an all-true predicate, while the message pointer advances
// by a fixed 32 bytes and the permutation table is 32 bytes long. The
// routine is therefore only self-consistent when the SVE vector length is
// 32 bytes (256 bits). Nothing in this function checks that; presumably the
// caller gates on getVectorLength -- confirm.
//
// Register roles:
//   R0 state pointer     R1 message cursor     R2 bytes remaining - 32
//   R3 address of zipperMergeSve
//   p1 all-true predicate        p2 "ptrue .d" (selects even .s lanes)
//   z1..z4 state vectors at offsets 0..3 * VL
//          (NOTE(review): presumably v0, v1, mul0, mul1 of HighwayHash --
//          confirm against the Go state layout)
//   z5 TBL byte-index table      z6 all zeros      z0, z7 scratch
TEXT ·updateArm64Sve(SB), NOSPLIT, $0
MOVD state+0(FP), R0
MOVD msg_base+8(FP), R1
MOVD msg_len+16(FP), R2 // length of message
// Bias the length by one block so the sign flag means "no full block left".
SUBS $32, R2
BMI completeSve
WORD $0x2518e3e1 // ptrue p1.b
// Load the four state vectors.
WORD $0xa5e0a401 // ld1d z1.d, p1/z, [x0]
WORD $0xa5e1a402 // ld1d z2.d, p1/z, [x0, #1, MUL VL]
WORD $0xa5e2a403 // ld1d z3.d, p1/z, [x0, #2, MUL VL]
WORD $0xa5e3a404 // ld1d z4.d, p1/z, [x0, #3, MUL VL]
// Load zipper merge constants table pointer
MOVD $·zipperMergeSve(SB), R3
WORD $0xa5e0a465 // ld1d z5.d, p1/z, [x3]
WORD $0x25b8c006 // mov z6.s, #0
WORD $0x25d8e3e2 // ptrue p2.d /* set every other lane for "s" type */
loopSve:
// z0 = next message block; advance the cursor by one block.
WORD $0xa5e0a420 // ld1d z0.d, p1/z, [x1]
ADD $32, R1
// z2 += block + z3
WORD $0x04e00042 // add z2.d, z2.d, z0.d
WORD $0x04e30042 // add z2.d, z2.d, z3.d
// z3 ^= (z1 >> 32) * (z2 & 0xffffffff), per 64-bit lane.
// The sel keeps the low 32 bits of each z2 lane and takes zero (z6) for the
// high 32 bits, so the 64-bit mul acts as a 32x32 -> 64 multiply.
WORD $0x04e09420 // lsr z0.d, z1.d, #32
WORD $0x05a6c847 // sel z7.s, p2, z2.s, z6.s
WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d
WORD $0x04a33003 // eor z3.d, z0.d, z3.d
// z1 += z4
WORD $0x04e10081 // add z1.d, z4.d, z1.d
// z4 ^= (z2 >> 32) * (z1 & 0xffffffff), same masking trick as above.
WORD $0x04e09440 // lsr z0.d, z2.d, #32
WORD $0x05a6c827 // sel z7.s, p2, z1.s, z6.s
WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d
WORD $0x04a43004 // eor z4.d, z0.d, z4.d
// z1 += permute(z2); then z2 += permute(updated z1). Order matters.
WORD $0x05253040 // tbl z0.b, z2.b, z5.b
WORD $0x04e00021 // add z1.d, z1.d, z0.d
WORD $0x05253020 // tbl z0.b, z1.b, z5.b
WORD $0x04e00042 // add z2.d, z2.d, z0.d
// Loop while at least one more full block remains.
SUBS $32, R2
BPL loopSve
// Write the four state vectors back.
WORD $0xe5e0e401 // st1d z1.d, p1, [x0]
WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL]
WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL]
WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL]
completeSve:
RET
// updateArm64Sve2 mixes whole 32-byte message blocks into the hash state.
// It performs the same per-block computation as the base-SVE variant in this
// file, but uses the SVE2 widening multiply UMULLB in place of a
// mask-then-multiply pair, so no zero vector or second predicate is needed.
//
// Arguments (frame offsets as used below):
//   state+0(FP)     pointer to the state; four consecutive vectors are
//                   loaded from it and stored back at the end
//   msg_base+8(FP)  pointer to the message bytes
//   msg_len+16(FP)  message length in bytes
//
// Only floor(msg_len / 32) blocks are consumed; a trailing partial block is
// ignored. If msg_len < 32 the function returns without touching the state.
//
// NOTE: state vectors are addressed as [x0, #k, MUL VL] with an all-true
// predicate while the message pointer advances by a fixed 32 bytes and the
// permutation table is 32 bytes long, so the routine is only self-consistent
// when the SVE vector length is 32 bytes (256 bits). Nothing here checks
// that; presumably the caller does -- confirm.
//
// Register roles:
//   R0 state pointer     R1 message cursor     R2 bytes remaining - 32
//   R3 address of zipperMergeSve
//   p1 all-true predicate
//   z1..z4 state vectors at offsets 0..3 * VL
//          (NOTE(review): presumably v0, v1, mul0, mul1 of HighwayHash --
//          confirm against the Go state layout)
//   z5 TBL byte-index table      z0 scratch
TEXT ·updateArm64Sve2(SB), NOSPLIT, $0
MOVD state+0(FP), R0
MOVD msg_base+8(FP), R1
MOVD msg_len+16(FP), R2 // length of message
// Bias the length by one block so the sign flag means "no full block left".
SUBS $32, R2
BMI completeSve2
WORD $0x2518e3e1 // ptrue p1.b
// Load the four state vectors.
WORD $0xa5e0a401 // ld1d z1.d, p1/z, [x0]
WORD $0xa5e1a402 // ld1d z2.d, p1/z, [x0, #1, MUL VL]
WORD $0xa5e2a403 // ld1d z3.d, p1/z, [x0, #2, MUL VL]
WORD $0xa5e3a404 // ld1d z4.d, p1/z, [x0, #3, MUL VL]
// Load zipper merge constants table pointer
MOVD $·zipperMergeSve(SB), R3
WORD $0xa5e0a465 // ld1d z5.d, p1/z, [x3]
loopSve2:
// z0 = next message block; advance the cursor by one block.
WORD $0xa5e0a420 // ld1d z0.d, p1/z, [x1]
ADD $32, R1
// z2 += block + z3
WORD $0x04e00042 // add z2.d, z2.d, z0.d
WORD $0x04e30042 // add z2.d, z2.d, z3.d
// z3 ^= (z1 >> 32) * (z2 & 0xffffffff), per 64-bit lane.
// umullb multiplies the even (low) 32-bit element of each 64-bit lane and
// widens the product to 64 bits, so no explicit masking is required.
WORD $0x04e09420 // lsr z0.d, z1.d, #32
WORD $0x45c27800 // umullb z0.d, z0.s, z2.s
WORD $0x04a33003 // eor z3.d, z0.d, z3.d
// z1 += z4
WORD $0x04e10081 // add z1.d, z4.d, z1.d
// z4 ^= (z2 >> 32) * (z1 & 0xffffffff)
WORD $0x04e09440 // lsr z0.d, z2.d, #32
WORD $0x45c17800 // umullb z0.d, z0.s, z1.s
WORD $0x04a43004 // eor z4.d, z0.d, z4.d
// z1 += permute(z2); then z2 += permute(updated z1). Order matters.
WORD $0x05253040 // tbl z0.b, z2.b, z5.b
WORD $0x04e00021 // add z1.d, z1.d, z0.d
WORD $0x05253020 // tbl z0.b, z1.b, z5.b
WORD $0x04e00042 // add z2.d, z2.d, z0.d
// Loop while at least one more full block remains.
SUBS $32, R2
BPL loopSve2
// Write the four state vectors back.
WORD $0xe5e0e401 // st1d z1.d, p1, [x0]
WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL]
WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL]
WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL]
completeSve2:
RET
// zipperMergeSve is the 32-byte index vector loaded into z5 and used as the
// index operand of the TBL byte permutes in the update routines of this file.
//
// Each byte is the source-byte index for the corresponding result byte.
// The words are stored little-endian, so result byte 0 takes source byte
// 0x03, result byte 1 takes 0x0c, and so on.
//
// Bytes 0x10..0x1f of the table equal bytes 0x00..0x0f plus 0x10: the same
// 16-byte shuffle is applied independently to the low and the high 128-bit
// half of a 256-bit vector.
//
// Read-only, pointer-free data (NOPTR+RODATA).
DATA ·zipperMergeSve+0x00(SB)/8, $0x000f010e05020c03
DATA ·zipperMergeSve+0x08(SB)/8, $0x070806090d0a040b
DATA ·zipperMergeSve+0x10(SB)/8, $0x101f111e15121c13
DATA ·zipperMergeSve+0x18(SB)/8, $0x171816191d1a141b
GLOBL ·zipperMergeSve(SB), (NOPTR+RODATA), $32