//
// Copyright (c) 2024 Minio Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

//+build !noasm,!appengine
#include "textflag.h"
// getVectorLength reports the sizes of the SVE vector (Z) and predicate (P)
// registers of the core it runs on, both expressed in bits.
//
// Results (the Go declaration is not part of this file — presumably
// func getVectorLength() (vl, pl uint64); TODO confirm):
//   vl+0(FP)  vector register length in bits
//   pl+8(FP)  predicate register length in bits
//
// RDVL yields imm x (vector length in bytes) and ADDPL adds
// imm x (predicate length in bytes), so an immediate of 8 converts each
// byte count straight into a bit count.
//
// The SVE instructions are executed unconditionally: no CPU feature check is
// made here, so the caller is presumably expected to have confirmed SVE
// support beforehand. Uses R2 and R3 as scratch; flags are not modified.
TEXT ·getVectorLength(SB), NOSPLIT, $0
	WORD $0x04bf5102 // rdvl  x2, #8        x2 = 8 * VL(bytes) = vector length in bits
	MOVD ZR, R3      //                     x3 = 0, base for the ADDPL below
	WORD $0x04635103 // addpl x3, x3, #8    x3 = 8 * PL(bytes) = predicate length in bits
	MOVD R2, vl+0(FP)
	MOVD R3, pl+8(FP)
	RET
// updateArm64Sve folds a message into a 128-byte hash state, one 32-byte
// block at a time, using SVE instructions only (SVE2 is not required).
//
// Arguments (the Go declaration is not part of this file — presumably
// func updateArm64Sve(state *[16]uint64, msg []byte); TODO confirm):
//   state+0(FP)     pointer to four consecutive 32-byte rows, named A, B, C, D
//                   below (presumably HighwayHash v0, v1, mul0, mul1 — confirm
//                   against the Go caller)
//   msg_base+8(FP)  pointer to the message bytes
//   msg_len+16(FP)  message length in bytes
//
// Only whole 32-byte blocks are consumed; trailing bytes are ignored. If
// msg_len < 32 the state is neither read nor written.
//
// For every block M, on four 64-bit lanes:
//   B += M + C
//   C ^= (A >> 32) * (B & 0xffffffff)
//   A += D
//   D ^= (B >> 32) * (A & 0xffffffff)
//   A += permute(B)
//   B += permute(A)
// where permute is the byte shuffle described by the ·zipperMergeSve table.
//
// Vector length: all loads, stores and the predicated multiply are governed by
// "ptrue p1.d, vl4" (exactly four 64-bit lanes) and the rows are addressed
// with fixed 32-byte offsets instead of "MUL VL". Memory accesses are
// therefore always 32 bytes wide and land on the right rows whatever the
// hardware vector length is; lanes above the fourth are never stored.
// The routine still needs a vector length of at least 256 bits: on a shorter
// vector the VL4 pattern produces an empty predicate, so the state is left
// unmodified (not corrupted) and no valid update takes place. Callers must
// keep checking the vector length (see getVectorLength) and SVE support.
//
// Clobbers R0-R6, Z0-Z7, P1, P2 and the condition flags.
TEXT ·updateArm64Sve(SB), NOSPLIT, $0
	MOVD state+0(FP), R0
	MOVD msg_base+8(FP), R1
	MOVD msg_len+16(FP), R2 // R2 = message length in bytes
	SUBS $32, R2            // bias by one block: R2 >= 0 <=> a whole block is left
	BMI  completeSve        // fewer than 32 bytes: leave the state untouched

	// Row addresses with a fixed 32-byte stride, independent of the vector length.
	ADD $32, R0, R4 // R4 = &B
	ADD $64, R0, R5 // R5 = &C
	ADD $96, R0, R6 // R6 = &D

	WORD $0x25d8e081 // ptrue p1.d, vl4               exactly four 64-bit lanes active
	WORD $0xa5e0a401 // ld1d  z1.d, p1/z, [x0]        z1 = A
	WORD $0xa5e0a482 // ld1d  z2.d, p1/z, [x4]        z2 = B
	WORD $0xa5e0a4a3 // ld1d  z3.d, p1/z, [x5]        z3 = C
	WORD $0xa5e0a4c4 // ld1d  z4.d, p1/z, [x6]        z4 = D

	// Load the zipper-merge byte permutation and the helpers for the 32-bit mask.
	MOVD $·zipperMergeSve(SB), R3
	WORD $0xa5e0a465 // ld1d  z5.d, p1/z, [x3]        z5 = byte indices for tbl
	WORD $0x25b8c006 // mov   z6.s, #0                z6 = 0
	WORD $0x25d8e3e2 // ptrue p2.d                    read as a .s predicate: even 32-bit elements only

loopSve:
	WORD $0xa5e0a420 // ld1d  z0.d, p1/z, [x1]        z0 = M (next 32 message bytes)
	ADD  $32, R1

	WORD $0x04e00042 // add   z2.d, z2.d, z0.d        B += M
	WORD $0x04e30042 // add   z2.d, z2.d, z3.d        B += C
	WORD $0x04e09420 // lsr   z0.d, z1.d, #32         z0 = A >> 32
	WORD $0x05a6c847 // sel   z7.s, p2, z2.s, z6.s    z7 = B & 0xffffffff (odd words taken from zero)
	WORD $0x04d004e0 // mul   z0.d, p1/m, z0.d, z7.d  z0 = (A >> 32) * (B & 0xffffffff)
	WORD $0x04a33003 // eor   z3.d, z0.d, z3.d        C ^= z0
	WORD $0x04e10081 // add   z1.d, z4.d, z1.d        A += D
	WORD $0x04e09440 // lsr   z0.d, z2.d, #32         z0 = B >> 32
	WORD $0x05a6c827 // sel   z7.s, p2, z1.s, z6.s    z7 = A & 0xffffffff
	WORD $0x04d004e0 // mul   z0.d, p1/m, z0.d, z7.d  z0 = (B >> 32) * (A & 0xffffffff)
	WORD $0x04a43004 // eor   z4.d, z0.d, z4.d        D ^= z0
	WORD $0x05253040 // tbl   z0.b, z2.b, z5.b        z0 = permute(B)
	WORD $0x04e00021 // add   z1.d, z1.d, z0.d        A += permute(B)
	WORD $0x05253020 // tbl   z0.b, z1.b, z5.b        z0 = permute(A)
	WORD $0x04e00042 // add   z2.d, z2.d, z0.d        B += permute(A)

	SUBS $32, R2
	BPL  loopSve     // another whole block remains

	WORD $0xe5e0e401 // st1d  z1.d, p1, [x0]          store A
	WORD $0xe5e0e482 // st1d  z2.d, p1, [x4]          store B
	WORD $0xe5e0e4a3 // st1d  z3.d, p1, [x5]          store C
	WORD $0xe5e0e4c4 // st1d  z4.d, p1, [x6]          store D

completeSve:
	RET
// updateArm64Sve2 folds a message into a 128-byte hash state, one 32-byte
// block at a time. It performs the same computation as updateArm64Sve but
// uses the SVE2 widening multiply UMULLB, so it requires SVE2.
//
// Arguments (the Go declaration is not part of this file — presumably
// func updateArm64Sve2(state *[16]uint64, msg []byte); TODO confirm):
//   state+0(FP)     pointer to four consecutive 32-byte rows, named A, B, C, D
//                   below (presumably HighwayHash v0, v1, mul0, mul1 — confirm
//                   against the Go caller)
//   msg_base+8(FP)  pointer to the message bytes
//   msg_len+16(FP)  message length in bytes
//
// Only whole 32-byte blocks are consumed; trailing bytes are ignored. If
// msg_len < 32 the state is neither read nor written.
//
// For every block M, on four 64-bit lanes:
//   B += M + C
//   C ^= (A >> 32) * (B & 0xffffffff)
//   A += D
//   D ^= (B >> 32) * (A & 0xffffffff)
//   A += permute(B)
//   B += permute(A)
// where permute is the byte shuffle described by the ·zipperMergeSve table.
// UMULLB multiplies the even-numbered 32-bit elements of its sources into
// 64-bit results, which supplies the "& 0xffffffff" without a separate mask.
//
// Vector length: all loads and stores are governed by "ptrue p1.d, vl4"
// (exactly four 64-bit lanes) and the rows are addressed with fixed 32-byte
// offsets instead of "MUL VL". Memory accesses are therefore always 32 bytes
// wide and land on the right rows whatever the hardware vector length is;
// lanes above the fourth are never stored. The routine still needs a vector
// length of at least 256 bits: on a shorter vector the VL4 pattern produces an
// empty predicate, so the state is left unmodified (not corrupted) and no
// valid update takes place. Callers must keep checking the vector length
// (see getVectorLength) and SVE2 support.
//
// Clobbers R0-R6, Z0-Z5, P1 and the condition flags.
TEXT ·updateArm64Sve2(SB), NOSPLIT, $0
	MOVD state+0(FP), R0
	MOVD msg_base+8(FP), R1
	MOVD msg_len+16(FP), R2 // R2 = message length in bytes
	SUBS $32, R2            // bias by one block: R2 >= 0 <=> a whole block is left
	BMI  completeSve2       // fewer than 32 bytes: leave the state untouched

	// Row addresses with a fixed 32-byte stride, independent of the vector length.
	ADD $32, R0, R4 // R4 = &B
	ADD $64, R0, R5 // R5 = &C
	ADD $96, R0, R6 // R6 = &D

	WORD $0x25d8e081 // ptrue  p1.d, vl4              exactly four 64-bit lanes active
	WORD $0xa5e0a401 // ld1d   z1.d, p1/z, [x0]       z1 = A
	WORD $0xa5e0a482 // ld1d   z2.d, p1/z, [x4]       z2 = B
	WORD $0xa5e0a4a3 // ld1d   z3.d, p1/z, [x5]       z3 = C
	WORD $0xa5e0a4c4 // ld1d   z4.d, p1/z, [x6]       z4 = D

	// Load the zipper-merge byte permutation.
	MOVD $·zipperMergeSve(SB), R3
	WORD $0xa5e0a465 // ld1d   z5.d, p1/z, [x3]       z5 = byte indices for tbl

loopSve2:
	WORD $0xa5e0a420 // ld1d   z0.d, p1/z, [x1]       z0 = M (next 32 message bytes)
	ADD  $32, R1

	WORD $0x04e00042 // add    z2.d, z2.d, z0.d       B += M
	WORD $0x04e30042 // add    z2.d, z2.d, z3.d       B += C
	WORD $0x04e09420 // lsr    z0.d, z1.d, #32        z0 = A >> 32
	WORD $0x45c27800 // umullb z0.d, z0.s, z2.s       z0 = (A >> 32) * (B & 0xffffffff)
	WORD $0x04a33003 // eor    z3.d, z0.d, z3.d       C ^= z0
	WORD $0x04e10081 // add    z1.d, z4.d, z1.d       A += D
	WORD $0x04e09440 // lsr    z0.d, z2.d, #32        z0 = B >> 32
	WORD $0x45c17800 // umullb z0.d, z0.s, z1.s       z0 = (B >> 32) * (A & 0xffffffff)
	WORD $0x04a43004 // eor    z4.d, z0.d, z4.d       D ^= z0
	WORD $0x05253040 // tbl    z0.b, z2.b, z5.b       z0 = permute(B)
	WORD $0x04e00021 // add    z1.d, z1.d, z0.d       A += permute(B)
	WORD $0x05253020 // tbl    z0.b, z1.b, z5.b       z0 = permute(A)
	WORD $0x04e00042 // add    z2.d, z2.d, z0.d       B += permute(A)

	SUBS $32, R2
	BPL  loopSve2    // another whole block remains

	WORD $0xe5e0e401 // st1d   z1.d, p1, [x0]         store A
	WORD $0xe5e0e482 // st1d   z2.d, p1, [x4]         store B
	WORD $0xe5e0e4a3 // st1d   z3.d, p1, [x5]         store C
	WORD $0xe5e0e4c4 // st1d   z4.d, p1, [x6]         store D

completeSve2:
	RET
// zipperMergeSve is a 32-byte, read-only table of byte indices. Both update
// routines in this file load it into z5 and use it as the index operand of
// "tbl", so result byte j is taken from source byte table[j].
//
// The table is emitted as eight little-endian 32-bit words; the comment on
// each line lists the resulting bytes in memory order. Entries 16-31 are
// entries 0-15 plus 0x10: the same shuffle applied to the second 16 bytes of
// the vector.
DATA ·zipperMergeSve+0x00(SB)/4, $0x05020c03 // 03 0c 02 05
DATA ·zipperMergeSve+0x04(SB)/4, $0x000f010e // 0e 01 0f 00
DATA ·zipperMergeSve+0x08(SB)/4, $0x0d0a040b // 0b 04 0a 0d
DATA ·zipperMergeSve+0x0c(SB)/4, $0x07080609 // 09 06 08 07
DATA ·zipperMergeSve+0x10(SB)/4, $0x15121c13 // 13 1c 12 15
DATA ·zipperMergeSve+0x14(SB)/4, $0x101f111e // 1e 11 1f 10
DATA ·zipperMergeSve+0x18(SB)/4, $0x1d1a141b // 1b 14 1a 1d
DATA ·zipperMergeSve+0x1c(SB)/4, $0x17181619 // 19 16 18 17
GLOBL ·zipperMergeSve(SB), (NOPTR+RODATA), $32