/*
 * Decompiled with CFR 0.152.
 */
package com.oracle.svm.core.graal.amd64;

import java.util.EnumSet;
import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.CodeUtil;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.PlatformKind;
import jdk.vm.ci.meta.Value;
import jdk.vm.ci.meta.ValueKind;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address;
import org.graalvm.compiler.asm.amd64.AMD64Assembler;
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
import org.graalvm.compiler.asm.amd64.AVXKind;
import org.graalvm.compiler.core.common.LIRKind;
import org.graalvm.compiler.core.common.Stride;
import org.graalvm.compiler.lir.LIRInstruction;
import org.graalvm.compiler.lir.LIRInstructionClass;
import org.graalvm.compiler.lir.Opcode;
import org.graalvm.compiler.lir.amd64.AMD64ComplexVectorOp;
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;

/**
 * LIR instruction that emits an AMD64 routine copying memory in 8-byte words, either from low
 * to high addresses ({@code forward == true}) or from high to low addresses.
 *
 * <p>The constructor asserts that the inputs are pinned to fixed registers: source address in
 * {@code rsi}, destination address in {@code rdi} and length in {@code rdx}. The very same
 * values are also registered as temporaries because the emitted code overwrites them (for
 * example the length register is shifted and then used as the running index).
 *
 * <p>The length is shifted right by three bits before use and every later address scales it by
 * eight, i.e. it is treated as a byte count that is converted to a count of 8-byte words; a
 * remainder below eight bytes is dropped by that shift.
 *
 * <p>Three code shapes are emitted depending on the feature queries inherited from the
 * superclass: a {@code vmovdqu64}/{@code vmovdqu} variant, a {@code vmovdqu} or {@code movdqu}
 * variant, and a plain {@code movq} variant. All of them finish with a one-word-at-a-time loop
 * for whatever is left over.
 */
@Opcode("AMD64_COPY_LONGS")
public final class AMD64CopyLongsOp
extends AMD64ComplexVectorOp {
    public static final LIRInstructionClass<AMD64CopyLongsOp> TYPE = LIRInstructionClass.create(AMD64CopyLongsOp.class);

    /** {@code true} selects the ascending-address emitter, {@code false} the descending one. */
    private final boolean forward;
    /** Divided by 8 and compared with the word count to pick the {@code vmovdqu64} loop. */
    private final int useAVX3Threshold;

    @LIRInstruction.Use({LIRInstruction.OperandFlag.REG})
    private Value rsrc;
    @LIRInstruction.Use({LIRInstruction.OperandFlag.REG})
    private Value rdst;
    @LIRInstruction.Use({LIRInstruction.OperandFlag.REG})
    private Value rlen;
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    private Value rsrcTemp;
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    private Value rdstTemp;
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    private Value rlenTemp;
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    private Value rtmp;
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    private Value[] vtmp;

    /**
     * Creates the copy instruction.
     *
     * @param tool generator used to allocate the scalar scratch variable and the vector temporaries
     * @param runtimeCheckedCPUFeatures forwarded unchanged to the superclass constructor
     * @param useAVX3Threshold threshold consulted by the {@code vmovdqu64} code path; asserted to be a power of two
     * @param forward {@code true} to emit the ascending copy, {@code false} for the descending copy
     * @param src source address, asserted to be in {@code rsi}
     * @param dst destination address, asserted to be in {@code rdi}
     * @param len length, asserted to be in {@code rdx}
     */
    public AMD64CopyLongsOp(LIRGeneratorTool tool, EnumSet<AMD64.CPUFeature> runtimeCheckedCPUFeatures, int useAVX3Threshold, boolean forward, Value src, Value dst, Value len) {
        super(TYPE, tool, runtimeCheckedCPUFeatures, AVXKind.AVXSize.ZMM);
        this.forward = forward;

        assert CodeUtil.isPowerOf2(useAVX3Threshold) : "AVX3Threshold must be power of 2";
        this.useAVX3Threshold = useAVX3Threshold;

        assert ValueUtil.asRegister(src).equals(AMD64.rsi);
        assert ValueUtil.asRegister(dst).equals(AMD64.rdi);
        assert ValueUtil.asRegister(len).equals(AMD64.rdx);

        // Each input doubles as a temporary because the emitted code clobbers it.
        this.rsrc = src;
        this.rsrcTemp = src;
        this.rdst = dst;
        this.rdstTemp = dst;
        this.rlen = len;
        this.rlenTemp = len;

        this.rtmp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD));
        this.vtmp = this.allocateVectorRegisters(tool, JavaKind.Byte, 4);
    }

    /**
     * Emits the copy routine in the direction chosen at construction time.
     *
     * @param crb compilation result builder (not used by this instruction)
     * @param masm assembler receiving the instructions
     */
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
        if (!this.forward) {
            this.emitCopyBackward(masm);
            return;
        }
        this.emitCopyForward(masm);
    }

    /** Emits a {@code movq} load into {@code scratch} followed by a {@code movq} store, both at {@code base + index * 8 + disp}. */
    private static void copyViaMovq(AMD64MacroAssembler masm, Register scratch, Register srcBase, Register dstBase, Register index, int disp) {
        masm.movq(scratch, new AMD64Address(srcBase, index, Stride.S8, disp));
        masm.movq(new AMD64Address(dstBase, index, Stride.S8, disp), scratch);
    }

    /** Emits a {@code movdqu} load into {@code vec} followed by a {@code movdqu} store, both at {@code base + index * 8 + disp}. */
    private static void copyViaMovdqu(AMD64MacroAssembler masm, Register vec, Register srcBase, Register dstBase, Register index, int disp) {
        masm.movdqu(vec, new AMD64Address(srcBase, index, Stride.S8, disp));
        masm.movdqu(new AMD64Address(dstBase, index, Stride.S8, disp), vec);
    }

    /** Emits a {@code vmovdqu} load into {@code vec} followed by a {@code vmovdqu} store, both at {@code base + index * 8 + disp}. */
    private static void copyViaVmovdqu(AMD64MacroAssembler masm, Register vec, Register srcBase, Register dstBase, Register index, int disp) {
        masm.vmovdqu(vec, new AMD64Address(srcBase, index, Stride.S8, disp));
        masm.vmovdqu(new AMD64Address(dstBase, index, Stride.S8, disp), vec);
    }

    /** Emits a {@code vmovdqu64} load into {@code vec} followed by a {@code vmovdqu64} store, both at {@code base + index * 8 + disp}. */
    private static void copyViaVmovdqu64(AMD64MacroAssembler masm, Register vec, Register srcBase, Register dstBase, Register index, int disp) {
        masm.vmovdqu64(vec, new AMD64Address(srcBase, index, Stride.S8, disp));
        masm.vmovdqu64(new AMD64Address(dstBase, index, Stride.S8, disp), vec);
    }

    /**
     * Emits the ascending copy: prologue, single-word tail loop and (via the overload) the bulk
     * loops. Control enters the bulk code first and only drops into the tail loop when words
     * remain afterwards.
     */
    private void emitCopyForward(AMD64MacroAssembler masm) {
        Register srcReg = ValueUtil.asRegister(this.rsrc);
        Register dstReg = ValueUtil.asRegister(this.rdst);
        Register index = ValueUtil.asRegister(this.rlen);
        Register scratch = ValueUtil.asRegister(this.rtmp);
        Label bulkEntry = new Label();
        Label tailLoop = new Label();
        Label done = new Label();

        // Point both bases at (base + len - 8) and turn the length into a negated word
        // count, so the index runs upwards towards zero.
        masm.leaq(srcReg, new AMD64Address(srcReg, index, Stride.S1, -8));
        masm.leaq(dstReg, new AMD64Address(dstReg, index, Stride.S1, -8));
        masm.shrq(index, 3);
        masm.negq(index);
        masm.jmp(bulkEntry);

        // Tail: one word per iteration until the index reaches zero.
        masm.bind(tailLoop);
        copyViaMovq(masm, scratch, srcReg, dstReg, index, 8);
        masm.incqAndJcc(index, AMD64Assembler.ConditionFlag.NotZero, tailLoop, true);
        masm.jmp(done);

        // Bulk loops; they fall through to 'done' when nothing is left for the tail.
        this.emitCopyForward(masm, srcReg, dstReg, index, scratch, bulkEntry, tailLoop);
        masm.bind(done);
    }

    /**
     * Emits the bulk part of the ascending copy.
     *
     * @param src register holding the adjusted source base
     * @param dst register holding the adjusted destination base
     * @param len register holding the negated word count, used as index
     * @param tmp scalar scratch register for the {@code movq} variant
     * @param copyBytes entry label, bound inside the emitted code at the first loop test
     * @param copy8Bytes label of the single-word tail loop, jumped to when words remain
     */
    private void emitCopyForward(AMD64MacroAssembler masm, Register src, Register dst, Register len, Register tmp, Label copyBytes, Label copy8Bytes) {
        Label bulkLoop = new Label();
        masm.align(16);
        if (!this.supports(AMD64.CPUFeature.AVX)) {
            // Scalar variant: four words per iteration.
            masm.bind(bulkLoop);
            copyViaMovq(masm, tmp, src, dst, len, -24);
            copyViaMovq(masm, tmp, src, dst, len, -16);
            copyViaMovq(masm, tmp, src, dst, len, -8);
            copyViaMovq(masm, tmp, src, dst, len, 0);
            masm.bind(copyBytes);
            masm.addqAndJcc(len, 4, AMD64Assembler.ConditionFlag.LessEqual, bulkLoop, true);
        } else {
            Register vec0 = ValueUtil.asRegister(this.vtmp[0]);
            Register vec1 = ValueUtil.asRegister(this.vtmp[1]);
            Register vec2 = ValueUtil.asRegister(this.vtmp[2]);
            Register vec3 = ValueUtil.asRegister(this.vtmp[3]);
            Label skipHalfChunk = new Label();
            if (this.supportsAVX512VLBWAndZMM()) {
                Label wideLoop = new Label();
                Label narrowLoop = new Label();
                Label halfChunk = new Label();
                Label wideTest = new Label();
                Label narrowTest = new Label();
                int negatedWordThreshold = -this.useAVX3Threshold / 8;

                // The index is negative, so "less than -threshold/8" means more words than
                // the threshold: use the vmovdqu64 loop in that case.
                masm.bind(copyBytes);
                masm.cmpqAndJcc(len, negatedWordThreshold, AMD64Assembler.ConditionFlag.Less, wideTest, true);
                masm.jmpb(narrowTest);

                // Eight words per iteration with a single vmovdqu64.
                masm.bind(wideLoop);
                copyViaVmovdqu64(masm, vec0, src, dst, len, -56);
                masm.bind(wideTest);
                masm.addqAndJcc(len, 8, AMD64Assembler.ConditionFlag.LessEqual, wideLoop, true);
                masm.jmpb(halfChunk);

                // Eight words per iteration with two vmovdqu moves.
                masm.bind(narrowLoop);
                copyViaVmovdqu(masm, vec0, src, dst, len, -56);
                copyViaVmovdqu(masm, vec1, src, dst, len, -24);
                masm.bind(narrowTest);
                masm.addqAndJcc(len, 8, AMD64Assembler.ConditionFlag.LessEqual, narrowLoop, true);

                // Undo the overshoot of 8 and test whether four more words fit.
                masm.bind(halfChunk);
                masm.subqAndJcc(len, 4, AMD64Assembler.ConditionFlag.Greater, skipHalfChunk, true);
            } else {
                // Eight words per iteration.
                masm.bind(bulkLoop);
                if (this.supportsAVX2AndYMM()) {
                    copyViaVmovdqu(masm, vec0, src, dst, len, -56);
                    copyViaVmovdqu(masm, vec1, src, dst, len, -24);
                } else {
                    copyViaMovdqu(masm, vec0, src, dst, len, -56);
                    copyViaMovdqu(masm, vec1, src, dst, len, -40);
                    copyViaMovdqu(masm, vec2, src, dst, len, -24);
                    copyViaMovdqu(masm, vec3, src, dst, len, -8);
                }
                masm.bind(copyBytes);
                masm.addqAndJcc(len, 8, AMD64Assembler.ConditionFlag.LessEqual, bulkLoop, true);
                // Undo the overshoot of 8 and test whether four more words fit.
                masm.subqAndJcc(len, 4, AMD64Assembler.ConditionFlag.Greater, skipHalfChunk, true);
            }

            // One additional four-word chunk.
            if (this.supportsAVX2AndYMM()) {
                copyViaVmovdqu(masm, vec0, src, dst, len, -24);
            } else {
                copyViaMovdqu(masm, vec0, src, dst, len, -24);
                copyViaMovdqu(masm, vec1, src, dst, len, -8);
            }
            masm.addq(len, 4);
            masm.bind(skipHalfChunk);
        }
        // Remove the final overshoot; a negative index means single words are still pending.
        masm.subqAndJcc(len, 4, AMD64Assembler.ConditionFlag.Less, copy8Bytes, false);
    }

    /**
     * Emits the descending copy: prologue, single-word tail loop and (via the overload) the bulk
     * loops. Control enters the bulk code first and only drops into the tail loop when words
     * remain afterwards.
     */
    private void emitCopyBackward(AMD64MacroAssembler masm) {
        Register srcReg = ValueUtil.asRegister(this.rsrc);
        Register dstReg = ValueUtil.asRegister(this.rdst);
        Register index = ValueUtil.asRegister(this.rlen);
        Register scratch = ValueUtil.asRegister(this.rtmp);
        Label bulkEntry = new Label();
        Label tailLoop = new Label();
        Label done = new Label();

        // Turn the length into a word count; the index runs downwards towards zero.
        masm.shrq(index, 3);
        masm.jmp(bulkEntry);

        // Tail: one word per iteration until the index reaches zero.
        masm.bind(tailLoop);
        copyViaMovq(masm, scratch, srcReg, dstReg, index, -8);
        masm.decqAndJcc(index, AMD64Assembler.ConditionFlag.NotZero, tailLoop, true);
        masm.jmp(done);

        // Bulk loops; they fall through to 'done' when nothing is left for the tail.
        this.emitCopyBackward(masm, srcReg, dstReg, index, scratch, bulkEntry, tailLoop);
        masm.bind(done);
    }

    /**
     * Emits the bulk part of the descending copy.
     *
     * @param src register holding the source base
     * @param dst register holding the destination base
     * @param len register holding the remaining word count, used as index
     * @param tmp scalar scratch register for the {@code movq} variant
     * @param copyBytes entry label, bound inside the emitted code at the first loop test
     * @param copy8Bytes label of the single-word tail loop, jumped to when words remain
     */
    private void emitCopyBackward(AMD64MacroAssembler masm, Register src, Register dst, Register len, Register tmp, Label copyBytes, Label copy8Bytes) {
        Label bulkLoop = new Label();
        masm.align(16);
        if (!this.supports(AMD64.CPUFeature.AVX)) {
            // Scalar variant: four words per iteration.
            masm.bind(bulkLoop);
            copyViaMovq(masm, tmp, src, dst, len, 24);
            copyViaMovq(masm, tmp, src, dst, len, 16);
            copyViaMovq(masm, tmp, src, dst, len, 8);
            copyViaMovq(masm, tmp, src, dst, len, 0);
            masm.bind(copyBytes);
            masm.subqAndJcc(len, 4, AMD64Assembler.ConditionFlag.GreaterEqual, bulkLoop, true);
        } else {
            Register vec0 = ValueUtil.asRegister(this.vtmp[0]);
            Register vec1 = ValueUtil.asRegister(this.vtmp[1]);
            Register vec2 = ValueUtil.asRegister(this.vtmp[2]);
            Register vec3 = ValueUtil.asRegister(this.vtmp[3]);
            Label skipHalfChunk = new Label();
            if (this.supportsAVX512VLBWAndZMM()) {
                Label wideLoop = new Label();
                Label narrowLoop = new Label();
                Label halfChunk = new Label();
                Label wideTest = new Label();
                Label narrowTest = new Label();
                int wordThreshold = this.useAVX3Threshold / 8;

                // More words than the threshold: use the vmovdqu64 loop.
                masm.bind(copyBytes);
                masm.cmpqAndJcc(len, wordThreshold, AMD64Assembler.ConditionFlag.Greater, wideTest, true);
                masm.jmpb(narrowTest);

                // Eight words per iteration with a single vmovdqu64.
                masm.bind(wideLoop);
                copyViaVmovdqu64(masm, vec0, src, dst, len, 0);
                masm.bind(wideTest);
                masm.subqAndJcc(len, 8, AMD64Assembler.ConditionFlag.GreaterEqual, wideLoop, true);
                masm.jmpb(halfChunk);

                // Eight words per iteration with two vmovdqu moves.
                masm.bind(narrowLoop);
                copyViaVmovdqu(masm, vec0, src, dst, len, 32);
                copyViaVmovdqu(masm, vec1, src, dst, len, 0);
                masm.bind(narrowTest);
                masm.subqAndJcc(len, 8, AMD64Assembler.ConditionFlag.GreaterEqual, narrowLoop, true);

                // Undo the undershoot of 8 and test whether four more words fit.
                masm.bind(halfChunk);
                masm.addqAndJcc(len, 4, AMD64Assembler.ConditionFlag.Less, skipHalfChunk, true);
            } else {
                // Eight words per iteration.
                masm.bind(bulkLoop);
                if (this.supportsAVX2AndYMM()) {
                    copyViaVmovdqu(masm, vec0, src, dst, len, 32);
                    copyViaVmovdqu(masm, vec1, src, dst, len, 0);
                } else {
                    copyViaMovdqu(masm, vec0, src, dst, len, 48);
                    copyViaMovdqu(masm, vec1, src, dst, len, 32);
                    copyViaMovdqu(masm, vec2, src, dst, len, 16);
                    copyViaMovdqu(masm, vec3, src, dst, len, 0);
                }
                masm.bind(copyBytes);
                masm.subqAndJcc(len, 8, AMD64Assembler.ConditionFlag.GreaterEqual, bulkLoop, true);
                // Undo the undershoot of 8 and test whether four more words fit.
                masm.addqAndJcc(len, 4, AMD64Assembler.ConditionFlag.Less, skipHalfChunk, true);
            }

            // One additional four-word chunk.
            if (this.supportsAVX2AndYMM()) {
                copyViaVmovdqu(masm, vec0, src, dst, len, 0);
            } else {
                copyViaMovdqu(masm, vec0, src, dst, len, 16);
                copyViaMovdqu(masm, vec1, src, dst, len, 0);
            }
            masm.subq(len, 4);
            masm.bind(skipHalfChunk);
        }
        // Remove the final undershoot; a positive index means single words are still pending.
        masm.addqAndJcc(len, 4, AMD64Assembler.ConditionFlag.Greater, copy8Bytes, false);
    }
}

