/*
 * Decompiled with CFR 0.152.
 */
package org.graalvm.compiler.lir.amd64;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.PlatformKind;
import jdk.vm.ci.meta.Value;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address;
import org.graalvm.compiler.asm.amd64.AMD64Assembler;
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
import org.graalvm.compiler.core.common.LIRKind;
import org.graalvm.compiler.lir.LIRInstruction;
import org.graalvm.compiler.lir.LIRInstructionClass;
import org.graalvm.compiler.lir.Opcode;
import org.graalvm.compiler.lir.amd64.AMD64LIRInstruction;
import org.graalvm.compiler.lir.amd64.AMD64StringLatin1InflateOp;
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;

/**
 * LIR instruction that emits AMD64 machine code copying 16-bit chars from {@code src} into
 * bytes at {@code dst}.
 *
 * <p>The emitted code leaves the original length in the result register when every char had
 * a zero high byte, and {@code 0} as soon as a char with a non-zero high byte is seen. The
 * operands are pinned to fixed registers (asserted in the constructor): {@code src} in
 * {@code rsi}, {@code dst} in {@code rdi}, {@code len} in {@code rdx} and the result in
 * {@code rax}.
 */
@Opcode("AMD64_STRING_COMPRESS")
public final class AMD64StringUTF16CompressOp
extends AMD64LIRInstruction {
    public static final LIRInstructionClass<AMD64StringUTF16CompressOp> TYPE = LIRInstructionClass.create(AMD64StringUTF16CompressOp.class);

    // Result register.
    @LIRInstruction.Def({LIRInstruction.OperandFlag.REG})
    private Value rres;

    // Inputs: source address, destination address and char count.
    @LIRInstruction.Use({LIRInstruction.OperandFlag.REG})
    private Value rsrc;
    @LIRInstruction.Use({LIRInstruction.OperandFlag.REG})
    private Value rdst;
    @LIRInstruction.Use({LIRInstruction.OperandFlag.REG})
    private Value rlen;

    // The same three values again, declared as temps. The emitted code overwrites src, dst and
    // len; presumably these annotations are what tells the register allocator so (they are never
    // read in this class directly).
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    private Value rsrcTemp;
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    private Value rdstTemp;
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    private Value rlenTemp;

    // Four vector scratch registers followed by one DWORD scratch register.
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    private Value vtmp1;
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    private Value vtmp2;
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    private Value vtmp3;
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    private Value vtmp4;
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    private Value rtmp5;

    /**
     * Creates the instruction and allocates its scratch variables.
     *
     * @param tool generator used to query the target and to allocate the scratch variables
     * @param res result value; must be in {@code rax}
     * @param src source address; must be in {@code rsi}
     * @param dst destination address; must be in {@code rdi}
     * @param len number of chars; must be in {@code rdx}
     */
    public AMD64StringUTF16CompressOp(LIRGeneratorTool tool, Value res, Value src, Value dst, Value len) {
        super(TYPE);
        assert ValueUtil.asRegister(src).equals(AMD64.rsi);
        assert ValueUtil.asRegister(dst).equals(AMD64.rdi);
        assert ValueUtil.asRegister(len).equals(AMD64.rdx);
        assert ValueUtil.asRegister(res).equals(AMD64.rax);

        rres = res;
        rsrc = src;
        rsrcTemp = src;
        rdst = dst;
        rdstTemp = dst;
        rlen = len;
        rlenTemp = len;

        // 512-bit scratch vectors when the AVX-512 variant will be emitted, 128-bit otherwise.
        PlatformKind vectorKind = AMD64StringLatin1InflateOp.useAVX512ForStringInflateCompress(tool.target()) ? AMD64Kind.V512_BYTE : AMD64Kind.V128_BYTE;
        LIRKind vkind = LIRKind.value(vectorKind);
        vtmp1 = tool.newVariable(vkind);
        vtmp2 = tool.newVariable(vkind);
        vtmp3 = tool.newVariable(vkind);
        vtmp4 = tool.newVariable(vkind);
        rtmp5 = tool.newVariable(LIRKind.value(AMD64Kind.DWORD));
    }

    /**
     * Resolves all operands to their assigned registers and emits the compress sequence.
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
        Register result = ValueUtil.asRegister(rres);
        Register source = ValueUtil.asRegister(rsrc);
        Register destination = ValueUtil.asRegister(rdst);
        Register length = ValueUtil.asRegister(rlen);
        Register vec1 = ValueUtil.asRegister(vtmp1);
        Register vec2 = ValueUtil.asRegister(vtmp2);
        Register vec3 = ValueUtil.asRegister(vtmp3);
        Register vec4 = ValueUtil.asRegister(vtmp4);
        Register scratch = ValueUtil.asRegister(rtmp5);
        charArrayCompress(masm, source, destination, length, vec1, vec2, vec3, vec4, scratch, result);
    }

    /**
     * Emits the whole routine: an optional AVX-512 phase, an optional SSE phase, a scalar
     * remainder loop and the two exits.
     *
     * <p>The length is pushed on entry. The success exit pops it into {@code res}; the failure
     * exit zeroes {@code res} and drops the saved slot by adjusting {@code rsp}.
     */
    private static void charArrayCompress(AMD64MacroAssembler masm, Register src, Register dst, Register len, Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp, Register res) {
        assert tmp1.getRegisterCategory().equals(AMD64.XMM);
        assert tmp2.getRegisterCategory().equals(AMD64.XMM);
        assert tmp3.getRegisterCategory().equals(AMD64.XMM);
        assert tmp4.getRegisterCategory().equals(AMD64.XMM);
        // res is used as scratch while len is still live, so they must be different registers.
        assert len.number != res.number;

        Label returnLength = new Label();
        Label returnZero = new Label();
        Label done = new Label();
        Label belowThreshold = new Label();

        // Keep the original length for the success exit.
        masm.push(len);

        if (AMD64StringLatin1InflateOp.useAVX512ForStringInflateCompress(masm.target)) {
            emitAvx512Phase(masm, src, dst, len, tmp1, tmp2, tmp, res, belowThreshold, returnLength, returnZero);
        }
        if (masm.supports(AMD64.CPUFeature.SSE4_2)) {
            emitSsePhase(masm, src, dst, len, tmp1, tmp2, tmp3, tmp4, tmp, res, belowThreshold, returnZero);
        }
        emitScalarPhase(masm, src, dst, len, res, returnLength, returnZero);

        // Success: the result is the length saved on entry.
        masm.bind(returnLength);
        masm.pop(res);
        masm.jmpb(done);

        // Failure: the result is zero; discard the saved length (one 8-byte slot).
        masm.bind(returnZero);
        masm.xorl(res, res);
        masm.addq(AMD64.rsp, 8);

        masm.bind(done);
    }

    /**
     * Emits the AVX-512 phase: an optional masked head chunk, a 32-chars-per-iteration loop and
     * a masked tail chunk. Every path out of this phase restores {@code k1} from {@code k3}
     * except the early jump to {@code belowThreshold}, which is taken before {@code k1} is
     * touched.
     */
    private static void emitAvx512Phase(AMD64MacroAssembler masm, Register src, Register dst, Register len, Register tmp1, Register tmp2, Register tmp, Register res, Label belowThreshold, Label returnLength, Label returnZero) {
        Label restoreK1ReturnZero = new Label();
        Label postAlignment = new Label();

        // No bit at or above bit 5 set in len: skip this phase entirely.
        masm.testl(len, -32);
        masm.jcc(AMD64Assembler.ConditionFlag.Zero, belowThreshold);

        // Broadcast 0x00ff into every 16-bit lane of tmp2; chars are compared against it below.
        masm.movl(res, 0x00ff);
        masm.evpbroadcastw(tmp2, res);

        // Save k1; it is used as the lane mask for the partial chunks.
        masm.kmovq(AMD64.k3, AMD64.k1);

        // No bit at or above bit 6 set in len: skip the head chunk.
        masm.testl(len, -64);
        masm.jcc(AMD64Assembler.ConditionFlag.Zero, postAlignment);

        // tmp = (-dst) & 31: byte count that brings dst up to the next multiple of 32.
        masm.movl(tmp, dst);
        masm.andl(tmp, 32 - 1);
        masm.negl(tmp);
        masm.andl(tmp, 32 - 1);
        masm.testl(tmp, tmp);
        masm.jcc(AMD64Assembler.ConditionFlag.Zero, postAlignment);

        emitMaskedChunk(masm, src, dst, tmp, tmp1, tmp2, res, restoreK1ReturnZero);

        // Step over the head chunk: two source bytes and one destination byte per char.
        masm.addq(src, tmp);
        masm.addq(src, tmp);
        masm.addq(dst, tmp);
        masm.subl(len, tmp);

        masm.bind(postAlignment);
        Label loopTail = new Label();

        // tmp = chars handled by the vector loop (multiple of 32); len = chars left afterwards.
        masm.movl(tmp, len);
        masm.andl(tmp, -32);
        masm.jcc(AMD64Assembler.ConditionFlag.Zero, loopTail);
        masm.andl(len, 32 - 1);

        // Point src/dst past the vector part and count a negative index up to zero.
        masm.leaq(src, new AMD64Address(src, tmp, AMD64Address.Scale.Times2));
        masm.leaq(dst, new AMD64Address(dst, tmp, AMD64Address.Scale.Times1));
        masm.negq(tmp);

        Label loop = new Label();
        masm.bind(loop);
        masm.evmovdqu16(tmp1, new AMD64Address(src, tmp, AMD64Address.Scale.Times2));
        // Predicate 2 selects the unsigned less-or-equal comparison (see VPCMPUW encoding).
        masm.evpcmpuw(AMD64.k2, tmp1, tmp2, 2);
        masm.kortestd(AMD64.k2, AMD64.k2);
        // Carry clear is treated as "at least one lane failed the comparison".
        masm.jcc(AMD64Assembler.ConditionFlag.CarryClear, restoreK1ReturnZero);
        masm.evpmovwb(new AMD64Address(dst, tmp, AMD64Address.Scale.Times1), tmp1);
        masm.addq(tmp, 32);
        masm.jcc(AMD64Assembler.ConditionFlag.NotZero, loop);

        masm.bind(loopTail);
        masm.kmovq(AMD64.k1, AMD64.k3);

        // Nothing left after the loop: done.
        masm.testl(len, len);
        masm.jcc(AMD64Assembler.ConditionFlag.Zero, returnLength);

        emitMaskedChunk(masm, src, dst, len, tmp1, tmp2, res, restoreK1ReturnZero);

        masm.kmovq(AMD64.k1, AMD64.k3);
        masm.jmp(returnLength);

        masm.bind(restoreK1ReturnZero);
        masm.kmovq(AMD64.k1, AMD64.k3);
        masm.jmp(returnZero);
    }

    /**
     * Emits one masked AVX-512 step over the first {@code count} chars at {@code src}: builds a
     * lane mask with the low {@code count} bits set in {@code k1}, loads and compares the
     * selected chars against {@code limit}, jumps to {@code onFailure} when the mask test leaves
     * carry clear, and otherwise stores the narrowed bytes to {@code dst}. Clobbers
     * {@code maskBits}, {@code k1} and {@code k2}; does not advance {@code src} or {@code dst}.
     */
    private static void emitMaskedChunk(AMD64MacroAssembler masm, Register src, Register dst, Register count, Register chars, Register limit, Register maskBits, Label onFailure) {
        // ~(~0 << count), i.e. the low `count` bits set.
        masm.movl(maskBits, -1);
        masm.shlxl(maskBits, maskBits, count);
        masm.notl(maskBits);
        masm.kmovd(AMD64.k1, maskBits);

        masm.evmovdqu16(chars, AMD64.k1, new AMD64Address(src));
        // Predicate 2 selects the unsigned less-or-equal comparison (see VPCMPUW encoding).
        masm.evpcmpuw(AMD64.k2, AMD64.k1, chars, limit, 2);
        masm.ktestd(AMD64.k2, AMD64.k1);
        masm.jcc(AMD64Assembler.ConditionFlag.CarryClear, onFailure);
        masm.evpmovwb(new AMD64Address(dst), AMD64.k1, chars);
    }

    /**
     * Emits the SSE phase: a 16-chars-per-iteration loop followed by at most one 8-char step.
     * Binds {@code belowThreshold}, which the AVX-512 phase jumps to for short inputs. On exit
     * {@code len} holds the count of chars still to be processed (less than 8).
     */
    private static void emitSsePhase(AMD64MacroAssembler masm, Register src, Register dst, Register len, Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp, Register res, Label belowThreshold, Label returnZero) {
        Label sseTail = new Label();
        masm.bind(belowThreshold);

        // Bit pattern selecting the high byte of each of the two chars in a 32-bit lane.
        masm.movl(tmp, 0xff00ff00);

        // res = chars handled by the 16-wide loop (multiple of 16); len = chars left afterwards.
        masm.movl(res, len);
        masm.andl(res, -16);
        masm.jccb(AMD64Assembler.ConditionFlag.Zero, sseTail);
        masm.andl(len, 16 - 1);

        // Replicate the high-byte pattern across tmp1 and clear the accumulator tmp4.
        masm.movdl(tmp1, tmp);
        masm.pshufd(tmp1, tmp1, 0);
        masm.pxor(tmp4, tmp4);

        // Point src/dst past the looped part and count a negative index up to zero.
        masm.leaq(src, new AMD64Address(src, res, AMD64Address.Scale.Times2));
        masm.leaq(dst, new AMD64Address(dst, res, AMD64Address.Scale.Times1));
        masm.negq(res);

        Label loop = new Label();
        masm.bind(loop);
        // Two 16-byte loads: 8 chars each.
        masm.movdqu(tmp2, new AMD64Address(src, res, AMD64Address.Scale.Times2));
        masm.movdqu(tmp3, new AMD64Address(src, res, AMD64Address.Scale.Times2, 16));
        masm.por(tmp4, tmp2);
        masm.por(tmp4, tmp3);
        // Any high-byte bit set in the accumulated chars: give up.
        masm.ptest(tmp4, tmp1);
        masm.jcc(AMD64Assembler.ConditionFlag.NotZero, returnZero);
        masm.packuswb(tmp2, tmp3);
        masm.movdqu(new AMD64Address(dst, res, AMD64Address.Scale.Times1), tmp2);
        masm.addq(res, 16);
        masm.jcc(AMD64Assembler.ConditionFlag.NotZero, loop);

        Label copyChars = new Label();
        masm.bind(sseTail);

        // One more step of 8 chars if at least 8 remain.
        masm.movl(res, len);
        masm.andl(res, -8);
        masm.jccb(AMD64Assembler.ConditionFlag.Zero, copyChars);
        masm.andl(len, 8 - 1);

        // Rebuild the pattern: this point is also reached without running the loop setup above.
        masm.movdl(tmp1, tmp);
        masm.pshufd(tmp1, tmp1, 0);
        masm.pxor(tmp3, tmp3);

        masm.movdqu(tmp2, new AMD64Address(src));
        masm.ptest(tmp2, tmp1);
        masm.jccb(AMD64Assembler.ConditionFlag.NotZero, returnZero);
        masm.packuswb(tmp2, tmp3);
        // 8-byte store of the packed result.
        masm.movq(new AMD64Address(dst), tmp2);
        masm.addq(src, 16);
        masm.addq(dst, 8);

        masm.bind(copyChars);
    }

    /**
     * Emits the scalar remainder loop, one char per iteration. Jumps to {@code returnLength}
     * when {@code len} is already zero and to {@code returnZero} on the first char with a bit
     * set in {@code 0xff00}; otherwise falls through once all remaining chars are stored.
     */
    private static void emitScalarPhase(AMD64MacroAssembler masm, Register src, Register dst, Register len, Register res, Label returnLength, Label returnZero) {
        masm.testl(len, len);
        masm.jccb(AMD64Assembler.ConditionFlag.Zero, returnLength);

        // Point src/dst past the remainder and count a negative index up to zero.
        masm.leaq(src, new AMD64Address(src, len, AMD64Address.Scale.Times2));
        masm.leaq(dst, new AMD64Address(dst, len, AMD64Address.Scale.Times1));
        masm.negq(len);

        Label loop = new Label();
        masm.bind(loop);
        masm.movzwl(res, new AMD64Address(src, len, AMD64Address.Scale.Times2));
        masm.testl(res, 0xff00);
        masm.jccb(AMD64Assembler.ConditionFlag.NotZero, returnZero);
        masm.movb(new AMD64Address(dst, len, AMD64Address.Scale.Times1), res);
        masm.incrementq(len, 1);
        masm.jcc(AMD64Assembler.ConditionFlag.NotZero, loop);
    }
}

