/*
|
* Copyright (C) 2017 The Android Open Source Project
|
*
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
* you may not use this file except in compliance with the License.
|
* You may obtain a copy of the License at
|
*
|
* http://www.apache.org/licenses/LICENSE-2.0
|
*
|
* Unless required by applicable law or agreed to in writing, software
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* See the License for the specific language governing permissions and
|
* limitations under the License.
|
*/
|
|
#include "code_generator_x86_64.h"
|
|
#include "mirror/array-inl.h"
|
#include "mirror/string.h"
|
|
namespace art {
|
namespace x86_64 {
|
|
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
|
#define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
|
|
// Sets up register constraints for replicating a scalar into every lane of a SIMD register.
void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  HInstruction* input = instruction->InputAt(0);
  // A zero bit pattern can be materialized directly (xorps), so keep it as a constant
  // instead of occupying a register.
  bool is_zero = IsZeroBitPattern(input);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      // Integral input arrives in a general-purpose register; output is an XMM register.
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      // Non-zero FP input is shuffled in place, so the output aliases the input.
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresFpuRegister());
      locations->SetOut(is_zero ? Location::RequiresFpuRegister()
                                : Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Emits code that broadcasts a scalar into all lanes of the destination XMM register,
// using movd plus unpack/shuffle sequences sized to the packed element type.
void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();

  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    __ xorps(dst, dst);
    return;
  }

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      // Move scalar to lane 0, then widen the duplicate: bytes -> words -> dwords,
      // and finally broadcast dword 0 to all four dwords.
      __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
      __ punpcklbw(dst, dst);
      __ punpcklwd(dst, dst);
      __ pshufd(dst, dst, Immediate(0));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
      __ punpcklwd(dst, dst);
      __ pshufd(dst, dst, Immediate(0));
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
      __ pshufd(dst, dst, Immediate(0));
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      // 64-bit move, then duplicate the low quadword into the high quadword.
      __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ true);
      __ punpcklqdq(dst, dst);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      DCHECK(locations->InAt(0).Equals(locations->Out()));
      // Scalar is already in lane 0 of dst; broadcast it to all four lanes.
      __ shufps(dst, dst, Immediate(0));
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      DCHECK(locations->InAt(0).Equals(locations->Out()));
      __ shufpd(dst, dst, Immediate(0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Sets up register constraints for extracting lane 0 of a vector into a scalar.
void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      // Integral extract moves from an XMM register to a general-purpose register.
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      // FP lane 0 already occupies the scalar position, so out aliases in (no code needed).
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Emits code that extracts lane 0 of the source vector into the scalar output.
// Sub-word integral types are currently not supported.
void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:  // TODO: up to here, and?
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      // movd transfers lane 0 (32 bits) to the general-purpose register.
      __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ false);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ true);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 4u);
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Helper to set up locations for vector unary operations.
|
// Helper to set up locations for vector unary operations.
// All supported packed types take an XMM input and produce an XMM output
// in a possibly different register.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Sets up locations for a horizontal vector reduction.
void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
  // Long reduction or min/max require a temporary.
  if (instruction->GetPackedType() == DataType::Type::kInt64 ||
      instruction->GetReductionKind() == HVecReduce::kMin ||
      instruction->GetReductionKind() == HVecReduce::kMax) {
    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
  }
}
|
|
// Emits code for a horizontal reduction of a vector into lane 0 of the output.
// Only kSum is implemented; kMin/kMax were removed (see b/117863065) and now fail fast.
void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          // Two horizontal adds collapse the four dwords into every lane (lane 0 holds sum).
          __ movaps(dst, src);
          __ phaddd(dst, dst);
          __ phaddd(dst, dst);
          break;
        case HVecReduce::kMin:
        case HVecReduce::kMax:
          // Historical note: We've had a broken implementation here. b/117863065
          // Do not draw on the old code if we ever want to bring MIN/MAX reduction back.
          LOG(FATAL) << "Unsupported reduction type.";
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt64: {
      DCHECK_EQ(2u, instruction->GetVectorLength());
      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          // Bring the high quadword down into tmp and add it to the low quadword.
          __ movaps(tmp, src);
          __ movaps(dst, src);
          __ punpckhqdq(tmp, tmp);
          __ paddq(dst, tmp);
          break;
        case HVecReduce::kMin:
        case HVecReduce::kMax:
          // Fixed message: the SIMD type (kInt64) is supported; the reduction kind is not.
          LOG(FATAL) << "Unsupported reduction type.";
          UNREACHABLE();
      }
      break;
    }
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Location setup for a lane-wise vector conversion: standard unary-op constraints.
void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) {
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecUnOpLocations(allocator, instruction);
}
|
|
// Emits code for a lane-wise vector conversion.
// Only int32 -> float32 (cvtdq2ps) is supported.
void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DataType::Type from = instruction->GetInputType();
  DataType::Type to = instruction->GetResultType();
  if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    DCHECK_EQ(4u, instruction->GetVectorLength());
    __ cvtdq2ps(dst, src);
  } else {
    LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
  }
}
|
|
// Location setup for vector negation: standard unary-op constraints.
void LocationsBuilderX86_64::VisitVecNeg(HVecNeg* instruction) {
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecUnOpLocations(allocator, instruction);
}
|
|
// Emits code for vector negation, computed as (0 - src) in each lane since
// SSE has no packed negate instruction.
void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pxor(dst, dst);
      __ psubb(dst, src);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pxor(dst, dst);
      __ psubw(dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pxor(dst, dst);
      __ psubd(dst, src);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ pxor(dst, dst);
      __ psubq(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ xorps(dst, dst);
      __ subps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ xorpd(dst, dst);
      __ subpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Sets up locations for vector absolute value.
void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
  // Integral-abs requires a temporary for the comparison.
  if (instruction->GetPackedType() == DataType::Type::kInt32) {
    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
  }
}
|
|
// Emits code for vector absolute value.
// kInt32 uses the sign-mask trick (xor with sign, subtract sign); FP types clear
// the sign bit with an all-ones mask shifted right by one.
void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32: {
      DCHECK_EQ(4u, instruction->GetVectorLength());
      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
      __ movaps(dst, src);
      // tmp becomes all-ones per lane where dst is negative (0 > dst), else all-zeros.
      __ pxor(tmp, tmp);
      __ pcmpgtd(tmp, dst);
      // abs(x) == (x ^ mask) - mask, where mask is the replicated sign.
      __ pxor(dst, tmp);
      __ psubd(dst, tmp);
      break;
    }
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pcmpeqb(dst, dst);  // all ones
      __ psrld(dst, Immediate(1));  // 0x7FFFFFFF per lane: clears the sign bit
      __ andps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ pcmpeqb(dst, dst);  // all ones
      __ psrlq(dst, Immediate(1));  // 0x7FFFFFFFFFFFFFFF per lane
      __ andpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Sets up locations for vector not.
void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
  // Boolean-not requires a temporary to construct the 16 x one.
  if (instruction->GetPackedType() == DataType::Type::kBool) {
    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
  }
}
|
|
// Emits code for vector not. Booleans flip 0 <-> 1 (xor with a vector of ones);
// all other types are a bitwise complement (xor with all-ones).
void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool: {  // special case boolean-not
      DCHECK_EQ(16u, instruction->GetVectorLength());
      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
      // Build a vector of byte ones: 0 - (-1) == 1 in each byte lane.
      __ pxor(dst, dst);
      __ pcmpeqb(tmp, tmp);  // all ones
      __ psubb(dst, tmp);  // 16 x one
      __ pxor(dst, src);
      break;
    }
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ pcmpeqb(dst, dst);  // all ones
      __ pxor(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pcmpeqb(dst, dst);  // all ones
      __ xorps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ pcmpeqb(dst, dst);  // all ones
      __ xorpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Helper to set up locations for vector binary operations.
|
// Helper to set up locations for vector binary operations.
// x86 SSE instructions are two-operand (dst = dst op src), so the output must
// alias the first input.
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Location setup for vector addition: standard binary-op constraints.
void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) {
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecBinOpLocations(allocator, instruction);
}
|
|
// Emits code for lane-wise vector addition; dst doubles as the first operand
// (x86 two-operand form), with the instruction chosen by element width.
void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ paddb(dst, src);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ paddw(dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ paddd(dst, src);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ paddq(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ addps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ addpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Location setup for saturating vector addition: standard binary-op constraints.
void LocationsBuilderX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecBinOpLocations(allocator, instruction);
}
|
|
// Emits code for lane-wise saturating addition. Only 8- and 16-bit lanes are
// supported, matching the SSE saturating-add instructions (unsigned/signed variants).
void InstructionCodeGeneratorX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ paddusb(dst, src);
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ paddsb(dst, src);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ paddusw(dst, src);
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ paddsw(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Location setup for halving (rounded average) vector addition.
void LocationsBuilderX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecBinOpLocations(allocator, instruction);
}
|
|
// Emits code for halving addition via the packed-average instructions.
// pavgb/pavgw compute the rounded average, so only the rounded, unsigned
// variants are supported here.
void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();

  DCHECK(instruction->IsRounded());

  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pavgb(dst, src);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pavgw(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Location setup for vector subtraction: standard binary-op constraints.
void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) {
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecBinOpLocations(allocator, instruction);
}
|
|
// Emits code for lane-wise vector subtraction (dst = dst - src), with the
// instruction chosen by element width.
void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ psubb(dst, src);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ psubw(dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ psubd(dst, src);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ psubq(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ subps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ subpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Location setup for saturating vector subtraction: standard binary-op constraints.
void LocationsBuilderX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecBinOpLocations(allocator, instruction);
}
|
|
// Emits code for lane-wise saturating subtraction. Only 8- and 16-bit lanes are
// supported, matching the SSE saturating-subtract instructions.
void InstructionCodeGeneratorX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ psubusb(dst, src);
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ psubsb(dst, src);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ psubusw(dst, src);
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ psubsw(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Location setup for vector multiplication: standard binary-op constraints.
void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) {
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecBinOpLocations(allocator, instruction);
}
|
|
// Emits code for lane-wise vector multiplication. Note there is no packed
// 8-bit or 64-bit integer multiply here, so those types are rejected.
void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pmullw(dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pmulld(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ mulps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ mulpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Location setup for vector division: standard binary-op constraints.
void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) {
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecBinOpLocations(allocator, instruction);
}
|
|
// Emits code for lane-wise vector division. Only floating-point types are
// supported (there is no packed integer divide in SSE).
void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ divps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ divpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Location setup for lane-wise vector minimum: standard binary-op constraints.
void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) {
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecBinOpLocations(allocator, instruction);
}
|
|
// Emits code for lane-wise vector minimum, selecting the signed/unsigned
// packed-min instruction that matches the element type.
void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pminub(dst, src);
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pminsb(dst, src);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pminuw(dst, src);
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pminsw(dst, src);
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pminud(dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pminsd(dst, src);
      break;
    // Next cases are sloppy wrt 0.0 vs -0.0.
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ minps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ minpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Location setup for lane-wise vector maximum: standard binary-op constraints.
void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecBinOpLocations(allocator, instruction);
}
|
|
// Emits code for lane-wise vector maximum, selecting the signed/unsigned
// packed-max instruction that matches the element type.
void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pmaxub(dst, src);
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pmaxsb(dst, src);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pmaxuw(dst, src);
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pmaxsw(dst, src);
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pmaxud(dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pmaxsd(dst, src);
      break;
    // Next cases are sloppy wrt 0.0 vs -0.0.
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ maxps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ maxpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Location setup for bitwise vector and: standard binary-op constraints.
void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecBinOpLocations(allocator, instruction);
}
|
|
// Emits code for a bitwise vector and. The operation is width-agnostic, so all
// integral types share pand; FP types use the FP-domain variants.
void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ pand(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ andps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ andpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Location setup for bitwise vector and-not: standard binary-op constraints.
void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) {
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecBinOpLocations(allocator, instruction);
}
|
|
// Emits code for a bitwise vector and-not via pandn/andnps/andnpd
// (dst = ~dst & src, per the x86 semantics of these instructions).
void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ pandn(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ andnps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ andnpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Location setup for bitwise vector or: standard binary-op constraints.
void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) {
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecBinOpLocations(allocator, instruction);
}
|
|
// Emits code for a bitwise vector or; width-agnostic, so all integral types
// share por, while FP types use the FP-domain variants.
void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ por(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ orps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ orpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Location setup for bitwise vector xor: standard binary-op constraints.
void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) {
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecBinOpLocations(allocator, instruction);
}
|
|
// Emits code for a bitwise vector xor; width-agnostic, so all integral types
// share pxor, while FP types use the FP-domain variants.
void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ pxor(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ xorps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ xorpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Helper to set up locations for vector shift operations.
|
static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
|
LocationSummary* locations = new (allocator) LocationSummary(instruction);
|
switch (instruction->GetPackedType()) {
|
case DataType::Type::kUint16:
|
case DataType::Type::kInt16:
|
case DataType::Type::kInt32:
|
case DataType::Type::kInt64:
|
locations->SetInAt(0, Location::RequiresFpuRegister());
|
locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
|
locations->SetOut(Location::SameAsFirstInput());
|
break;
|
default:
|
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
|
UNREACHABLE();
|
}
|
}
|
|
// Location setup for vector shift-left: shared shift constraints (constant distance).
void LocationsBuilderX86_64::VisitVecShl(HVecShl* instruction) {
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecShiftLocations(allocator, instruction);
}
|
|
// Emits code for a lane-wise logical shift-left by a constant distance.
void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ psllw(dst, Immediate(static_cast<int8_t>(value)));
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pslld(dst, Immediate(static_cast<int8_t>(value)));
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ psllq(dst, Immediate(static_cast<int8_t>(value)));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
void LocationsBuilderX86_64::VisitVecShr(HVecShr* instruction) {
  // Arithmetic right shift shares the common vector-shift location setup.
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecShiftLocations(allocator, instruction);
}
|
|
void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  // The shift is performed in place on top of the first input.
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  // The shift distance was fixed to a constant by CreateVecShiftLocations.
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  // NOTE: kInt64 is intentionally absent — presumably because SSE/AVX2 provide
  // no packed 64-bit arithmetic right shift (PSRAQ requires AVX-512); confirm.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ psraw(dst, Immediate(static_cast<int8_t>(value)));  // arithmetic shift, packed words
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ psrad(dst, Immediate(static_cast<int8_t>(value)));  // arithmetic shift, packed doublewords
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
void LocationsBuilderX86_64::VisitVecUShr(HVecUShr* instruction) {
  // Logical right shift shares the common vector-shift location setup.
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecShiftLocations(allocator, instruction);
}
|
|
void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  // The shift is performed in place on top of the first input.
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  // The shift distance was fixed to a constant by CreateVecShiftLocations.
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ psrlw(dst, Immediate(static_cast<int8_t>(value)));  // logical shift, packed words
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ psrld(dst, Immediate(static_cast<int8_t>(value)));  // logical shift, packed doublewords
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ psrlq(dst, Immediate(static_cast<int8_t>(value)));  // logical shift, packed quadwords
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  HInstruction* input = instruction->InputAt(0);
  bool is_zero = IsZeroBitPattern(input);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      // A zero scalar can remain a constant (no register needed); any other
      // integral scalar arrives in a core register.
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      // FP scalars arrive in an XMM register unless they are a zero bit pattern.
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  // Zero out all other elements first.
  __ xorps(dst, dst);

  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    return;
  }

  // Set required elements: move the scalar into lane 0, leaving the remaining
  // lanes as the zeros produced above.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:  // TODO: up to here, and?
      // Sub-word element insertion is not implemented for non-zero scalars.
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      // NOTE(review): relies on movd emitting the 64-bit (movq) form for a
      // 64-bit CpuRegister — confirm against the x86_64 assembler.
      __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());  // is 64-bit
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Helper to set up locations for vector accumulations.
|
static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
  LocationSummary* summary = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      // Accumulator and both operands live in XMM registers; the result is
      // produced in place on the first input (the accumulator).
      summary->SetInAt(0, Location::RequiresFpuRegister());
      summary->SetInAt(1, Location::RequiresFpuRegister());
      summary->SetInAt(2, Location::RequiresFpuRegister());
      summary->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  // Uses the common accumulate location setup (codegen itself is unimplemented).
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecAccumLocations(allocator, instruction);
}
|
|
void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  // Multiply-accumulate codegen is not implemented for x86_64 SIMD.
  // TODO: pmaddwd?
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
|
|
void LocationsBuilderX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  // Uses the common accumulate location setup (codegen itself is unimplemented).
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecAccumLocations(allocator, instruction);
}
|
|
void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  // Sum-of-absolute-differences codegen is not implemented for x86_64 SIMD.
  // TODO: psadbw for unsigned?
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
|
|
void LocationsBuilderX86_64::VisitVecDotProd(HVecDotProd* instruction) {
  // Dot product is not implemented for x86_64 SIMD.
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
|
|
void InstructionCodeGeneratorX86_64::VisitVecDotProd(HVecDotProd* instruction) {
  // Dot product is not implemented for x86_64 SIMD.
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
|
|
// Helper to set up locations for vector memory operations.
|
static void CreateVecMemLocations(ArenaAllocator* allocator,
                                  HVecMemoryOperation* instruction,
                                  bool is_load) {
  LocationSummary* summary = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      // Input 0 is the base (array/string reference), input 1 the index,
      // which may stay a constant.
      summary->SetInAt(0, Location::RequiresRegister());
      summary->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
      if (is_load) {
        // The loaded vector is produced in an XMM register.
        summary->SetOut(Location::RequiresFpuRegister());
      } else {
        // The vector to store arrives as a third input in an XMM register.
        summary->SetInAt(2, Location::RequiresFpuRegister());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
// Helper to construct address for vector memory operations.
|
static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
  Location base = locations->InAt(0);
  Location index = locations->InAt(1);
  // Map the element size onto an addressing-mode scale factor; byte-sized
  // elements (or any other size) fall back to a scale of one.
  ScaleFactor scale = TIMES_1;
  if (size == 2) {
    scale = TIMES_2;
  } else if (size == 4) {
    scale = TIMES_4;
  } else if (size == 8) {
    scale = TIMES_8;
  }
  // Incorporate the string or array offset in the address computation.
  uint32_t offset = is_string_char_at
      ? mirror::String::ValueOffset().Uint32Value()
      : mirror::Array::DataOffset(size).Uint32Value();
  return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset);
}
|
|
void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
  // A compressed-string character load additionally needs an XMM temp
  // (used by codegen to zero-extend bytes into chars).
  const bool needs_string_temp =
      mirror::kUseStringCompression && instruction->IsStringCharAt();
  if (needs_string_temp) {
    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
  }
}
|
|
void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  Address address = VecAddress(locations, size, instruction->IsStringCharAt());
  XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
  // 16-byte-aligned data may use the aligned move forms.
  bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      // Special handling of compressed/uncompressed string load: dispatch at
      // runtime on the compression bit stored in the string's count field.
      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
        NearLabel done, not_compressed;
        XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
        // Test compression bit.
        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                      "Expecting 0=compressed, 1=uncompressed");
        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
        __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1));
        __ j(kNotZero, &not_compressed);
        // Zero extend 8 compressed bytes into 8 chars: load 8 bytes (note the
        // size-1 address, since compressed elements are bytes), then
        // interleave with zeros.
        __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
        __ pxor(tmp, tmp);
        __ punpcklbw(reg, tmp);
        __ jmp(&done);
        // Load 8 direct uncompressed chars.
        __ Bind(&not_compressed);
        is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
        __ Bind(&done);
        return;
      }
      FALLTHROUGH_INTENDED;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      // All integral types use the same full-register integer move.
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) {
  // Stores share the common vector memory-operation location setup.
  ArenaAllocator* const allocator = GetGraph()->GetAllocator();
  CreateVecMemLocations(allocator, instruction, /*is_load*/ false);
}
|
|
void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  // Stores never target a string payload (strings are immutable),
  // hence is_string_char_at is false.
  Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
  // The vector to store is the third input (see CreateVecMemLocations).
  XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
  // 16-byte-aligned data may use the aligned move forms.
  bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      // All integral types use the same full-register integer move.
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
|
|
#undef __
|
|
} // namespace x86_64
|
} // namespace art
|