Skip to content

Commit

Permalink
[GR-49374] [DO NOT MERGE] Remove ZeroExtendNode.inputAlwaysPositive a…
Browse files Browse the repository at this point in the history
…nd related optimizations.

PullRequest: graal/15812
  • Loading branch information
mur47x111 committed Apr 17, 2024
2 parents 407ca89 + 1de691d commit 428de6f
Show file tree
Hide file tree
Showing 14 changed files with 391 additions and 192 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.graal.compiler.core.test;

import org.junit.Assume;
import org.junit.Test;

import jdk.graal.compiler.core.phases.EconomyHighTier;
import jdk.graal.compiler.core.phases.EconomyMidTier;
import jdk.graal.compiler.graph.Graph;
import jdk.graal.compiler.nodes.StructuredGraph;
import jdk.graal.compiler.nodes.calc.AddNode;
import jdk.graal.compiler.phases.common.FloatingReadPhase;
import jdk.graal.compiler.phases.common.OptimizeOffsetAddressPhase;
import jdk.graal.compiler.phases.tiers.Suites;

/**
* Test {@link jdk.graal.compiler.phases.common.OptimizeOffsetAddressPhase}.
*/
public class OptimizeOffsetAddressTest extends GraalCompilerTest {

static int snippet0(byte[] a) {
int sum = 0;
for (int i = 16; i < a.length; i++) {
sum += a[i - 16];
}
return sum;
}

@Test
public void testSnippet0() {
Assume.assumeTrue(UNSAFE.arrayBaseOffset(byte[].class) == 16);

StructuredGraph graph = parseEager("snippet0", StructuredGraph.AllowAssumptions.YES);

new EconomyHighTier().apply(graph, getDefaultHighTierContext());
new FloatingReadPhase(createCanonicalizerPhase()).apply(graph, getDefaultMidTierContext());
new EconomyMidTier().apply(graph, getDefaultMidTierContext());

assertTrue(graph.getNodes().filter(AddNode.class).count() == 4);
new OptimizeOffsetAddressPhase(createCanonicalizerPhase()).apply(graph, getDefaultLowTierContext());
// We turn Add(16, ZeroExtend(Add(i, -16))) into Add(16, Add(ZeroExtend(i), -16)),
// and expect both Adds to be folded.
// The inner Add has other dependency and won't be eliminated.
assertTrue(graph.getNodes().filter(AddNode.class).count() == 3);
}

static int snippet1(byte[] a) {
int sum = 0;
int i = -2147483646;
do {
int index = -2147483647 * i + -2147483647;
sum += a[index];
i += -2;
} while (i <= -2147483646);
return sum;
}

@Test
public void testSnippet1() {
StructuredGraph graph = parseEager("snippet1", StructuredGraph.AllowAssumptions.YES);

Suites suites = super.createSuites(getInitialOptions());
suites.getHighTier().apply(graph, getDefaultHighTierContext());
suites.getMidTier().apply(graph, getDefaultMidTierContext());

Graph.Mark mark = graph.getMark();
new OptimizeOffsetAddressPhase(createCanonicalizerPhase()).apply(graph, getDefaultLowTierContext());
// OptimizeOffsetAddressPhase is not applicable
assertTrue(graph.getNewNodes(mark).isEmpty());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ public Value emitNarrow(Value inputVal, int toBits) {
}

@Override
public Value emitZeroExtend(Value inputVal, int fromBits, int toBits) {
public Value emitZeroExtend(Value inputVal, int fromBits, int toBits, boolean requiresExplicitZeroExtend, boolean requiresLIRKindChange) {
assert fromBits <= toBits && toBits <= 64 : fromBits + " " + toBits;
if (fromBits == toBits) {
return inputVal;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -992,7 +992,7 @@ public Value emitSignExtend(Value inputVal, int fromBits, int toBits) {
}

@Override
public Value emitZeroExtend(Value inputVal, int fromBits, int toBits) {
public Value emitZeroExtend(Value inputVal, int fromBits, int toBits, boolean requiresExplicitZeroExtend, boolean requiresLIRKindChange) {
assert fromBits <= toBits && toBits <= 64 : fromBits + " " + toBits;
if (fromBits == toBits) {
return inputVal;
Expand All @@ -1013,16 +1013,34 @@ public Value emitZeroExtend(Value inputVal, int fromBits, int toBits) {
/*
* Always emit DWORD operations, even if the resultKind is Long. On AMD64, all DWORD
* operations implicitly set the upper half of the register to 0, which is what we want
* anyway. Compared to the QWORD oparations, the encoding of the DWORD operations is
* sometimes one byte shorter.
* anyway. Compared to the QWORD operations, the encoding of the DWORD operations is
* sometimes one byte shorter. See section 3.4.1.1 in
* @formatter:off
* https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-1-manual.pdf
* @formatter:on
*/
switch (fromBits) {
case 8:
return emitConvertOp(resultKind, MOVZXB, DWORD, inputVal);
case 16:
return emitConvertOp(resultKind, MOVZX, DWORD, inputVal);
case 32:
return emitConvertOp(resultKind, MOV, DWORD, inputVal);
if (requiresExplicitZeroExtend) {
return emitConvertOp(resultKind, MOV, DWORD, inputVal);
} else if (requiresLIRKindChange) {
// We assume that the DWORD-size inputVal is generated via DWORD operation
// and its upper half is implicitly set to 0. If the input is only used by
// the current ZeroExtendNode, the MovToRegOp will hint the register
// allocator to use the same register for both result and input, which then
// becomes a no-op.
Variable result = getLIRGen().newVariable(resultKind);
getLIRGen().append(new AMD64Move.MoveToRegOp(AMD64Kind.DWORD, result, asAllocatable(inputVal)));
return result;
} else {
// There is no need for changing the LIRKind. We can directly treat the
// input value as implicitly zero-extended to 64-bits.
return inputVal;
}
}

// odd bit count, fall back on manual masking
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import jdk.graal.compiler.phases.common.FixReadsPhase;
import jdk.graal.compiler.phases.common.LowTierLoweringPhase;
import jdk.graal.compiler.phases.common.OptimizeExtendsPhase;
import jdk.graal.compiler.phases.common.OptimizeOffsetAddressPhase;
import jdk.graal.compiler.phases.common.ProfileCompiledMethodsPhase;
import jdk.graal.compiler.phases.common.PropagateDeoptimizeProbabilityPhase;
import jdk.graal.compiler.phases.common.RemoveOpaqueValuePhase;
Expand Down Expand Up @@ -74,6 +75,8 @@ public LowTier(OptionValues options) {

appendPhase(new ExpandLogicPhase(canonicalizerWithGVN));

appendPhase(new OptimizeOffsetAddressPhase(canonicalizerWithGVN));

appendPhase(new FixReadsPhase(true,
new SchedulePhase(GraalOptions.StressTestEarlyReads.getValue(options) ? SchedulingStrategy.EARLIEST : SchedulingStrategy.LATEST_OUT_OF_LOOPS_IMPLICIT_NULL_CHECKS)));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,41 +25,18 @@

package jdk.graal.compiler.hotspot.amd64;

import org.graalvm.collections.EconomicMap;

import jdk.graal.compiler.asm.amd64.AMD64Address;
import jdk.graal.compiler.core.amd64.AMD64AddressNode;
import jdk.graal.compiler.core.amd64.AMD64CompressAddressLowering;
import jdk.graal.compiler.core.common.CompressEncoding;
import jdk.graal.compiler.core.common.Stride;
import jdk.graal.compiler.core.common.type.IntegerStamp;
import jdk.graal.compiler.graph.Node;
import jdk.graal.compiler.hotspot.GraalHotSpotVMConfig;
import jdk.graal.compiler.nodes.CompressionNode;
import jdk.graal.compiler.nodes.ConstantNode;
import jdk.graal.compiler.nodes.NodeView;
import jdk.graal.compiler.nodes.PhiNode;
import jdk.graal.compiler.nodes.StructuredGraph;
import jdk.graal.compiler.nodes.ValueNode;
import jdk.graal.compiler.nodes.calc.AddNode;
import jdk.graal.compiler.nodes.calc.SignExtendNode;
import jdk.graal.compiler.nodes.calc.ZeroExtendNode;
import jdk.graal.compiler.nodes.loop.BasicInductionVariable;
import jdk.graal.compiler.nodes.loop.CountedLoopInfo;
import jdk.graal.compiler.nodes.loop.DerivedInductionVariable;
import jdk.graal.compiler.nodes.loop.InductionVariable;
import jdk.graal.compiler.nodes.loop.LoopEx;
import jdk.graal.compiler.nodes.loop.LoopsData;
import jdk.graal.compiler.nodes.memory.address.AddressNode;
import jdk.graal.compiler.nodes.memory.address.OffsetAddressNode;
import jdk.graal.compiler.nodes.spi.LoopsDataProvider;
import jdk.vm.ci.code.Register;

public class AMD64HotSpotAddressLowering extends AMD64CompressAddressLowering {

private static final int ADDRESS_BITS = 64;
private static final int INT_BITS = 32;

private final long heapBase;
private final Register heapBaseRegister;

Expand Down Expand Up @@ -101,119 +78,4 @@ protected final boolean improveUncompression(AMD64AddressNode addr, CompressionN
addr.setIndex(compression.getValue());
return true;
}

@Override
public void preProcess(StructuredGraph graph, LoopsDataProvider loopsDataProvider) {
if (graph.hasLoops()) {
LoopsData loopsData = loopsDataProvider.getLoopsData(graph);
loopsData.detectCountedLoops();
for (LoopEx loop : loopsData.countedLoops()) {
for (OffsetAddressNode offsetAdressNode : loop.whole().nodes().filter(OffsetAddressNode.class)) {
tryOptimize(offsetAdressNode, loop);
}
}
}
}

@Override
public void postProcess(AddressNode lowered) {
// Allow implicit zero extend for always positive input. This
// assumes that the upper bits of the operand is zero out by
// the backend.
AMD64AddressNode address = (AMD64AddressNode) lowered;
address.setBase(tryImplicitZeroExtend(address.getBase()));
address.setIndex(tryImplicitZeroExtend(address.getIndex()));
}

private static void tryOptimize(OffsetAddressNode offsetAddress, LoopEx loop) {
EconomicMap<Node, InductionVariable> ivs = loop.getInductionVariables();
InductionVariable currentIV = ivs.get(offsetAddress.getOffset());
while (currentIV != null) {
if (!(currentIV instanceof DerivedInductionVariable)) {
break;
}
ValueNode currentValue = currentIV.valueNode();
if (currentValue.isDeleted()) {
break;
}

if (currentValue instanceof ZeroExtendNode) {
ZeroExtendNode zeroExtendNode = (ZeroExtendNode) currentValue;
if (applicableToImplicitZeroExtend(zeroExtendNode)) {
ValueNode input = zeroExtendNode.getValue();
if (input instanceof AddNode) {
AddNode add = (AddNode) input;
if (add.getX().isConstant()) {
optimizeAdd(zeroExtendNode, (ConstantNode) add.getX(), add.getY(), loop);
} else if (add.getY().isConstant()) {
optimizeAdd(zeroExtendNode, (ConstantNode) add.getY(), add.getX(), loop);
}
}
}
}

currentIV = ((DerivedInductionVariable) currentIV).getBase();
}
}

/**
* Given that Add(a, cst) is always positive, performs the following: ZeroExtend(Add(a, cst)) ->
* Add(SignExtend(a), SignExtend(cst)).
*/
private static void optimizeAdd(ZeroExtendNode zeroExtendNode, ConstantNode constant, ValueNode other, LoopEx loop) {
StructuredGraph graph = zeroExtendNode.graph();
AddNode addNode = graph.unique(new AddNode(signExtend(other, loop), ConstantNode.forLong(constant.asJavaConstant().asInt(), graph)));
zeroExtendNode.replaceAtUsages(addNode);
}

/**
* Create a sign extend for {@code input}, or zero extend if {@code input} can be proven
* positive.
*/
private static ValueNode signExtend(ValueNode input, LoopEx loop) {
StructuredGraph graph = input.graph();
if (input instanceof PhiNode) {
EconomicMap<Node, InductionVariable> ivs = loop.getInductionVariables();
InductionVariable inductionVariable = ivs.get(input);
if (inductionVariable != null && inductionVariable instanceof BasicInductionVariable) {
CountedLoopInfo countedLoopInfo = loop.counted();
IntegerStamp initStamp = (IntegerStamp) inductionVariable.initNode().stamp(NodeView.DEFAULT);
if (initStamp.isPositive()) {
if (inductionVariable.isConstantExtremum() && countedLoopInfo.counterNeverOverflows()) {
long init = inductionVariable.constantInit();
long stride = inductionVariable.constantStride();
long extremum = inductionVariable.constantExtremum();

if (init >= 0 && extremum >= 0) {
long shortestTrip = (extremum - init) / stride + 1;
if (countedLoopInfo.constantMaxTripCount().equals(shortestTrip)) {
return graph.unique(new ZeroExtendNode(input, INT_BITS, ADDRESS_BITS, true));
}
}
}
if (countedLoopInfo.getLimitCheckedIV() == inductionVariable &&
inductionVariable.direction() == InductionVariable.Direction.Up &&
(countedLoopInfo.getOverFlowGuard() != null || countedLoopInfo.counterNeverOverflows())) {
return graph.unique(new ZeroExtendNode(input, INT_BITS, ADDRESS_BITS, true));
}
}
}
}
return input.graph().addOrUnique(SignExtendNode.create(input, ADDRESS_BITS, NodeView.DEFAULT));
}

private static boolean applicableToImplicitZeroExtend(ZeroExtendNode zeroExtendNode) {
return zeroExtendNode.isInputAlwaysPositive() && zeroExtendNode.getInputBits() == INT_BITS && zeroExtendNode.getResultBits() == ADDRESS_BITS;
}

private static ValueNode tryImplicitZeroExtend(ValueNode input) {
if (input instanceof ZeroExtendNode) {
ZeroExtendNode zeroExtendNode = (ZeroExtendNode) input;
if (applicableToImplicitZeroExtend(zeroExtendNode)) {
return zeroExtendNode.getValue();
}
}
return input;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,16 @@ public interface ArithmeticLIRGeneratorTool {

Value emitSignExtend(Value inputVal, int fromBits, int toBits);

Value emitZeroExtend(Value inputVal, int fromBits, int toBits);
default Value emitZeroExtend(Value inputVal, int fromBits, int toBits) {
return emitZeroExtend(inputVal, fromBits, toBits, true, true);
}

/**
* Some architectures support implicit zero extend. Hint the backend to omit zero extend where
* possible by passing false to {@code requiresExplicitZeroExtend}, and to avoid generating
* intermediate value of the result LIRKind by passing false to {@code requiresLIRKindChange}.
*/
Value emitZeroExtend(Value inputVal, int fromBits, int toBits, boolean requiresExplicitZeroExtend, boolean requiresLIRKindChange);

Value emitMathAbs(Value input);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ public ValueNode canonical(CanonicalizerTool tool, ValueNode forValue) {
} else if (other instanceof ZeroExtendNode) {
// xxxx -(zero-extend)-> 00000000 0000xxxx -(narrow)-> 0000xxxx
// ==> xxxx -(zero-extend)-> 0000xxxx
return new ZeroExtendNode(other.getValue(), other.getInputBits(), getResultBits(), ((ZeroExtendNode) other).isInputAlwaysPositive());
return new ZeroExtendNode(other.getValue(), other.getInputBits(), getResultBits());
}
}
} else if (forValue instanceof AndNode) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ private static ValueNode canonical(SignExtendNode self, ValueNode forValue, int
if (other.getResultBits() > other.getInputBits()) {
// sxxx -(zero-extend)-> 0000 sxxx -(sign-extend)-> 00000000 0000sxxx
// ==> sxxx -(zero-extend)-> 00000000 0000sxxx
return ZeroExtendNode.create(other.getValue(), other.getInputBits(), resultBits, view, other.isInputAlwaysPositive());
return ZeroExtendNode.create(other.getValue(), other.getInputBits(), resultBits, view);
}
}

Expand All @@ -124,7 +124,7 @@ private static ValueNode canonical(SignExtendNode self, ValueNode forValue, int
if ((inputStamp.mayBeSet() & (1L << (inputBits - 1))) == 0L) {
// 0xxx -(sign-extend)-> 0000 0xxx
// ==> 0xxx -(zero-extend)-> 0000 0xxx
return ZeroExtendNode.create(forValue, inputBits, resultBits, view, true);
return ZeroExtendNode.create(forValue, inputBits, resultBits, view);
}
}
if (forValue instanceof NarrowNode) {
Expand Down
Loading

0 comments on commit 428de6f

Please sign in to comment.