Skip to content

Commit

Permalink
[SYCL] Reimplemented -f[no]sycl-early-optimizations flag (#7701)
Browse files Browse the repository at this point in the history
Reimplemented the `-f[no]sycl-early-optimizations` compiler flag to
separate it from the meaning of `-disable-llvm-passes` for more
fidelity. This required a change to its definition, setting of a new
codegen option behind-the-scenes, and small logic changes to the
optimization pipeline to factor in the new flag. Existing tests all
still pass.
  • Loading branch information
andylshort committed Jan 6, 2023
1 parent 4713aeb commit d164fd9
Show file tree
Hide file tree
Showing 11 changed files with 165 additions and 91 deletions.
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/CodeGenOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,9 @@ CODEGENOPT(OpaquePointers, 1, 0)
/// non-deleting destructors. (No effect on Microsoft ABI.)
CODEGENOPT(CtorDtorReturnThis, 1, 0)

/// Whether to disable the standard optimization pipeline for the SYCL device compiler.
CODEGENOPT(DisableSYCLEarlyOpts, 1, 0)

#undef CODEGENOPT
#undef ENUM_CODEGENOPT
#undef VALUE_CODEGENOPT
3 changes: 2 additions & 1 deletion clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -5110,7 +5110,8 @@ def : Flag<["-"], "fno-sycl-explicit-simd">,
Flags<[CoreOption, Deprecated]>,
Group<clang_ignored_legacy_options_Group>,
HelpText<"Disable SYCL explicit SIMD extension. (deprecated)">;
defm sycl_early_optimizations : OptOutCC1FFlag<"sycl-early-optimizations", "Enable", "Disable", " standard optimization pipeline for SYCL device compiler", [CoreOption]>;
defm sycl_early_optimizations : OptOutCC1FFlag<"sycl-early-optimizations", "Enable", "Disable", " standard optimization pipeline for SYCL device compiler", [CoreOption]>,
MarshallingInfoFlag<CodeGenOpts<"DisableSYCLEarlyOpts">>;
def fsycl_dead_args_optimization : Flag<["-"], "fsycl-dead-args-optimization">,
Group<sycl_Group>, Flags<[NoArgumentUnused, CoreOption]>, HelpText<"Enables "
"elimination of DPC++ dead kernel arguments">;
Expand Down
51 changes: 22 additions & 29 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -916,11 +916,6 @@ void EmitAssemblyHelper::RunOptimizationPipeline(

ModulePassManager MPM;

// FIXME: Change this when -fno-sycl-early-optimizations is not tied to
// -disable-llvm-passes.
if (CodeGenOpts.DisableLLVMPasses && LangOpts.SYCLIsDevice)
MPM.addPass(SYCLPropagateAspectsUsagePass());

if (!CodeGenOpts.DisableLLVMPasses) {
// Map our optimization levels into one of the distinct levels used to
// configure the pipeline.
Expand Down Expand Up @@ -1021,7 +1016,10 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
MPM.addPass(InstrProfiling(*Options, false));
});

if (CodeGenOpts.OptimizationLevel == 0) {
if (CodeGenOpts.DisableSYCLEarlyOpts) {
MPM =
PB.buildO0DefaultPipeline(OptimizationLevel::O0, IsLTO || IsThinLTO);
} else if (CodeGenOpts.OptimizationLevel == 0) {
MPM = PB.buildO0DefaultPipeline(Level, IsLTO || IsThinLTO);
} else if (IsThinLTO) {
MPM = PB.buildThinLTOPreLinkDefaultPipeline(Level);
Expand All @@ -1035,31 +1033,26 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
MPM.addPass(ModuleMemProfilerPass());
}
}
if (LangOpts.SYCLIsDevice) {
MPM.addPass(SYCLMutatePrintfAddrspacePass());
if (!CodeGenOpts.DisableLLVMPasses && LangOpts.EnableDAEInSpirKernels)
MPM.addPass(DeadArgumentEliminationSYCLPass());
}

// Add SPIRITTAnnotations pass to the pass manager if
// -fsycl-instrument-device-code option was passed. This option can be used
// only with spir triple.
if (LangOpts.SYCLIsDevice && CodeGenOpts.SPIRITTAnnotations) {
assert(TargetTriple.isSPIR() &&
"ITT annotations can only be added to a module with spir target");
MPM.addPass(SPIRITTAnnotationsPass());
}
if (LangOpts.SYCLIsDevice) {
MPM.addPass(SYCLMutatePrintfAddrspacePass());
if (LangOpts.EnableDAEInSpirKernels)
MPM.addPass(DeadArgumentEliminationSYCLPass());

// Add SPIRITTAnnotations pass to the pass manager if
// -fsycl-instrument-device-code option was passed. This option can be
// used only with spir triple.
if (CodeGenOpts.SPIRITTAnnotations) {
assert(
TargetTriple.isSPIR() &&
"ITT annotations can only be added to a module with spir target");
MPM.addPass(SPIRITTAnnotationsPass());
}

// Allocate static local memory in SYCL kernel scope for each allocation
// call. It should be called after inlining pass.
if (LangOpts.SYCLIsDevice) {
// Group local memory pass depends on inlining. Turn it on even in case if
// all llvm passes or SYCL early optimizations are disabled.
// FIXME: Remove this workaround when dependency on inlining is eliminated.
if (CodeGenOpts.DisableLLVMPasses)
MPM.addPass(AlwaysInlinerPass(false));
MPM.addPass(SYCLLowerWGLocalMemoryPass());
// Allocate static local memory in SYCL kernel scope for each allocation
// call.
MPM.addPass(SYCLLowerWGLocalMemoryPass());
}
}

// Add a verifier pass if requested. We don't have to do this if the action
Expand Down
5 changes: 0 additions & 5 deletions clang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1691,11 +1691,6 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
{std::string(Split.first), std::string(Split.second)});
}

Opts.DisableLLVMPasses =
Args.hasArg(OPT_disable_llvm_passes) ||
(Args.hasArg(OPT_fsycl_is_device) && T.isSPIR() &&
Args.hasArg(OPT_fno_sycl_early_optimizations));

const llvm::Triple::ArchType DebugEntryValueArchs[] = {
llvm::Triple::x86, llvm::Triple::x86_64, llvm::Triple::aarch64,
llvm::Triple::arm, llvm::Triple::armeb, llvm::Triple::mips,
Expand Down
18 changes: 6 additions & 12 deletions clang/test/CodeGenSYCL/device_has.cpp
Original file line number Diff line number Diff line change
@@ -1,38 +1,32 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s

// Tests for IR of device_has(aspect, ...) attribute and
// !sycl_used_aspects metadata
// Tests for IR of device_has(aspect, ...) attribute
#include "sycl.hpp"

using namespace sycl;
queue q;

// CHECK: define dso_local spir_kernel void @{{.*}}kernel_name_1{{.*}} !sycl_declared_aspects ![[ASPECTS1:[0-9]+]] !srcloc ![[SRCLOC1:[0-9]+]]

// CHECK: define dso_local spir_func void @{{.*}}func1{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC2:[0-9]+]]
// CHECK-SAME: !sycl_used_aspects ![[ASPECTS1]]
// CHECK: define dso_local spir_func void @{{.*}}func1{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC2:[0-9]+]] {
[[sycl::device_has(sycl::aspect::cpu)]] void func1() {}

// CHECK: define dso_local spir_func void @{{.*}}func2{{.*}} !sycl_declared_aspects ![[ASPECTS2:[0-9]+]] !srcloc ![[SRCLOC3:[0-9]+]]
// CHECK-SAME: !sycl_used_aspects ![[ASPECTS2]]
// CHECK: define dso_local spir_func void @{{.*}}func2{{.*}} !sycl_declared_aspects ![[ASPECTS2:[0-9]+]] !srcloc ![[SRCLOC3:[0-9]+]] {
[[sycl::device_has(sycl::aspect::fp16, sycl::aspect::gpu)]] void func2() {}

// CHECK: define dso_local spir_func void @{{.*}}func3{{.*}} !sycl_declared_aspects ![[EMPTYASPECTS:[0-9]+]] !srcloc ![[SRCLOC4:[0-9]+]] {
[[sycl::device_has()]] void func3() {}

// CHECK: define linkonce_odr spir_func void @{{.*}}func4{{.*}} !sycl_declared_aspects ![[ASPECTS3:[0-9]+]] !srcloc ![[SRCLOC5:[0-9]+]]
// CHECK-SAME: !sycl_used_aspects ![[ASPECTS3]]
// CHECK: define linkonce_odr spir_func void @{{.*}}func4{{.*}} !sycl_declared_aspects ![[ASPECTS3:[0-9]+]] !srcloc ![[SRCLOC5:[0-9]+]] {
template <sycl::aspect Aspect>
[[sycl::device_has(Aspect)]] void func4() {}

// CHECK: define dso_local spir_func void @{{.*}}func5{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC6:[0-9]+]]
// CHECK-SAME: !sycl_used_aspects ![[ASPECTS1]]
// CHECK: define dso_local spir_func void @{{.*}}func5{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC6:[0-9]+]] {
[[sycl::device_has(sycl::aspect::cpu)]] void func5();
void func5() {}

constexpr sycl::aspect getAspect() { return sycl::aspect::cpu; }
// CHECK: define dso_local spir_func void @{{.*}}func6{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC7:[0-9]+]]
// CHECK-SAME: !sycl_used_aspects ![[ASPECTS1]]
// CHECK: define dso_local spir_func void @{{.*}}func6{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC7:[0-9]+]] {
[[sycl::device_has(getAspect())]] void func6() {}

class KernelFunctor {
Expand Down
11 changes: 8 additions & 3 deletions clang/test/CodeGenSYCL/group-local-memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,18 @@
// Check that AlwaysInliner pass is always run for compilation of SYCL device
// target code, even if all optimizations are disabled.

// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -disable-llvm-passes \
// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \
// RUN: | FileCheck %s --check-prefixes=CHECK-ALWINL,CHECK
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -fno-sycl-early-optimizations \
// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \
// RUN: | FileCheck %s --check-prefixes=CHECK-ALWINL,CHECK

// CHECK-INL: Running pass: ModuleInlinerWrapperPass on [module]
// CHECK-ALWINL: Running pass: AlwaysInlinerPass on [module]
// CHECK: Running pass: SYCLLowerWGLocalMemoryPass on [module]

// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -disable-llvm-passes \
// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \
// RUN: | FileCheck %s --check-prefixes=CHECK-NO-PASSES-ALWINL,CHECK-NO-PASSES,CHECK-NO-PASSES-INL

// CHECK-NO-PASSES-INL-NOT: Running pass: ModuleInlinerWrapperPass on [module]
// CHECK-NO-PASSES-ALWINL-NOT: Running pass: AlwaysInlinerPass on [module]
// CHECK-NO-PASSES-NOT: Running pass: SYCLLowerWGLocalMemoryPass on [module]
6 changes: 3 additions & 3 deletions clang/test/CodeGenSYCL/sub-group-size.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2020 -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=NONE,ALL
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fsycl-default-sub-group-size=primary -sycl-std=2020 -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=PRIM_DEF,ALL
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fsycl-default-sub-group-size=10 -sycl-std=2020 -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=TEN_DEF,ALL
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2020 -triple spir64-unknown-unknown -fno-sycl-early-optimizations -emit-llvm -o - %s | FileCheck %s --check-prefixes=NONE,ALL
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fsycl-default-sub-group-size=primary -sycl-std=2020 -triple spir64-unknown-unknown -fno-sycl-early-optimizations -emit-llvm -o - %s | FileCheck %s --check-prefixes=PRIM_DEF,ALL
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fsycl-default-sub-group-size=10 -sycl-std=2020 -triple spir64-unknown-unknown -fno-sycl-early-optimizations -emit-llvm -o - %s | FileCheck %s --check-prefixes=TEN_DEF,ALL

// Ensure that both forms of the new sub_group_size properly emit their metadata
// on sycl-kernel and sycl-external functions.
Expand Down
15 changes: 7 additions & 8 deletions clang/test/CodeGenSYCL/uses_aspects.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ template <sycl::aspect Aspect>
void func5() {}

[[__sycl_detail__::__uses_aspects__(sycl::aspect::cpu)]] void func6();
// CHECK: define dso_local spir_func void @{{.*}}func6{{.*}} !sycl_used_aspects ![[ASPECTS4:[0-9]+]] {
// CHECK: define dso_local spir_func void @{{.*}}func6{{.*}} !sycl_used_aspects ![[ASPECTS1]] {
void func6() {
Type1WithAspect TestObj1;
Type2WithAspect TestObj2;
Expand Down Expand Up @@ -58,10 +58,9 @@ void foo() {
});
}
// CHECK: !sycl_types_that_use_aspects = !{![[TYPE1:[0-9]+]], ![[TYPE2:[0-9]+]]}
// CHECK-DAG: [[TYPE1]] = !{!"class.Type1WithAspect", i32 1}
// CHECK-DAG: [[TYPE2]] = !{!"class.Type2WithAspect", i32 5, i32 1}
// CHECK-DAG: [[EMPTYASPECTS]] = !{}
// CHECK-DAG: [[ASPECTS1]] = !{i32 1}
// CHECK-DAG: [[ASPECTS2]] = !{i32 5, i32 2}
// CHECK-DAG: [[ASPECTS3]] = !{i32 0}
// CHECK-DAG: [[ASPECTS4]] = !{i32 1, i32 5}
// CHECK: [[TYPE1]] = !{!"class.Type1WithAspect", i32 1}
// CHECK: [[TYPE2]] = !{!"class.Type2WithAspect", i32 5, i32 1}
// CHECK: [[EMPTYASPECTS]] = !{}
// CHECK: [[ASPECTS1]] = !{i32 1}
// CHECK: [[ASPECTS2]] = !{i32 5, i32 2}
// CHECK: [[ASPECTS3]] = !{i32 0}
43 changes: 43 additions & 0 deletions clang/test/SemaSYCL/sycl-force-inline-kernel-lambda-ast.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -ast-dump -o - %s | FileCheck %s --check-prefixes=NOINLINE,CHECK
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -ast-dump -o - %s | FileCheck %s --check-prefixes=INLINE,CHECK

// Tests that the appropriate inlining attributes are added to kernel lambda functions,
// with no inline attribute being added when -fno-sycl-force-inline-kernel-lambda is set
// and attribute not explicitly provided.

#include "sycl.hpp"

int main() {
sycl::queue q;

q.submit([&](sycl::handler &h) {
// CHECK: LambdaExpr{{.*}}sycl-force-inline-kernel-lambda-ast.cpp:17
// INLINE: AlwaysInlineAttr
// NOINLINE-NOT: AlwaysInlineAttr
h.parallel_for<class KernelName>([] {});
});

q.submit([&](sycl::handler &h) {
// CHECK: LambdaExpr{{.*}}sycl-force-inline-kernel-lambda-ast.cpp:23
// CHECK: AlwaysInlineAttr
h.parallel_for<class KernelNameInline>([]() __attribute__((always_inline)) {});
});

q.submit([&](sycl::handler &h) {
// CHECK: LambdaExpr{{.*}}sycl-force-inline-kernel-lambda-ast.cpp:30
// CHECK: NoInlineAttr
// CHECK-NOT: AlwaysInlineAttr
h.parallel_for<class KernelNameNoInline>([]() __attribute__((noinline)) {});
});

/// The flag is ignored for ESIMD kernels
q.submit([&](sycl::handler &h) {
// CHECK: LambdaExpr{{.*}}sycl-force-inline-kernel-lambda-ast.cpp:39
// CHECK: SYCLSimdAttr
// CHECK-NOT: AlwaysInlineAttr
// CHECK-NOT: NoInlineAttr
h.parallel_for<class KernelNameESIMD>([]() __attribute__((sycl_explicit_simd)) {});
});

return 0;
}
30 changes: 0 additions & 30 deletions clang/test/SemaSYCL/sycl-force-inline-kernel-lambda.cpp

This file was deleted.

71 changes: 71 additions & 0 deletions sycl/test/check_device_code/device_has.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// RUN: %clangxx -fsycl -Xclang -fsycl-is-device -fsycl-device-only -Xclang -fno-sycl-early-optimizations -S -emit-llvm %s -o - | FileCheck %s

// Tests for IR of device_has(aspect, ...) attribute and
// !sycl_used_aspects metadata
#include <sycl/sycl.hpp>

using namespace sycl;
queue q;

// CHECK: define weak_odr dso_local spir_kernel void @{{.*}}kernel_name_1{{.*}} !sycl_declared_aspects ![[ASPECTS1:[0-9]+]] !srcloc ![[SRCLOC1:[0-9]+]] {{.*}}

// CHECK: define dso_local spir_func void @{{.*}}func1{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC2:[0-9]+]]
// CHECK-SAME: !sycl_used_aspects ![[ASPECTS1]]
[[sycl::device_has(sycl::aspect::cpu)]] void func1() {}

// CHECK: define dso_local spir_func void @{{.*}}func2{{.*}} !sycl_declared_aspects ![[ASPECTS2:[0-9]+]] !srcloc ![[SRCLOC3:[0-9]+]]
// CHECK-SAME: !sycl_used_aspects ![[ASPECTS2]]
[[sycl::device_has(sycl::aspect::fp16, sycl::aspect::gpu)]] void func2() {}

// CHECK: define dso_local spir_func void @{{.*}}func3{{.*}} !sycl_declared_aspects ![[EMPTYASPECTS:[0-9]+]] !srcloc ![[SRCLOC4:[0-9]+]] {
[[sycl::device_has()]] void func3() {}

// CHECK: define linkonce_odr dso_local spir_func void @{{.*}}func4{{.*}} !sycl_declared_aspects ![[ASPECTS3:[0-9]+]] !srcloc ![[SRCLOC5:[0-9]+]]
// CHECK-SAME: !sycl_used_aspects ![[ASPECTS3]]
template <sycl::aspect Aspect> [[sycl::device_has(Aspect)]] void func4() {}

// CHECK: define dso_local spir_func void @{{.*}}func5{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC6:[0-9]+]]
// CHECK-SAME: !sycl_used_aspects ![[ASPECTS1]]
[[sycl::device_has(sycl::aspect::cpu)]] void func5();
void func5() {}

constexpr sycl::aspect getAspect() { return sycl::aspect::cpu; }
// CHECK: define dso_local spir_func void @{{.*}}func6{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC7:[0-9]+]]
// CHECK-SAME: !sycl_used_aspects ![[ASPECTS1]]
[[sycl::device_has(getAspect())]] void func6() {}

class KernelFunctor {
public:
[[sycl::device_has(sycl::aspect::cpu)]] void operator()() const {
func1();
func2();
func3();
func4<sycl::aspect::host>();
func5();
func6();
}
};

void foo() {
q.submit([&](handler &h) {
KernelFunctor f1;
h.single_task<class kernel_name_1>(f1);
// CHECK: define weak_odr dso_local spir_kernel void @{{.*}}kernel_name_2{{.*}} !sycl_declared_aspects ![[ASPECTS4:[0-9]+]] !srcloc ![[SRCLOC8:[0-9]+]] {{.*}}
h.single_task<class kernel_name_2>(
[]() [[sycl::device_has(sycl::aspect::gpu)]] {});
});
}

// CHECK: [[ASPECTS1]] = !{i32 1}
// CHECK: [[SRCLOC1]] = !{i32 {{[0-9]+}}}
// CHECK: [[EMPTYASPECTS]] = !{}
// CHECK: [[SRCLOC2]] = !{i32 {{[0-9]+}}}
// CHECK: [[ASPECTS2]] = !{i32 5, i32 2}
// CHECK: [[SRCLOC3]] = !{i32 {{[0-9]+}}}
// CHECK: [[SRCLOC4]] = !{i32 {{[0-9]+}}}
// CHECK: [[ASPECTS3]] = !{i32 0}
// CHECK: [[SRCLOC5]] = !{i32 {{[0-9]+}}}
// CHECK: [[SRCLOC6]] = !{i32 {{[0-9]+}}}
// CHECK: [[SRCLOC7]] = !{i32 {{[0-9]+}}}
// CHECK: [[ASPECTS4]] = !{i32 2}
// CHECK: [[SRCLOC8]] = !{i32 {{[0-9]+}}}

0 comments on commit d164fd9

Please sign in to comment.