Skip to content

Commit

Permalink
[SYCL] Fix segfault on program exit when user thread is not finished yet (#7908)
Browse files Browse the repository at this point in the history

(caused by #6837)

Signed-off-by: Tikhomirova, Kseniya <[email protected]>
  • Loading branch information
KseniyaTikhomirova committed Jan 6, 2023
1 parent d164fd9 commit ac58dd3
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 43 deletions.
91 changes: 53 additions & 38 deletions sycl/source/detail/global_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,18 @@ namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {
namespace detail {

using LockGuard = std::lock_guard<SpinLock>;
SpinLock GlobalHandler::MSyclGlobalHandlerProtector{};

// Utility class to track references on object.
// Used for Scheduler now and created as thread_local object.
// Origin idea is to track usage of Scheduler from main and other used threads -
// they increment MCounter; and to use but not add extra reference by our
// thread_pool threads. For this control MIncrementCounter class member is used.
template <class ResourceHandler> class ObjectUsageCounter {
// Used for GlobalHandler now and created as a thread_local object on the first
// Scheduler usage. The original idea is to track usage of the Scheduler from
// the main thread and other user threads - they increment MCounter - while
// letting our thread_pool threads use it without adding an extra reference.
// The MModifyCounter class member controls this.
class ObjectUsageCounter {
public:
// Note: -Wctad-maybe-unsupported may generate warning if no ResourceHandler
// type explicitly declared.
ObjectUsageCounter(std::unique_ptr<ResourceHandler> &Obj, bool ModifyCounter)
: MModifyCounter(ModifyCounter), MObj(Obj) {
ObjectUsageCounter(bool ModifyCounter) : MModifyCounter(ModifyCounter) {
if (MModifyCounter)
MCounter++;
}
Expand All @@ -47,26 +48,35 @@ template <class ResourceHandler> class ObjectUsageCounter {
return;

MCounter--;
if (!MCounter && MObj)
MObj->releaseResources();
if (!MCounter) {
LockGuard Guard(GlobalHandler::MSyclGlobalHandlerProtector);
GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr();
if (RTGlobalObjHandler) {
RTGlobalObjHandler->drainThreadPool();
if (RTGlobalObjHandler->MScheduler.Inst)
RTGlobalObjHandler->MScheduler.Inst->releaseResources();
}
}
}

private:
static std::atomic_uint MCounter;
bool MModifyCounter;
std::unique_ptr<ResourceHandler> &MObj;
};
template <class ResourceHandler>
std::atomic_uint ObjectUsageCounter<ResourceHandler>::MCounter{0};

using LockGuard = std::lock_guard<SpinLock>;
std::atomic_uint ObjectUsageCounter::MCounter{0};

// The constructor and destructor are defaulted here, out of line, rather than
// inside the class definition. global_handler.hpp notes that this allows
// incomplete types to be used as template arguments to unique_ptr.
GlobalHandler::GlobalHandler() = default;
GlobalHandler::~GlobalHandler() = default;

// Returns a reference to the pointer that holds the GlobalHandler instance, so
// that a caller can both read the pointer and overwrite it.
//
// The pointer is a function-local static that is initialized with a
// heap-allocated GlobalHandler the first time this function is called. It is
// not re-created afterwards: shutdown() deletes the handler and sets this
// pointer to nullptr through the returned reference, after which callers
// observe nullptr and must check for it.
GlobalHandler *&GlobalHandler::getInstancePtr() {
static GlobalHandler *RTGlobalObjHandler = new GlobalHandler();
return RTGlobalObjHandler;
}

GlobalHandler &GlobalHandler::instance() {
static GlobalHandler *SyclGlobalObjectsHandler = new GlobalHandler();
return *SyclGlobalObjectsHandler;
GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr();
assert(RTGlobalObjHandler && "Handler must not be deallocated earlier");
return *RTGlobalObjHandler;
}

template <typename T, typename... Types>
Expand Down Expand Up @@ -94,8 +104,7 @@ Scheduler &GlobalHandler::getScheduler() {
}

void GlobalHandler::registerSchedulerUsage(bool ModifyCounter) {
thread_local ObjectUsageCounter<Scheduler> SchedulerCounter(MScheduler.Inst,
ModifyCounter);
thread_local ObjectUsageCounter SchedulerCounter(ModifyCounter);
}

ProgramManager &GlobalHandler::getProgramManager() {
Expand Down Expand Up @@ -151,14 +160,14 @@ ThreadPool &GlobalHandler::getHostTaskThreadPool() {
void GlobalHandler::releaseDefaultContexts() {
// Release shared-pointers to SYCL objects.
#ifndef _WIN32
GlobalHandler::instance().MPlatformToDefaultContextCache.Inst.reset(nullptr);
MPlatformToDefaultContextCache.Inst.reset(nullptr);
#else
// Windows does not maintain dependencies between dynamically loaded libraries
// and can unload SYCL runtime dependencies before sycl.dll's DllMain has
// finished. To avoid calls into unloaded code, intentionally leak the
// platform-to-default-context cache. This prevents its destructors from being
// called, so no PI cleanup routines are called in the end.
GlobalHandler::instance().MPlatformToDefaultContextCache.Inst.release();
MPlatformToDefaultContextCache.Inst.release();
#endif
}

Expand All @@ -178,8 +187,8 @@ void GlobalHandler::unloadPlugins() {
// Call to GlobalHandler::instance().getPlugins() initializes plugins. If
// user application has loaded SYCL runtime, and never called any APIs,
// there's no need to load and unload plugins.
if (GlobalHandler::instance().MPlugins.Inst) {
for (plugin &Plugin : GlobalHandler::instance().getPlugins()) {
if (MPlugins.Inst) {
for (plugin &Plugin : getPlugins()) {
// PluginParameter is reserved for future use that can control
// some parameters in the plugin tear-down process.
// Currently, it is not used.
Expand All @@ -189,7 +198,7 @@ void GlobalHandler::unloadPlugins() {
}
}
// Clear after unload to avoid uses after unload.
GlobalHandler::instance().getPlugins().clear();
getPlugins().clear();
}

void GlobalHandler::drainThreadPool() {
Expand All @@ -198,34 +207,40 @@ void GlobalHandler::drainThreadPool() {
}

void shutdown() {
const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector};
GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
if (!Handler)
return;

// Ensure that no host task is still running so that no default context is
// accessed upon its release.
Handler->drainThreadPool();
if (Handler->MScheduler.Inst)
Handler->MScheduler.Inst->releaseResources();

if (GlobalHandler::instance().MScheduler.Inst)
GlobalHandler::instance().MScheduler.Inst->releaseResources();

if (GlobalHandler::instance().MHostTaskThreadPool.Inst)
GlobalHandler::instance().MHostTaskThreadPool.Inst->finishAndWait();
if (Handler->MHostTaskThreadPool.Inst)
Handler->MHostTaskThreadPool.Inst->finishAndWait();

// If default contexts are requested after the first default contexts have
// been released there may be a new default context. These must be released
// prior to closing the plugins.
// Note: Releasing a default context here may cause failures in plugins with
// global state as the global state may have been released.
GlobalHandler::instance().releaseDefaultContexts();
Handler->releaseDefaultContexts();

// First, release resources that may access plugins.
GlobalHandler::instance().MPlatformCache.Inst.reset(nullptr);
GlobalHandler::instance().MScheduler.Inst.reset(nullptr);
GlobalHandler::instance().MProgramManager.Inst.reset(nullptr);
Handler->MPlatformCache.Inst.reset(nullptr);
Handler->MScheduler.Inst.reset(nullptr);
Handler->MProgramManager.Inst.reset(nullptr);

// Clear the plugins and reset the instance if it was there.
GlobalHandler::instance().unloadPlugins();
if (GlobalHandler::instance().MPlugins.Inst)
GlobalHandler::instance().MPlugins.Inst.reset(nullptr);
Handler->unloadPlugins();
if (Handler->MPlugins.Inst)
Handler->MPlugins.Inst.reset(nullptr);

// Release the rest of global resources.
delete &GlobalHandler::instance();
delete Handler;
Handler = nullptr;
}

#ifdef _WIN32
Expand Down
3 changes: 3 additions & 0 deletions sycl/source/detail/global_handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ class GlobalHandler {

private:
friend void shutdown();
friend class ObjectUsageCounter;
static GlobalHandler *&getInstancePtr();
static SpinLock MSyclGlobalHandlerProtector;

// Constructor and destructor are declared out-of-line to allow incomplete
// types as template arguments to unique_ptr.
Expand Down
5 changes: 0 additions & 5 deletions sycl/source/detail/scheduler/scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,11 +392,6 @@ Scheduler::~Scheduler() { DefaultHostQueue.reset(); }

void Scheduler::releaseResources() {
#ifndef _WIN32
if (DefaultHostQueue) {
DefaultHostQueue->wait();
}
GlobalHandler::instance().drainThreadPool();

// There might be some commands scheduled for post enqueue cleanup that
// haven't been freed because of the graph mutex being locked at the time,
// clean them up now.
Expand Down

0 comments on commit ac58dd3

Please sign in to comment.