diff --git a/src/device.cpp b/src/device.cpp index fbdd5381..8612790c 100644 --- a/src/device.cpp +++ b/src/device.cpp @@ -1196,11 +1196,15 @@ cl_int cvk_device::get_device_host_timer(cl_ulong* device_timestamp, return CL_SUCCESS; } -cl_ulong cvk_device::device_timer_to_host(cl_ulong dev, cl_ulong sync_dev, - cl_ulong sync_host) const { - if (sync_host > sync_dev) { - return (sync_host - sync_dev) + dev; +cl_ulong cvk_device::device_timer_to_host(cl_ulong dev) { + if (dev > m_sync_dev) { + if (get_device_host_timer(&m_sync_dev, &m_sync_host) != CL_SUCCESS) { + return dev; + } + } + if (m_sync_host > m_sync_dev) { + return (m_sync_host - m_sync_dev) + dev; } else { - return dev - (sync_dev - sync_host); + return dev - (m_sync_dev - m_sync_host); } } diff --git a/src/device.hpp b/src/device.hpp index 50cc1670..2b523f3c 100644 --- a/src/device.hpp +++ b/src/device.hpp @@ -514,8 +514,7 @@ struct cvk_device : public _cl_device_id, CHECK_RETURN cl_int get_device_host_timer(cl_ulong* dev_ts, cl_ulong* host_ts) const; - cl_ulong device_timer_to_host(cl_ulong dev, cl_ulong sync_dev, - cl_ulong sync_host) const; + cl_ulong device_timer_to_host(cl_ulong dev); uint64_t timestamp_to_ns(uint64_t ts) const { double ns_per_tick = vulkan_limits().timestampPeriod; @@ -708,6 +707,9 @@ struct cvk_device : public _cl_device_id, cl_uint m_preferred_subgroup_size{}; + cl_ulong m_sync_host{}; + cl_ulong m_sync_dev{}; + spv_target_env m_vulkan_spirv_env; std::unique_ptr m_clvk_properties; diff --git a/src/queue.hpp b/src/queue.hpp index 6dcca56b..c28c8e22 100644 --- a/src/queue.hpp +++ b/src/queue.hpp @@ -673,16 +673,18 @@ struct cvk_command_batchable : public cvk_command { CHECK_RETURN cl_int do_action() override; CHECK_RETURN virtual cl_int do_post_action() { return CL_SUCCESS; } - CHECK_RETURN cl_int set_profiling_info_end(cl_ulong sync_dev, - cl_ulong sync_host) { + CHECK_RETURN cl_int set_profiling_info_end() { + // If it has already been set, don't override it + if (m_event->get_profiling_info(CL_PROFILING_COMMAND_END) != 0) { + return CL_SUCCESS; + } cl_ulong start, end; auto perr = get_timestamp_query_results(&start, &end); if (perr != CL_COMPLETE) { return perr; } - start = - m_queue->device()->device_timer_to_host(start, sync_dev, sync_host); - end = m_queue->device()->device_timer_to_host(end, sync_dev, sync_host); + start = m_queue->device()->device_timer_to_host(start); + end = m_queue->device()->device_timer_to_host(end); m_event->set_profiling_info(CL_PROFILING_COMMAND_START, start); m_event->set_profiling_info(CL_PROFILING_COMMAND_END, end); return CL_SUCCESS; @@ -698,12 +700,10 @@ struct cvk_command_batchable : public cvk_command { pinfo == CL_PROFILING_COMMAND_SUBMIT) { return cvk_command::set_profiling_info(pinfo); } else if (pinfo == CL_PROFILING_COMMAND_START) { - return m_queue->device()->get_device_host_timer(&m_sync_dev, - &m_sync_host); + return CL_SUCCESS; } else { CVK_ASSERT(pinfo == CL_PROFILING_COMMAND_END); - CVK_ASSERT(m_sync_dev != 0 && m_sync_host != 0); - return set_profiling_info_end(m_sync_dev, m_sync_host); + return set_profiling_info_end(); } } @@ -714,8 +714,6 @@ struct cvk_command_batchable : public cvk_command { static const int NUM_POOL_QUERIES_PER_COMMAND = 2; static const int POOL_QUERY_CMD_START = 0; static const int POOL_QUERY_CMD_END = 1; - - cl_ulong m_sync_dev{}, m_sync_host{}; }; struct cvk_ndrange { @@ -845,14 +843,12 @@ struct cvk_command_batch : public cvk_command { cl_int status = cvk_command::set_profiling_info(pinfo); if (m_queue->profiling_on_device()) { if (pinfo == CL_PROFILING_COMMAND_START) { - return m_queue->device()->get_device_host_timer(&m_sync_dev, - &m_sync_host); + return status; } else { for (auto& cmd : m_commands) { cl_int err; if (pinfo == CL_PROFILING_COMMAND_END) { - err = cmd->set_profiling_info_end(m_sync_dev, - m_sync_host); + err = cmd->set_profiling_info_end(); } else { err = cmd->set_profiling_info(pinfo); } @@ -880,7 +876,6 @@ struct cvk_command_batch : public cvk_command { private: std::vector> m_commands; std::unique_ptr m_command_buffer; - cl_ulong m_sync_dev, m_sync_host; }; struct cvk_command_map_buffer final : public cvk_command_buffer_base_region {