From f7a7040fc18890f319c76dc6c8709c64ecd3e47f Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Wed, 18 Oct 2023 13:12:47 +0200 Subject: [PATCH] remove cvk_command_combine in favor of cvk_event_combine cvk_command_combine prevents optimizations where commands could be batched. It was needed to provide a single event representing the execution of a group of commands. This PR creates a new event type (cvk_event_combine) that groups several events into a single event, thus keeping a single event for the group of commands. It also allows do_action to be made a private function that is called in only one place. This is very convenient for implementing future optimizations (using VkSemaphore). --- src/api.cpp | 495 ++++++++++++++++++++++++++++++++------------------ src/event.cpp | 8 +- src/event.hpp | 109 +++++++++-- src/queue.cpp | 70 ------- src/queue.hpp | 172 ++++-------------- 5 files changed, 455 insertions(+), 399 deletions(-) diff --git a/src/api.cpp b/src/api.cpp index e30b29ce..99455d61 100644 --- a/src/api.cpp +++ b/src/api.cpp @@ -1228,7 +1228,7 @@ cl_event CLVK_API_CALL clCreateUserEvent(cl_context context, } } - auto event = new cvk_event(icd_downcast(context), nullptr, nullptr); + auto event = new cvk_event_command(icd_downcast(context), nullptr, nullptr); if (errcode_ret != nullptr) { *errcode_ret = CL_SUCCESS; @@ -3860,6 +3860,85 @@ void* CLVK_API_CALL clEnqueueMapBuffer(cl_command_queue cq, cl_mem buf, return map_ptr; } +cl_int cvk_enqueue_unmap_image(cvk_command_queue* cq, cvk_image* image, + void* mapped_ptr, bool update_host_ptr, + bool blocking, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) { + const cvk_image_mapping mapping = image->mapping_for(mapped_ptr); + + // We need to hold on the buffer to make sure it has not been release by the + // unmap command buffer creating the copy command which will hold on to it. 
+ cvk_buffer_holder buffer(mapping.buffer); + + bool needs_copy = + (mapping.flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) != 0; + update_host_ptr = update_host_ptr && image->has_flags(CL_MEM_USE_HOST_PTR); + + auto cmd_unmap = + std::make_unique(cq, image, mapped_ptr); + + _cl_event* evt_unmap; + auto err = cq->enqueue_command_with_deps( + cmd_unmap.release(), blocking && !needs_copy, num_events_in_wait_list, + event_wait_list, &evt_unmap); + if (err != CL_SUCCESS) { + return err; + } + + if (needs_copy) { + _cl_event* evt_pre_copy = evt_unmap; + _cl_event* evt_host_ptr_update; + if (update_host_ptr) { + size_t zero_origin[3] = {0, 0, 0}; + auto cmd_host_ptr_update = + std::make_unique( + cq, CL_COMMAND_WRITE_BUFFER_RECT, mapping.buffer, + image->host_ptr(), mapping.origin.data(), zero_origin, + mapping.region.data(), image->row_pitch(), + image->slice_pitch(), image->map_buffer_row_pitch(mapping), + image->map_buffer_slice_pitch(mapping), + image->element_size()); + + err = + cq->enqueue_command_with_deps(cmd_host_ptr_update.release(), 1, + &evt_unmap, &evt_host_ptr_update); + if (err != CL_SUCCESS) { + return err; + } + evt_pre_copy = evt_host_ptr_update; + } + _cl_event* evt_copy; + auto cmd_copy = std::make_unique( + CL_COMMAND_COPY_BUFFER_TO_IMAGE, cq, mapping.buffer, image, 0, + mapping.origin, mapping.region); + + err = cq->enqueue_command_with_deps(cmd_copy.release(), blocking, 1, + &evt_pre_copy, &evt_copy); + if (err != CL_SUCCESS) { + return err; + } + + if (event != nullptr) { + *event = new cvk_event_combine( + cq->context(), CL_COMMAND_UNMAP_MEM_OBJECT, cq, + icd_downcast(evt_unmap), icd_downcast(evt_copy)); + } + icd_downcast(evt_unmap)->release(); + if (update_host_ptr) { + icd_downcast(evt_host_ptr_update)->release(); + } + icd_downcast(evt_copy)->release(); + } else { + if (event != nullptr) { + *event = evt_unmap; + } else { + icd_downcast(evt_unmap)->release(); + } + } + return CL_SUCCESS; +} + cl_int CLVK_API_CALL 
clEnqueueUnmapMemObject(cl_command_queue cq, cl_mem mem, void* mapped_ptr, cl_uint num_events_in_wait_list, @@ -3881,31 +3960,26 @@ cl_int CLVK_API_CALL clEnqueueUnmapMemObject(cl_command_queue cq, cl_mem mem, return CL_INVALID_MEM_OBJECT; } - cvk_command* cmd; - if (memobj->is_image_type()) { auto image = static_cast(memobj); if (image->is_backed_by_buffer_view()) { auto buffer = static_cast(image->buffer()); - cmd = + auto cmd = new cvk_command_unmap_buffer(command_queue, buffer, mapped_ptr); + return command_queue->enqueue_command_with_deps( + cmd, num_events_in_wait_list, event_wait_list, event); } else { - auto cmd_unmap = std::make_unique( - command_queue, image, mapped_ptr, true); - - auto err = cmd_unmap->build(); - if (err != CL_SUCCESS) { - return err; - } - cmd = cmd_unmap.release(); + return cvk_enqueue_unmap_image(command_queue, image, mapped_ptr, + true, false, num_events_in_wait_list, + event_wait_list, event); } } else { auto buffer = static_cast(memobj); - cmd = new cvk_command_unmap_buffer(command_queue, buffer, mapped_ptr); + auto cmd = std::make_unique( + command_queue, buffer, mapped_ptr); + return command_queue->enqueue_command_with_deps( + cmd.release(), num_events_in_wait_list, event_wait_list, event); } - - return command_queue->enqueue_command_with_deps( - cmd, num_events_in_wait_list, event_wait_list, event); } cl_int cvk_enqueue_ndrange_kernel(cvk_command_queue* command_queue, @@ -4741,6 +4815,177 @@ cl_int CLVK_API_CALL clGetSupportedImageFormats(cl_context context, return CL_SUCCESS; } +cl_int cvk_enqueue_map_image(cl_command_queue cq, cl_mem img, + bool user_map_image, cl_bool blocking_map, + cl_map_flags map_flags, const size_t* origin, + const size_t* region, size_t* image_row_pitch, + size_t* image_slice_pitch, cvk_buffer** map_buffer, + void** map_ptr, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event) { + auto command_queue = icd_downcast(cq); + auto image = static_cast(img); + + if 
(!is_valid_command_queue(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + if (!is_valid_image(image)) { + return CL_INVALID_MEM_OBJECT; + } + + if (!is_valid_event_wait_list(num_events_in_wait_list, event_wait_list)) { + return CL_INVALID_EVENT_WAIT_LIST; + } + + if (!is_same_context(command_queue, image) || + !is_same_context(command_queue, num_events_in_wait_list, + event_wait_list)) { + return CL_INVALID_CONTEXT; + } + + if (!map_flags_are_valid(map_flags)) { + return CL_INVALID_VALUE; + } + // TODO CL_INVALID_VALUE if region being mapped given by (origin, + // origin+region) is out of bounds + // TODO CL_INVALID_VALUE if values in origin and region do not follow rules + // described in the argument description for origin and region. + + if (image_row_pitch == nullptr) { + return CL_INVALID_VALUE; + } + + switch (image->type()) { + case CL_MEM_OBJECT_IMAGE3D: + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + if (image_slice_pitch == nullptr) { + return CL_INVALID_VALUE; + } + break; + default: + break; + } + // TODO CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, + // specified or compute row and/or slice pitch) for image are not supported + // by device associated with queue. + // TODO CL_INVALID_IMAGE_FORMAT if image format (image channel order and + // data type) for image are not supported by device associated with queue. + // TODO CL_MAP_FAILURE if there is a failure to map the requested region + // into the host address space. This error cannot occur for image objects + // created with CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR. + // TODO CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate + // memory for data store associated with buffer. 
+ if (!command_queue->device()->supports_images()) { + return CL_INVALID_OPERATION; + } + + if ((map_flags & CL_MAP_READ) && + (image->has_any_flag(CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS))) { + return CL_INVALID_OPERATION; + } + + if (((map_flags & CL_MAP_WRITE) || + (map_flags & CL_MAP_WRITE_INVALIDATE_REGION)) && + (image->has_any_flag(CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) && + user_map_image) { + return CL_INVALID_OPERATION; + } + + std::array orig = {origin[0], origin[1], origin[2]}; + std::array reg = {region[0], region[1], region[2]}; + + bool needs_copy = (map_flags & CL_MAP_WRITE_INVALIDATE_REGION) == 0; + bool update_host_ptr = + user_map_image && image->has_flags(CL_MEM_USE_HOST_PTR); + + // Get a mapping + cvk_image_mapping mapping; + if (!image->find_or_create_mapping(mapping, orig, reg, map_flags, + update_host_ptr)) { + cvk_error("cannot find or create a mapping"); + return CL_OUT_OF_RESOURCES; + } + + if (map_ptr != nullptr) { + *map_ptr = mapping.ptr; + } + if (map_buffer != nullptr) { + *map_buffer = mapping.buffer; + } + + if (update_host_ptr) { + *image_row_pitch = image->row_pitch(); + if (image_slice_pitch != nullptr) { + *image_slice_pitch = image->slice_pitch(); + } + } else { + *image_row_pitch = image->map_buffer_row_pitch(mapping); + if (image_slice_pitch != nullptr) { + *image_slice_pitch = image->map_buffer_slice_pitch(mapping); + } + } + + if (needs_copy) { + _cl_event* evt_copy; + auto cmd_copy = std::make_unique( + CL_COMMAND_COPY_IMAGE_TO_BUFFER, command_queue, mapping.buffer, + image, 0, orig, reg); + + auto err = command_queue->enqueue_command_with_deps( + cmd_copy.release(), blocking_map && !update_host_ptr, + num_events_in_wait_list, event_wait_list, &evt_copy); + if (err != CL_SUCCESS) { + return err; + } + + if (update_host_ptr) { + size_t zero_origin[3] = {0, 0, 0}; + auto cmd_host_ptr_update = + std::make_unique( + command_queue, CL_COMMAND_READ_BUFFER_RECT, mapping.buffer, + image->host_ptr(), orig.data(), 
zero_origin, reg.data(), + image->row_pitch(), image->slice_pitch(), + image->map_buffer_row_pitch(reg), + image->map_buffer_slice_pitch(reg), image->element_size()); + _cl_event* evt_host_ptr_update; + err = command_queue->enqueue_command_with_deps( + cmd_host_ptr_update.release(), blocking_map, 1, &evt_copy, + &evt_host_ptr_update); + if (err != CL_SUCCESS) { + return err; + } + + if (event != nullptr) { + *event = new cvk_event_combine( + command_queue->context(), CL_COMMAND_MAP_IMAGE, + command_queue, icd_downcast(evt_copy), + icd_downcast(evt_host_ptr_update)); + } + icd_downcast(evt_copy)->release(); + icd_downcast(evt_host_ptr_update)->release(); + } else { + if (event != nullptr) { + *event = evt_copy; + } else { + icd_downcast(evt_copy)->release(); + } + } + } else { + auto cmd_map = + std::make_unique(command_queue, image); + + auto err = command_queue->enqueue_command_with_deps( + cmd_map.release(), blocking_map, num_events_in_wait_list, + event_wait_list, event); + if (err != CL_SUCCESS) { + return err; + } + } + + return CL_SUCCESS; +} + cl_int cvk_enqueue_image_copy( cvk_command_queue* queue, cl_command_type command_type, cvk_mem* image, bool blocking, const size_t* origin, const size_t* region, size_t row_pitch, @@ -4758,7 +5003,6 @@ cl_int cvk_enqueue_image_copy( } // Create image map command - std::array orig = {origin[0], origin[1], origin[2]}; std::array reg = {region[0], region[1], region[2]}; cl_map_flags map_flags; @@ -4768,15 +5012,6 @@ cl_int cvk_enqueue_image_copy( map_flags = CL_MAP_READ; } - auto cmd_map = std::make_unique(queue, img, orig, - reg, map_flags); - void* map_ptr; - cl_int err = cmd_map->build(&map_ptr); - if (err != CL_SUCCESS) { - return err; - } - auto map_buffer = cmd_map->map_buffer(); - // Create copy command auto rpitch = row_pitch; if (rpitch == 0) { @@ -4788,31 +5023,48 @@ cl_int cvk_enqueue_image_copy( spitch = region[1] * rpitch; } const size_t zero_origin[3] = {0, 0, 0}; + + _cl_event* evt_map; + cvk_buffer* 
map_buffer; + void* map_ptr; + size_t image_row_pitch, image_slice_pitch; + auto err = cvk_enqueue_map_image( + queue, image, false, false, map_flags, origin, region, &image_row_pitch, + &image_slice_pitch, &map_buffer, &map_ptr, num_events_in_wait_list, + event_wait_list, &evt_map); + if (err != CL_SUCCESS) { + return err; + } + auto cmd_copy = std::make_unique( queue, command_type, map_buffer, ptr, zero_origin, zero_origin, region, rpitch, spitch, img->map_buffer_row_pitch(reg), img->map_buffer_slice_pitch(reg), img->element_size()); - // Create unmap command - auto cmd_unmap = - std::make_unique(queue, img, map_ptr); - err = cmd_unmap->build(); + _cl_event* evt_copy; + err = queue->enqueue_command_with_deps(cmd_copy.release(), 1, &evt_map, + &evt_copy); if (err != CL_SUCCESS) { return err; } - // Create combine command - std::vector> commands; - commands.emplace_back(std::move(cmd_map)); - commands.emplace_back(std::move(cmd_copy)); - commands.emplace_back(std::move(cmd_unmap)); + _cl_event* evt_unmap; + err = cvk_enqueue_unmap_image(queue, img, map_ptr, false, blocking, 1, + &evt_copy, &evt_unmap); + icd_downcast(evt_copy)->release(); + if (err != CL_SUCCESS) { + return err; + } - auto cmd = - new cvk_command_combine(queue, command_type, std::move(commands)); + if (event != nullptr) { + *event = new cvk_event_combine(queue->context(), command_type, queue, + icd_downcast(evt_map), + icd_downcast(evt_unmap)); + } + icd_downcast(evt_map)->release(); + icd_downcast(evt_unmap)->release(); - // Enqueue combined command - return queue->enqueue_command_with_deps( - cmd, blocking, num_events_in_wait_list, event_wait_list, event); + return CL_SUCCESS; } cl_int CLVK_API_CALL clEnqueueReadImage( @@ -5122,13 +5374,15 @@ cl_int CLVK_API_CALL clEnqueueFillImage( } // Create image map command - std::array orig = {origin[0], origin[1], origin[2]}; std::array reg = {region[0], region[1], region[2]}; - auto cmd_map = std::make_unique( - command_queue, img, orig, reg, 
CL_MAP_WRITE_INVALIDATE_REGION); void* map_ptr; - cl_int err = cmd_map->build(&map_ptr); + _cl_event* evt_map; + size_t image_row_pitch, image_slice_pitch; + auto err = cvk_enqueue_map_image( + command_queue, image, false, false, CL_MAP_WRITE_INVALIDATE_REGION, + origin, region, &image_row_pitch, &image_slice_pitch, nullptr, &map_ptr, + num_events_in_wait_list, event_wait_list, &evt_map); if (err != CL_SUCCESS) { return err; } @@ -5136,26 +5390,30 @@ cl_int CLVK_API_CALL clEnqueueFillImage( auto cmd_fill = std::make_unique( command_queue, map_ptr, pattern, pattern_size, reg); - // Create unmap command - auto cmd_unmap = - std::make_unique(command_queue, img, map_ptr); - err = cmd_unmap->build(); + _cl_event* evt_fill; + err = command_queue->enqueue_command_with_deps(cmd_fill.release(), 1, + &evt_map, &evt_fill); if (err != CL_SUCCESS) { return err; } - // Create combine command - std::vector> commands; - commands.emplace_back(std::move(cmd_map)); - commands.emplace_back(std::move(cmd_fill)); - commands.emplace_back(std::move(cmd_unmap)); + _cl_event* evt_unmap; + err = cvk_enqueue_unmap_image(command_queue, img, map_ptr, false, false, 1, + &evt_fill, &evt_unmap); + icd_downcast(evt_fill)->release(); + if (err != CL_SUCCESS) { + return err; + } - auto cmd = new cvk_command_combine(command_queue, CL_COMMAND_FILL_IMAGE, - std::move(commands)); + if (event != nullptr) { + *event = new cvk_event_combine( + command_queue->context(), CL_COMMAND_FILL_IMAGE, command_queue, + icd_downcast(evt_map), icd_downcast(evt_unmap)); + } + icd_downcast(evt_map)->release(); + icd_downcast(evt_unmap)->release(); - // Enqueue combined command - return command_queue->enqueue_command_with_deps( - cmd, num_events_in_wait_list, event_wait_list, event); + return CL_SUCCESS; } cl_int CLVK_API_CALL clEnqueueCopyImageToBuffer( @@ -5332,119 +5590,6 @@ cl_int CLVK_API_CALL clEnqueueCopyBufferToImage( } } -void* cvk_enqueue_map_image(cl_command_queue cq, cl_mem img, - cl_bool blocking_map, 
cl_map_flags map_flags, - const size_t* origin, const size_t* region, - size_t* image_row_pitch, size_t* image_slice_pitch, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event, - cl_int* errcode_ret) { - auto command_queue = icd_downcast(cq); - auto image = static_cast(img); - - if (!is_valid_command_queue(command_queue)) { - *errcode_ret = CL_INVALID_COMMAND_QUEUE; - return nullptr; - } - - if (!is_valid_image(image)) { - *errcode_ret = CL_INVALID_MEM_OBJECT; - return nullptr; - } - - if (!is_valid_event_wait_list(num_events_in_wait_list, event_wait_list)) { - *errcode_ret = CL_INVALID_EVENT_WAIT_LIST; - return nullptr; - } - - if (!is_same_context(command_queue, image) || - !is_same_context(command_queue, num_events_in_wait_list, - event_wait_list)) { - *errcode_ret = CL_INVALID_CONTEXT; - return nullptr; - } - - if (!map_flags_are_valid(map_flags)) { - *errcode_ret = CL_INVALID_VALUE; - return nullptr; - } - // TODO CL_INVALID_VALUE if region being mapped given by (origin, - // origin+region) is out of bounds - // TODO CL_INVALID_VALUE if values in origin and region do not follow rules - // described in the argument description for origin and region. - - if (image_row_pitch == nullptr) { - *errcode_ret = CL_INVALID_VALUE; - return nullptr; - } - - switch (image->type()) { - case CL_MEM_OBJECT_IMAGE3D: - case CL_MEM_OBJECT_IMAGE1D_ARRAY: - case CL_MEM_OBJECT_IMAGE2D_ARRAY: - if (image_slice_pitch == nullptr) { - *errcode_ret = CL_INVALID_VALUE; - return nullptr; - } - break; - default: - break; - } - // TODO CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, - // specified or compute row and/or slice pitch) for image are not supported - // by device associated with queue. - // TODO CL_INVALID_IMAGE_FORMAT if image format (image channel order and - // data type) for image are not supported by device associated with queue. 
- // TODO CL_MAP_FAILURE if there is a failure to map the requested region - // into the host address space. This error cannot occur for image objects - // created with CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR. - // TODO CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate - // memory for data store associated with buffer. - if (!command_queue->device()->supports_images()) { - *errcode_ret = CL_INVALID_OPERATION; - return nullptr; - } - - if ((map_flags & CL_MAP_READ) && - (image->has_any_flag(CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS))) { - *errcode_ret = CL_INVALID_OPERATION; - return nullptr; - } - - if (((map_flags & CL_MAP_WRITE) || - (map_flags & CL_MAP_WRITE_INVALIDATE_REGION)) && - (image->has_any_flag(CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS))) { - *errcode_ret = CL_INVALID_OPERATION; - return nullptr; - } - - std::array orig = {origin[0], origin[1], origin[2]}; - std::array reg = {region[0], region[1], region[2]}; - auto cmd = std::make_unique( - command_queue, image, orig, reg, map_flags, true); - - void* map_ptr; - cl_int err = cmd->build(&map_ptr); - - if (err != CL_SUCCESS) { - *errcode_ret = err; - return nullptr; - } - - *image_row_pitch = cmd->row_pitch(); - if (image_slice_pitch != nullptr) { - *image_slice_pitch = cmd->slice_pitch(); - } - - err = command_queue->enqueue_command_with_deps(cmd.release(), blocking_map, - num_events_in_wait_list, - event_wait_list, event); - - *errcode_ret = err; - - return map_ptr; -} - void* CLVK_API_CALL clEnqueueMapImage( cl_command_queue cq, cl_mem image, cl_bool blocking_map, cl_map_flags map_flags, const size_t* origin, const size_t* region, @@ -5477,10 +5622,10 @@ void* CLVK_API_CALL clEnqueueMapImage( region[0] * img->element_size(), map_flags, num_events_in_wait_list, event_wait_list, event, &err, CL_COMMAND_MAP_IMAGE); } else { - ret = cvk_enqueue_map_image(command_queue, image, blocking_map, - map_flags, origin, region, image_row_pitch, - image_slice_pitch, 
num_events_in_wait_list, - event_wait_list, event, &err); + err = cvk_enqueue_map_image( + command_queue, image, true, blocking_map, map_flags, origin, region, + image_row_pitch, image_slice_pitch, nullptr, &ret, + num_events_in_wait_list, event_wait_list, event); } if (errcode_ret != nullptr) { diff --git a/src/event.cpp b/src/event.cpp index 8c654f12..1dcf88a0 100644 --- a/src/event.cpp +++ b/src/event.cpp @@ -22,9 +22,9 @@ static const cl_profiling_info status_to_profiling_info[4] = { CL_PROFILING_COMMAND_QUEUED, }; -cvk_event::cvk_event(cvk_context* ctx, cvk_command* cmd, - cvk_command_queue* queue) - : api_object(ctx), m_cmd(cmd), m_queue(queue) { +cvk_event_command::cvk_event_command(cvk_context* ctx, cvk_command* cmd, + cvk_command_queue* queue) + : cvk_event(ctx, queue), m_cmd(cmd) { if (cmd == nullptr) { m_status = CL_SUBMITTED; m_command_type = CL_COMMAND_USER; @@ -34,7 +34,7 @@ cvk_event::cvk_event(cvk_context* ctx, cvk_command* cmd, } } -void cvk_event::set_status(cl_int status) { +void cvk_event_command::set_status(cl_int status) { cvk_debug_group(loggroup::event, "cvk_event::set_status: event = %p, status = %d", this, status); diff --git a/src/event.hpp b/src/event.hpp index 7e1c79df..846db4a2 100644 --- a/src/event.hpp +++ b/src/event.hpp @@ -37,17 +37,49 @@ struct cvk_event_callback { struct cvk_event : public _cl_event, api_object { - cvk_event(cvk_context* ctx, cvk_command* cmd, cvk_command_queue* queue); + cvk_event(cvk_context* ctx, cvk_command_queue* queue) + : api_object(ctx), m_command_type(0), m_queue(queue) {} - bool completed() { return m_status == CL_COMPLETE; } + virtual cl_int get_status() const = 0; - bool terminated() { return m_status < 0; } + bool completed() { return get_status() == CL_COMPLETE; } - void set_status(cl_int status); + bool terminated() { return get_status() < 0; } + + virtual void set_status(cl_int status) = 0; + + virtual void register_callback(cl_int callback_type, + cvk_event_callback_pointer_type ptr, + void* 
user_data) = 0; + + cl_command_type command_type() const { return m_command_type; } + + bool is_user_event() const { return m_command_type == CL_COMMAND_USER; } + + cvk_command_queue* queue() const { + CVK_ASSERT(!is_user_event()); + return m_queue; + } + + virtual cl_int wait() = 0; + + virtual uint64_t get_profiling_info(cl_profiling_info pinfo) const = 0; + +protected: + cl_command_type m_command_type; + cvk_command_queue* m_queue; +}; + +struct cvk_event_command : public cvk_event { + + cvk_event_command(cvk_context* ctx, cvk_command* cmd, + cvk_command_queue* queue); + + void set_status(cl_int status) override final; void register_callback(cl_int callback_type, cvk_event_callback_pointer_type ptr, - void* user_data) { + void* user_data) override final { std::lock_guard lock(m_lock); cvk_event_callback cb = {ptr, user_data}; @@ -59,17 +91,9 @@ struct cvk_event : public _cl_event, api_object { } } - cl_int get_status() const { return m_status; } - cl_command_type command_type() const { return m_command_type; } + cl_int get_status() const override final { return m_status; } - bool is_user_event() const { return m_command_type == CL_COMMAND_USER; } - - cvk_command_queue* queue() const { - CVK_ASSERT(!is_user_event()); - return m_queue; - } - - cl_int wait() { + cl_int wait() override final { std::unique_lock lock(m_lock); cvk_debug_group(loggroup::event, "cvk_event::wait: event = %p, status = %d", this, @@ -93,7 +117,7 @@ struct cvk_event : public _cl_event, api_object { set_profiling_info(info, val); } - uint64_t get_profiling_info(cl_profiling_info pinfo) const { + uint64_t get_profiling_info(cl_profiling_info pinfo) const override final { return m_profiling_data[pinfo - CL_PROFILING_COMMAND_QUEUED]; } @@ -117,12 +141,61 @@ struct cvk_event : public _cl_event, api_object { std::condition_variable m_cv; cl_int m_status; cl_ulong m_profiling_data[4]{}; - cl_command_type m_command_type; cvk_command* m_cmd; - cvk_command_queue* m_queue; std::unordered_map> 
m_callbacks; }; +struct cvk_event_combine : public cvk_event { + + cvk_event_combine(cvk_context* ctx, cl_command_type command_type, + cvk_command_queue* queue, cvk_event* start_event, + cvk_event* end_event) + : cvk_event(ctx, queue), m_start_event(start_event), + m_end_event(end_event) { + m_command_type = command_type; + start_event->retain(); + end_event->retain(); + } + + ~cvk_event_combine() { + m_start_event->release(); + m_end_event->release(); + } + + void set_status(cl_int status) override final { + UNUSED(status); + CVK_ASSERT(false && "Should never be called"); + } + + void register_callback(cl_int callback_type, + cvk_event_callback_pointer_type ptr, + void* user_data) override final { + if (callback_type == CL_COMPLETE) { + m_end_event->register_callback(callback_type, ptr, user_data); + } else { + m_start_event->register_callback(callback_type, ptr, user_data); + } + } + + cl_int get_status() const override final { + return std::min(m_end_event->get_status(), m_start_event->get_status()); + } + + cl_int wait() override final { return m_end_event->wait(); } + + uint64_t get_profiling_info(cl_profiling_info pinfo) const override final { + if (pinfo == CL_PROFILING_COMMAND_END) { + return m_end_event->get_profiling_info(pinfo); + } else { + return m_start_event->get_profiling_info(pinfo); + } + } + +private: + cvk_event* m_start_event; + cvk_event* m_end_event; +}; + using cvk_event_holder = refcounted_holder; static inline cvk_event* icd_downcast(cl_event event) { diff --git a/src/queue.cpp b/src/queue.cpp index fb82f068..fd6da39c 100644 --- a/src/queue.cpp +++ b/src/queue.cpp @@ -1450,20 +1450,6 @@ cl_int cvk_command_unmap_image::do_action() { // TODO flush caches on non-coherent memory m_image->remove_mapping(m_mapped_ptr); - if (m_needs_copy) { - cl_int err; - if (m_update_host_ptr) { - err = m_cmd_host_ptr_update->do_action(); - if (err != CL_COMPLETE) { - return err; - } - } - err = m_cmd_copy.do_action(); - if (err != CL_COMPLETE) { - return err; 
- } - } - return CL_COMPLETE; } @@ -1560,62 +1546,6 @@ VkBufferImageCopy prepare_buffer_image_copy(const cvk_image* image, return ret; } -cl_int cvk_command_map_image::build(void** map_ptr) { - // Get a mapping - if (!m_image->find_or_create_mapping(m_mapping, m_origin, m_region, m_flags, - m_update_host_ptr)) { - cvk_error("cannot find or create a mapping"); - return CL_OUT_OF_RESOURCES; - } - - *map_ptr = m_mapping.ptr; - - if (needs_copy()) { - m_cmd_copy = std::make_unique( - CL_COMMAND_MAP_IMAGE, m_queue, m_mapping.buffer, m_image, 0, - m_origin, m_region); - - cl_int err = m_cmd_copy->build(); - if (err != CL_SUCCESS) { - return err; - } - - if (m_update_host_ptr && m_image->has_flags(CL_MEM_USE_HOST_PTR)) { - size_t zero_origin[3] = {0, 0, 0}; - m_cmd_host_ptr_update = - std::make_unique( - m_queue, CL_COMMAND_READ_BUFFER_RECT, m_mapping.buffer, - m_image->host_ptr(), m_origin.data(), zero_origin, - m_region.data(), m_image->row_pitch(), - m_image->slice_pitch(), - m_image->map_buffer_row_pitch(m_region), - m_image->map_buffer_slice_pitch(m_region), - m_image->element_size()); - } - } - - return CL_SUCCESS; -} - -cl_int cvk_command_map_image::do_action() { - if (needs_copy()) { - auto err = m_cmd_copy->do_action(); - if (err != CL_COMPLETE) { - return CL_OUT_OF_RESOURCES; - } - - if (m_update_host_ptr) { - if (m_cmd_host_ptr_update->do_action() != CL_COMPLETE) { - return CL_OUT_OF_RESOURCES; - } - } - } - - // TODO invalidate buffer if the memory isn't coherent - - return CL_COMPLETE; -} - void cvk_command_buffer_image_copy::build_inner_image_to_buffer( cvk_command_buffer& cmdbuf, const VkBufferImageCopy& region) { VkImageSubresourceRange subresourceRange = { diff --git a/src/queue.hpp b/src/queue.hpp index 6dcca56b..92e134e9 100644 --- a/src/queue.hpp +++ b/src/queue.hpp @@ -368,7 +368,7 @@ struct cvk_command { cvk_command(cl_command_type type, cvk_command_queue* queue) : m_type(type), m_queue(queue), - m_event(new cvk_event(m_queue->context(), this, 
queue)) {} + m_event(new cvk_event_command(m_queue->context(), this, queue)) {} virtual ~cvk_command() { m_event->release(); } @@ -435,9 +435,7 @@ struct cvk_command { return status; } - CHECK_RETURN virtual cl_int do_action() = 0; - - cvk_event* event() const { return m_event; } + cvk_event_command* event() const { return m_event; } cl_command_type type() const { return m_type; } @@ -462,9 +460,11 @@ struct cvk_command { protected: cl_command_type m_type; cvk_command_queue_holder m_queue; - cvk_event* m_event; + cvk_event_command* m_event; private: + CHECK_RETURN virtual cl_int do_action() = 0; + std::vector m_event_deps; }; @@ -500,13 +500,13 @@ struct cvk_command_buffer_host_copy final : cvk_command_buffer_base_region(q, type, buffer, offset, size), m_ptr(const_cast(ptr)) {} - CHECK_RETURN cl_int do_action() override final; - const std::vector memory_objects() const override { return {m_buffer}; } private: + CHECK_RETURN cl_int do_action() override final; + void* m_ptr; }; @@ -569,13 +569,13 @@ struct cvk_command_copy_host_buffer_rect final : public cvk_command { elem_size), m_buffer(buffer), m_hostptr(hostptr) {} - CHECK_RETURN cl_int do_action() override final; - const std::vector memory_objects() const override final { return {m_buffer}; } private: + CHECK_RETURN cl_int do_action() override final; + cvk_rectangle_copier m_copier; cvk_buffer_holder m_buffer; void* m_hostptr; @@ -589,13 +589,13 @@ struct cvk_command_copy_buffer final : public cvk_command { : cvk_command(type, q), m_src_buffer(src), m_dst_buffer(dst), m_src_offset(src_offset), m_dst_offset(dst_offset), m_size(size) {} - CHECK_RETURN cl_int do_action() override final; - const std::vector memory_objects() const override { return {m_src_buffer, m_dst_buffer}; } private: + CHECK_RETURN cl_int do_action() override final; + cvk_buffer_holder m_src_buffer; cvk_buffer_holder m_dst_buffer; size_t m_src_offset; @@ -615,13 +615,13 @@ struct cvk_command_copy_buffer_rect final : public cvk_command { 
src_slice_pitch, dst_row_pitch, dst_slice_pitch, 1), m_src_buffer(src_buffer), m_dst_buffer(dst_buffer) {} - CHECK_RETURN cl_int do_action() override final; - const std::vector memory_objects() const override { return {m_src_buffer, m_dst_buffer}; } private: + CHECK_RETURN cl_int do_action() override final; + cvk_rectangle_copier m_copier; cvk_buffer_holder m_src_buffer; cvk_buffer_holder m_dst_buffer; @@ -637,13 +637,13 @@ struct cvk_command_fill_buffer final : public cvk_command_buffer_base_region { memcpy(m_pattern.data(), pattern, pattern_size); } - CHECK_RETURN cl_int do_action() override final; - const std::vector memory_objects() const override { return {m_buffer}; } private: + CHECK_RETURN cl_int do_action() override final; + static constexpr int MAX_PATTERN_SIZE = 128; std::array m_pattern; size_t m_pattern_size; @@ -670,8 +670,6 @@ struct cvk_command_batchable : public cvk_command { CHECK_RETURN cl_int build(cvk_command_buffer& cmdbuf); CHECK_RETURN virtual cl_int build_batchable_inner(cvk_command_buffer& cmdbuf) = 0; - CHECK_RETURN cl_int do_action() override; - CHECK_RETURN virtual cl_int do_post_action() { return CL_SUCCESS; } CHECK_RETURN cl_int set_profiling_info_end(cl_ulong sync_dev, cl_ulong sync_host) { @@ -708,6 +706,9 @@ struct cvk_command_batchable : public cvk_command { } private: + CHECK_RETURN virtual cl_int do_post_action() { return CL_SUCCESS; } + CHECK_RETURN cl_int do_action() override; + std::unique_ptr m_command_buffer; VkQueryPool m_query_pool; @@ -767,8 +768,6 @@ struct cvk_command_kernel final : public cvk_command_batchable { CHECK_RETURN cl_int build_batchable_inner(cvk_command_buffer& cmdbuf) override final; - CHECK_RETURN cl_int do_post_action() override final; - bool can_be_batched() const override final { return !m_kernel->uses_printf() && cvk_command_batchable::can_be_batched(); @@ -784,6 +783,8 @@ struct cvk_command_kernel final : public cvk_command_batchable { } private: + CHECK_RETURN cl_int do_post_action() override 
final; + CHECK_RETURN cl_int build_and_dispatch_regions(cvk_command_buffer& command_buffer); CHECK_RETURN cl_int @@ -808,7 +809,6 @@ struct cvk_command_batch : public cvk_command { cvk_command_batch(cvk_command_queue* queue) : cvk_command(CLVK_COMMAND_BATCH, queue) {} - cl_int do_action() override final; cl_int add_command(cvk_command_batchable* cmd) { if (!m_command_buffer) { // Create command buffer and start recording on first call @@ -878,6 +878,8 @@ struct cvk_command_batch : public cvk_command { } private: + CHECK_RETURN cl_int do_action() override final; + std::vector> m_commands; std::unique_ptr m_command_buffer; cl_ulong m_sync_dev, m_sync_host; @@ -896,13 +898,14 @@ struct cvk_command_map_buffer final : public cvk_command_buffer_base_region { } } CHECK_RETURN cl_int build(void** map_ptr); - CHECK_RETURN cl_int do_action() override final; const std::vector memory_objects() const override { return {m_buffer}; } private: + CHECK_RETURN cl_int do_action() override final; + cl_map_flags m_flags; cvk_buffer_mapping m_mapping; bool m_mapping_needs_releasing_on_destruction; @@ -914,13 +917,14 @@ struct cvk_command_unmap_buffer final : public cvk_command_buffer_base { void* map_ptr) : cvk_command_buffer_base(queue, CL_COMMAND_UNMAP_MEM_OBJECT, buffer), m_mapped_ptr(map_ptr) {} - CHECK_RETURN cl_int do_action() override final; const std::vector memory_objects() const override { return {m_buffer}; } private: + CHECK_RETURN cl_int do_action() override final; + void* m_mapped_ptr; }; @@ -928,9 +932,10 @@ struct cvk_command_dep : public cvk_command { cvk_command_dep(cvk_command_queue* q, cl_command_type type) : cvk_command(type, q) {} - CHECK_RETURN cl_int do_action() override final { return CL_COMPLETE; } - const std::vector memory_objects() const override { return {}; } + +private: + CHECK_RETURN cl_int do_action() override final { return CL_COMPLETE; } }; struct cvk_command_buffer_image_copy final : public cvk_command_batchable { @@ -974,137 +979,39 @@ struct 
cvk_command_buffer_image_copy final : public cvk_command_batchable { cl_command_type m_copy_type; }; -struct cvk_command_combine final : public cvk_command { - cvk_command_combine(cvk_command_queue* queue, cl_command_type type, - std::vector>&& commands) - : cvk_command(type, queue), m_commands(std::move(commands)) {} - - CHECK_RETURN cl_int do_action() override final { - for (auto& cmd : m_commands) { - cl_int ret = cmd->do_action(); - if (ret != CL_COMPLETE) { - return ret; - } - } - - return CL_COMPLETE; - } - const std::vector memory_objects() const override { - std::vector ret; - // Reduce the number of reallocations - ret.reserve(m_commands.size() * 2); - for (auto& cmd : m_commands) { - auto const mems = cmd->memory_objects(); - ret.insert(std::end(ret), std::begin(mems), std::end(mems)); - } - return ret; - } - -private: - std::vector> m_commands; -}; - struct cvk_command_map_image final : public cvk_command { - cvk_command_map_image(cvk_command_queue* q, cvk_image* img, - const std::array& origin, - const std::array& region, - cl_map_flags flags, bool update_host_ptr = false) - : cvk_command(CL_COMMAND_MAP_IMAGE, q), m_image(img), m_origin(origin), - m_region(region), m_flags(flags), - m_update_host_ptr(update_host_ptr && - m_image->has_flags(CL_MEM_USE_HOST_PTR)) {} - - CHECK_RETURN cl_int build(void** map_ptr); - CHECK_RETURN cl_int do_action() override final; - cvk_buffer* map_buffer() { return m_mapping.buffer; } - size_t row_pitch() const { - if (m_update_host_ptr) { - return m_image->row_pitch(); - } else { - return m_image->map_buffer_row_pitch(m_mapping); - } - } - size_t slice_pitch() const { - if (m_update_host_ptr) { - return m_image->slice_pitch(); - } else { - return m_image->map_buffer_slice_pitch(m_mapping); - } - } + // This command intends to manage the dependency with m_image initialization + cvk_command_map_image(cvk_command_queue* q, cvk_image* img) + : cvk_command(CL_COMMAND_MAP_IMAGE, q), m_image(img) {} const std::vector
memory_objects() const override { return {m_image}; } private: - bool needs_copy() const { - return (m_flags & CL_MAP_WRITE_INVALIDATE_REGION) == 0; - } + CHECK_RETURN cl_int do_action() override final { return CL_COMPLETE; }; cvk_image_holder m_image; - cvk_image_mapping m_mapping; - std::array m_origin; - std::array m_region; - cl_map_flags m_flags; - std::unique_ptr m_cmd_copy; - std::unique_ptr m_cmd_host_ptr_update; - bool m_update_host_ptr; }; struct cvk_command_unmap_image final : public cvk_command { - cvk_command_unmap_image(cvk_command_queue* q, cvk_image* image, - void* mapptr, bool update_host_ptr = false) - : cvk_command_unmap_image(q, image, mapptr, image->mapping_for(mapptr), - update_host_ptr) { - } // FIXME crashes when the mapping doesn't exist - cvk_command_unmap_image(cvk_command_queue* queue, cvk_image* image, - void* mapped_ptr, const cvk_image_mapping& mapping, - bool update_host_ptr) + void* mapped_ptr) : cvk_command(CL_COMMAND_UNMAP_MEM_OBJECT, queue), - m_needs_copy((mapping.flags & - (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) != 0), - m_mapped_ptr(mapped_ptr), m_image(image), - m_cmd_copy(CL_COMMAND_UNMAP_MEM_OBJECT, queue, mapping.buffer, image, - 0, mapping.origin, mapping.region), - m_update_host_ptr(update_host_ptr && - m_image->has_flags(CL_MEM_USE_HOST_PTR)) {} - cl_int build() { - if (m_needs_copy) { - auto err = m_cmd_copy.build(); - if (err != CL_SUCCESS) { - return err; - } - if (m_update_host_ptr) { - size_t zero_origin[3] = {0, 0, 0}; - auto const& mapping = m_image->mapping_for(m_mapped_ptr); - m_cmd_host_ptr_update = - std::make_unique( - m_queue, CL_COMMAND_WRITE_BUFFER_RECT, mapping.buffer, - m_image->host_ptr(), mapping.origin.data(), zero_origin, - mapping.region.data(), m_image->row_pitch(), - m_image->slice_pitch(), - m_image->map_buffer_row_pitch(mapping), - m_image->map_buffer_slice_pitch(mapping), - m_image->element_size()); - } - } - return CL_SUCCESS; + m_mapped_ptr(mapped_ptr), + m_image(image) { // FIXME 
crashes when the mapping doesn't exist } - CHECK_RETURN cl_int do_action() override final; const std::vector memory_objects() const override final { return {m_image}; } private: - bool m_needs_copy; + CHECK_RETURN cl_int do_action() override final; + void* m_mapped_ptr; cvk_image_holder m_image; - cvk_command_buffer_image_copy m_cmd_copy; - std::unique_ptr m_cmd_host_ptr_update; - bool m_update_host_ptr; }; struct cvk_command_image_image_copy final : public cvk_command_batchable { @@ -1141,11 +1048,12 @@ struct cvk_command_fill_image final : public cvk_command { const std::array& region) : cvk_command(CL_COMMAND_FILL_IMAGE, queue), m_ptr(ptr), m_pattern(pattern), m_pattern_size(pattern_size), m_region(region) {} - CHECK_RETURN cl_int do_action() override final; const std::vector memory_objects() const override { return {}; } private: + CHECK_RETURN cl_int do_action() override final; + void* m_ptr; cvk_image::fill_pattern_array m_pattern; size_t m_pattern_size;