Skip to content

Commit

Permalink
Merge pull request #12758 from Akshay-Venkatesh/topic/main/reduce-loc…
Browse files Browse the repository at this point in the history
…al-impl

ompi/coll/accelerator: implement reduce_local
  • Loading branch information
bosilca committed Sep 5, 2024
2 parents 1afb524 + 7f6f788 commit e25e897
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 0 deletions.
6 changes: 6 additions & 0 deletions ompi/mca/coll/accelerator/coll_accelerator.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/*
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
Expand Down Expand Up @@ -45,6 +46,11 @@ mca_coll_accelerator_allreduce(const void *sbuf, void *rbuf, size_t count,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);

/*
 * Local (non-communicating) reduction entry point of the accelerator
 * coll component: combines `count` elements of type `dtype` from `sbuf`
 * into `rbuf` using `op`.
 *
 * Returns OMPI_SUCCESS on success, OMPI_ERR_OUT_OF_RESOURCE when a
 * temporary staging buffer cannot be allocated, or the negative value
 * reported by the buffer check.
 */
int mca_coll_accelerator_reduce_local(const void *sbuf, void *rbuf, size_t count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
mca_coll_base_module_t *module);

int mca_coll_accelerator_reduce(const void *sbuf, void *rbuf, size_t count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
Expand Down
4 changes: 4 additions & 0 deletions ompi/mca/coll/accelerator/coll_accelerator_module.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/*
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
Expand Down Expand Up @@ -94,6 +95,7 @@ mca_coll_accelerator_comm_query(struct ompi_communicator_t *comm,

accelerator_module->super.coll_allreduce = mca_coll_accelerator_allreduce;
accelerator_module->super.coll_reduce = mca_coll_accelerator_reduce;
accelerator_module->super.coll_reduce_local = mca_coll_accelerator_reduce_local;
accelerator_module->super.coll_reduce_scatter_block = mca_coll_accelerator_reduce_scatter_block;
if (!OMPI_COMM_IS_INTER(comm)) {
accelerator_module->super.coll_scan = mca_coll_accelerator_scan;
Expand Down Expand Up @@ -141,6 +143,7 @@ mca_coll_accelerator_module_enable(mca_coll_base_module_t *module,

ACCELERATOR_INSTALL_COLL_API(comm, s, allreduce);
ACCELERATOR_INSTALL_COLL_API(comm, s, reduce);
ACCELERATOR_INSTALL_COLL_API(comm, s, reduce_local);
ACCELERATOR_INSTALL_COLL_API(comm, s, reduce_scatter_block);
if (!OMPI_COMM_IS_INTER(comm)) {
/* MPI does not define scan/exscan on intercommunicators */
Expand All @@ -159,6 +162,7 @@ mca_coll_accelerator_module_disable(mca_coll_base_module_t *module,

ACCELERATOR_UNINSTALL_COLL_API(comm, s, allreduce);
ACCELERATOR_UNINSTALL_COLL_API(comm, s, reduce);
ACCELERATOR_UNINSTALL_COLL_API(comm, s, reduce_local);
ACCELERATOR_UNINSTALL_COLL_API(comm, s, reduce_scatter_block);
if (!OMPI_COMM_IS_INTER(comm))
{
Expand Down
58 changes: 58 additions & 0 deletions ompi/mca/coll/accelerator/coll_accelerator_reduce.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/*
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2004-2023 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
Expand Down Expand Up @@ -84,3 +85,60 @@ mca_coll_accelerator_reduce(const void *sbuf, void *rbuf, size_t count,
}
return rc;
}

/**
 * Local reduction: applies `op` to `count` elements of `dtype`, combining
 * `sbuf` into `rbuf` through ompi_op_reduce().
 *
 * Each buffer is first classified with mca_coll_accelerator_check_buf().
 * A positive result (presumably "buffer lives in accelerator memory" --
 * confirm against mca_coll_accelerator_check_buf) makes this routine stage
 * the buffer through a temporary malloc()'d copy so that the reduction
 * runs on the copies; the reduced result is then copied back into the
 * caller's original `rbuf`.
 *
 * @param sbuf    source operand (read only)
 * @param rbuf    in/out operand; receives the reduced result
 * @param count   number of elements of `dtype` in each buffer
 * @param dtype   datatype of the elements
 * @param op      reduction operation
 * @param module  coll module (not used by this routine)
 *
 * @return OMPI_SUCCESS on success, OMPI_ERR_OUT_OF_RESOURCE if a staging
 *         buffer cannot be allocated, or the negative value returned by
 *         mca_coll_accelerator_check_buf().
 */
int
mca_coll_accelerator_reduce_local(const void *sbuf, void *rbuf, size_t count,
                                  struct ompi_datatype_t *dtype,
                                  struct ompi_op_t *op,
                                  mca_coll_base_module_t *module)
{
    ptrdiff_t gap;
    char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
    size_t bufsize;
    int rc;

    bufsize = opal_datatype_span(&dtype->super, count, &gap);

    rc = mca_coll_accelerator_check_buf((void *)sbuf);
    if (rc < 0) {
        /* Nothing has been allocated yet, so a direct return is safe. */
        return rc;
    }

    if ((MPI_IN_PLACE != sbuf) && (rc > 0)) {
        sbuf1 = (char*)malloc(bufsize);
        if (NULL == sbuf1) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        /* NOTE(review): the copy reads from sbuf, not sbuf + gap, while the
         * staged pointer below is rebased by -gap; confirm this is intended
         * for datatypes with a non-zero gap. */
        mca_coll_accelerator_memcpy(sbuf1, sbuf, bufsize);
        sbuf = sbuf1 - gap;
    }

    rc = mca_coll_accelerator_check_buf(rbuf);
    if (rc < 0) {
        /* sbuf1 may already be allocated: release it instead of leaking. */
        goto cleanup;
    }

    if (rc > 0) {
        rbuf1 = (char*)malloc(bufsize);
        if (NULL == rbuf1) {
            rc = OMPI_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        mca_coll_accelerator_memcpy(rbuf1, rbuf, bufsize);
        rbuf2 = rbuf; /* save away original buffer */
        rbuf = rbuf1 - gap;
    }

    ompi_op_reduce(op, (void *)sbuf, rbuf, count, dtype);
    rc = OMPI_SUCCESS;

    if (NULL != rbuf1) {
        /* Publish the staged result into the caller's original buffer. */
        mca_coll_accelerator_memcpy(rbuf2, rbuf1, bufsize);
    }

cleanup:
    /* free(NULL) is a no-op, so both staging buffers are released
     * unconditionally on every exit path that reaches this label. */
    free(rbuf1);
    free(sbuf1);
    return rc;
}

0 comments on commit e25e897

Please sign in to comment.