diff --git a/CMakeLists.txt b/CMakeLists.txt index 3180245d9..c95169c21 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project(SAMRAI C CXX Fortran) set(SAMRAI_VERSION_MAJOR 4) set(SAMRAI_VERSION_MINOR 2) -set(SAMRAI_VERSION_PATCHLEVEL 0) +set(SAMRAI_VERSION_PATCHLEVEL 1) set(SAMRAI_VERSION "${SAMRAI_VERSION_MAJOR}.${SAMRAI_VERSION_MINOR}.${SAMRAI_VERSION_PATCHLEVEL}") diff --git a/INSTALL-NOTES b/INSTALL-NOTES index 7ea7ef0fa..eb68c6e76 100644 --- a/INSTALL-NOTES +++ b/INSTALL-NOTES @@ -296,9 +296,9 @@ doxygen 1.6.1 graphviz 2.26 -Umpire v2022.03.1 +Umpire v2023.06.0 -RAJA v2022.03.1 +RAJA v2023.06.0 Visualization Tools: diff --git a/RELEASE-NOTES b/RELEASE-NOTES index b3e94f617..37aa43e8a 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -4,7 +4,7 @@ All rights reserved. ***************************************************************************** - Release Notes for SAMRAI v4.2.0 + Release Notes for SAMRAI v4.2.1 (notes for previous releases may be found in /SAMRAI/docs/release) @@ -22,6 +22,14 @@ https://github.com/LLNL/SAMRAI ***************************************************************************** +VERSION 4.2.1 + +Version 4.2.1 is a minor patch release to make SAMRAI compatible with +Umpire v2023.06.0 and RAJA v2023.06.0. The content of the release notes +file for SAMRAI version 4.2.0 is preserved here, except for notes marked +as version 4.2.1. + + VERSION 4.2.0 Version 4.2.0 is considered a beta release due to the introduction of @@ -53,12 +61,14 @@ GPU kernels to be accumulated for a single kernel launch. 2) A minimum_patch_load parameter has been added to CascadePartitioner and TreeLoadBalancer as an option to change how small patches are treated during load balancing. 
- + ----------------------------------------------------------------------------- Summary of what's changed ----------------------------------------------------------------------------- -1) +1) VERSION 4.2.1 Minor changes have been made for compatibility with RAJA +v2023.06.0 and Umpire v2023.06.0, and those are the recommended releases +for those libraries for use with this SAMRAI release. ***************************************************************************** @@ -108,6 +118,10 @@ count may be more desired than uniformity in cell count. Details about what's changed ---------------------------------------------------------------------------- +1) VERSION 4.2.1 Minor changes have been made for compatibility with RAJA +v2023.06.0 and Umpire v2023.06.0, and those are the recommended releases +for those libraries for use with this SAMRAI release. + ============================================================================= ============================================================================= diff --git a/docs/release/version-4.2.0 b/docs/release/version-4.2.0 new file mode 100644 index 000000000..b3e94f617 --- /dev/null +++ b/docs/release/version-4.2.0 @@ -0,0 +1,113 @@ +***************************************************************************** + Copyright 1997-2023 + Lawrence Livermore National Security, LLC. + All rights reserved. +***************************************************************************** + + Release Notes for SAMRAI v4.2.0 + + (notes for previous releases may be found in /SAMRAI/docs/release) + +***************************************************************************** + + +Where to report Bugs +-------------------- + +If a bug is found in the SAMRAI library, we ask that you kindly report +it to us so that we may fix it. + +Please send email to samrai-bugs@llnl.gov or post an issue on github. 
+https://github.com/LLNL/SAMRAI + +***************************************************************************** + +VERSION 4.2.0 + +Version 4.2.0 is considered a beta release due to the introduction of +kernel fusion features which may see notable changes in future releases. + + +***************************************************************************** + +---------------------------------------------------------------------------- + Significant bug fixes +---------------------------------------------------------------------------- + +1) A bug in the internal computation of connector widths in +TimeRefinementIntegrator was fixed. There was an incorrect computation of +connector width between adjacent levels when a large tag buffer size was +provided to the input. This computation has been fixed. + +***************************************************************************** + + + +---------------------------------------------------------------------------- + Summary of what's new +----------------------------------------------------------------------------- + +1) RAJA-based kernel fusion features have been added, allowing for independent +GPU kernels to be accumulated for a single kernel launch. + +2) A minimum_patch_load parameter has been added to CascadePartitioner and +TreeLoadBalancer as an option to change how small patches are treated during +load balancing. + +----------------------------------------------------------------------------- + Summary of what's changed +----------------------------------------------------------------------------- + +1) + + +***************************************************************************** + +----------------------------------------------------------------------------- + Details about what's new +----------------------------------------------------------------------------- + +1) RAJA-based kernel fusion features have been added, allowing for independent +GPU kernels to be accumulated for a single kernel launch. 
+ +The core of these features is in the new tbox::KernelFuser class, which uses +RAJA WorkGroup features to enqueue a set of independent kernels that have been +defined as lambda functions in RAJA for_alls. Rather than executing each +kernel on the GPU device when it is reached in the code, the kernels are +stored until the KernelFuser object makes a launch() call, at which time all +kernels are executed concurrently. The intent of this is to reduce the +overhead from launching each kernel separately. + +To support usage of kernel fusion, a new abstract base class +tbox::ScheduleOpsStrategy has been added, with methods that are called from +tbox::Schedule at places where it could be useful for applications to make +calls for kernel fusion operations in their codes. In particular, the calls +to postPack(), postCopy(), and postUnpack() are provided so that +applications can implement calls to kernel fusion launches after the original +calls to data packing, copy, and unpacking operations have enqueued rather +than launched the kernels that do those operations. ScheduleOpsStrategy +is defined generally without reference to kernel fusion, as applications +could choose to implement other things for their codes to do before and after +Schedule operations. A pointer to a ScheduleOpsStrategy can be provided +using set methods available in RefineSchedule, CoarsenSchedule, or Schedule. + +2) A minimum_patch_load parameter has been added to CascadePartitioner and +TreeLoadBalancer as an option to change how small patches are treated during +load balancing. + +The value given for minimum_patch_load is used to "fool" the load balancing +algorithm into treating small patches with a cell count below the given value +as if they were the size of the given value. 
This can reduce the likelihood +that the load balancers will accumulate a large number of small patches +on a single rank, with a known side effect being that the decomposition will +have less uniformity in total cell count on each processor. This is provided +as an option for users running in environments where uniformity in patch +count may be more desired than uniformity in cell count. + +----------------------------------------------------------------------------- + Details about what's changed +---------------------------------------------------------------------------- + + +============================================================================= +============================================================================= diff --git a/source/SAMRAI/tbox/AllocatorDatabase.cpp b/source/SAMRAI/tbox/AllocatorDatabase.cpp index 79bb600ff..000314056 100644 --- a/source/SAMRAI/tbox/AllocatorDatabase.cpp +++ b/source/SAMRAI/tbox/AllocatorDatabase.cpp @@ -70,14 +70,14 @@ AllocatorDatabase::initialize() if (!rm.isAllocator("samrai::data_allocator")) { #if defined(HAVE_CUDA) // Internal pool for allocations -#if 1 +#if 0 auto allocator = rm.makeAllocator( "internal::samrai::um_allocation_advisor", rm.getAllocator(umpire::resource::Unified), // Set preferred location to GPU - "PREFERRED_LOCATION"); + "SET_PREFERRED_LOCATION"); #endif - //auto allocator = rm.getAllocator(umpire::resource::Pinned); + auto allocator = rm.getAllocator(umpire::resource::Pinned); #else auto allocator = rm.getAllocator(umpire::resource::Host); #endif diff --git a/source/SAMRAI/tbox/CommGraphWriter.h b/source/SAMRAI/tbox/CommGraphWriter.h index f19454c46..df1fcab60 100644 --- a/source/SAMRAI/tbox/CommGraphWriter.h +++ b/source/SAMRAI/tbox/CommGraphWriter.h @@ -114,16 +114,6 @@ class CommGraphWriter size_t record_number, std::ostream& os) const; -private: - // Unimplemented copy constructor. - CommGraphWriter( - const CommGraphWriter& other); - - // Unimplemented assignment operator. 
- CommGraphWriter& - operator = ( - const CommGraphWriter& rhs); - struct Edge { Edge():d_value(0.0), d_dir(TO), @@ -135,6 +125,17 @@ class CommGraphWriter std::string d_label; }; + +private: + // Unimplemented copy constructor. + CommGraphWriter( + const CommGraphWriter& other); + + // Unimplemented assignment operator. + CommGraphWriter& + operator = ( + const CommGraphWriter& rhs); + struct NodeValue { NodeValue():d_value(0.0) { }