Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ jobs:
CAF_IMAGES=$(( CAF_IMAGES / 2 )) ; \
done

- name: Run exit tests
- name: Run exit/failure tests
run: |
echo CAF_IMAGES=${CAF_IMAGES}
set -x
Expand All @@ -301,6 +301,14 @@ jobs:
./run-fpm.sh run --verbose --example fail_image 2>&1 | tee output ; \
test ${PIPESTATUS[0]} > 0 && grep -q "FAIL IMAGE" output \
)
( set +e ; \
./run-fpm.sh run --verbose --example out_of_memory 2>&1 | tee output ; \
test ${PIPESTATUS[0]} > 0 && grep -q "out of memory" output \
)
( set +e ; \
./run-fpm.sh run --verbose --example out_of_memory -- --coarray 2>&1 | tee output ; \
test ${PIPESTATUS[0]} > 0 && grep -q "out of memory" output \
)
unset GASNET_SPAWN_VERBOSE
for ((i=1; i<=4; i++)); do \
(set +e ; \
Expand Down
5 changes: 0 additions & 5 deletions docs/implementation-status.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,6 @@ the labels in the Caffeine [issue tracker](https://github.com/BerkeleyLab/caffei
Caffeine contains definitions for all of the PRIF-relevant constants from ISO_FORTRAN_ENV and for
all of the PRIF-specific constants.

## `stat` and `errmsg` support

Many PRIF procedures have optional arguments `stat`, `errmsg`, and `errmsg_alloc`. These arguments
are accepted, but in some cases, the associated runtime behavior is not fully implemented.

## Program Startup and Shutdown

| Procedure | Status | Notes |
Expand Down
53 changes: 53 additions & 0 deletions example/support-test/out_of_memory.F90
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
program out_of_memory
use iso_c_binding, only: c_bool, c_size_t, c_ptr, c_null_funptr, c_int64_t
use prif
implicit none

integer :: init_exit_code, me, i
integer(c_size_t) :: size_in_bytes = ishft(500_c_size_t, 40)
type(c_ptr) :: allocated_memory
logical :: coarray = .false.
character(len=256) :: arg

call prif_init(init_exit_code)
if (init_exit_code /= 0 .and. init_exit_code /= PRIF_STAT_ALREADY_INIT) then
call prif_error_stop(quiet=.false._c_bool, stop_code_char="program startup failed")
end if
call prif_this_image_no_coarray(this_image=me)

do i = 1, command_argument_count()
call get_command_argument(i, arg)

if (trim(arg) == "--coarray" .or. trim(arg) == "-c") then
coarray = .true.
else
read(arg, *) size_in_bytes
end if
end do

if (coarray) then
if (me == 1) print *, "prif_allocate_coarray: ", size_in_bytes, " bytes"
block
integer(c_int64_t), dimension(1) :: lcobounds, ucobounds
integer :: num_imgs
type(prif_coarray_handle) :: coarray_handle

call prif_num_images(num_images=num_imgs)
lcobounds(1) = 1
ucobounds(1) = num_imgs

call prif_allocate_coarray( &
lcobounds, ucobounds, size_in_bytes, c_null_funptr, &
coarray_handle, allocated_memory)
end block
else
if (me == 1) print *, "prif_allocate: ", size_in_bytes, " bytes"
call prif_sync_all()
call prif_allocate(size_in_bytes, allocated_memory)
end if


call prif_sync_all()
call prif_error_stop(quiet=.false._c_bool, stop_code_char="test failed")

end program
88 changes: 63 additions & 25 deletions src/caffeine/allocation_s.F90
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,26 @@
descriptor_size = c_sizeof(unused)
total_size = descriptor_size + size_in_bytes
whole_block = caf_allocate(current_team%info%heap_mspace, total_size)
block_offset = as_int(whole_block) - current_team%info%heap_start
if (.not. c_associated(whole_block)) then
block_offset = -1 ! out of memory
else
block_offset = as_int(whole_block) - current_team%info%heap_start
end if
else
block_offset = 0
end if
call prif_sync_memory ! end the current segment
! Use a co_sum to aggregate broadcasing the information from image 1
! together with the team barrier spec-required by coarray allocation
call prif_co_sum(block_offset)
if (block_offset == -1) then ! out of memory - abort allocation attempt
call report_error(PRIF_STAT_OUT_OF_MEMORY, out_of_memory_message(size_in_bytes, .true.), &
stat, errmsg, errmsg_alloc)
if (caf_have_child_teams()) then ! unroll state change above before return
call caf_establish_child_heap
end if
return
end if
if (me /= 1) whole_block = as_c_ptr(current_team%info%heap_start + block_offset)

call c_f_pointer(whole_block, coarray_handle%info)
Expand All @@ -70,9 +82,51 @@
end procedure

module procedure prif_allocate
allocated_memory = caf_allocate(non_symmetric_heap_mspace, size_in_bytes)
type(c_ptr) :: mem

mem = caf_allocate(non_symmetric_heap_mspace, size_in_bytes)
if (.not. c_associated(mem)) then
call report_error(PRIF_STAT_OUT_OF_MEMORY, out_of_memory_message(size_in_bytes, .false.), &
stat, errmsg, errmsg_alloc)
else
allocated_memory = mem
end if
end procedure

function out_of_memory_message(size_in_bytes, symmetric) result(message)
integer(c_size_t), intent(in) :: size_in_bytes
logical, intent(in) :: symmetric
character(len=:), allocatable :: mem_type
character(len=:), allocatable :: message

message = "Fortran shared heap is out of memory"
if (symmetric) then
mem_type = "coarray"
else
message = message // " on image " // num_to_str(initial_team%this_image)
mem_type = "non-coarray"
end if
message = message // new_line('') &
// " while allocating " // num_to_str(size_in_bytes, .true.) // " of additional " &
// mem_type // " memory." // new_line('') &
// new_line('') &
// " Shared heap size information:" // new_line('') &
// " Total shared heap: " // pad(num_to_str(total_heap_size, .true.)) &
// " (CAF_HEAP_SIZE)" // new_line('') &
// " Total non-coarray heap: " // pad(num_to_str(non_symmetric_heap_size, .true.)) &
// " (CAF_COMP_FRAC * CAF_HEAP_SIZE)" // new_line('') &
// " Current team coarray heap: " // pad(num_to_str(current_team%info%heap_size, .true.)) // new_line('') &
// new_line('') &
// " Consider setting the CAF_HEAP_SIZE environment variable to request a larger heap."
contains
function pad(str) result(s)
character(len=*), intent(in) :: str
character(len=:), allocatable :: s
s = str
s = repeat(' ',max(0, 10 - len(str))) // s
end function
end function

#if CAF_PRIF_VERSION <= 6
module procedure prif_deallocate_coarray
#else
Expand All @@ -82,7 +136,6 @@
module procedure prif_deallocate_coarrays
#endif
integer :: i, num_handles
character(len=*), parameter :: unallocated_message = "Attempted to deallocate unallocated coarray"
type(prif_coarray_handle), target :: coarray_handle
# if HAVE_FINAL_FUNC_SUPPORT
abstract interface
Expand All @@ -102,17 +155,9 @@ subroutine coarray_cleanup_i(handle, stat, errmsg) bind(C)
call prif_sync_all ! Need to ensure we don't deallocate anything till everyone gets here
num_handles = size(coarray_handles)
if (.not. all([(associated(coarray_handles(i)%info), i = 1, num_handles)])) then
if (present(stat)) then
stat = 1 ! TODO: decide what our stat codes should be
if (present(errmsg)) then
errmsg = unallocated_message
else if (present(errmsg_alloc)) then
errmsg_alloc = unallocated_message
end if
return
else
call prif_error_stop(.false._c_bool, stop_code_char=unallocated_message)
end if
call report_error(CAF_STAT_INVALID_ARGUMENT, "Attempted to deallocate unallocated coarray", &
stat, errmsg, errmsg_alloc)
return
end if
call_assert(all(coarray_handle_check(coarray_handles)))

Expand All @@ -129,17 +174,9 @@ subroutine coarray_cleanup_i(handle, stat, errmsg) bind(C)
if (.not. allocated(local_errmsg)) then ! provide a default errmsg
local_errmsg = "coarray_cleanup finalization callback failed"
end if
if (present(stat)) then
stat = local_stat
if (present(errmsg)) then
errmsg = local_errmsg
else if (present(errmsg_alloc)) then
call move_alloc(local_errmsg, errmsg_alloc)
end if
return ! NOTE: We no longer have guarantees that coarrays are in consistent state
else
call prif_error_stop(.false._c_bool, stop_code_char=local_errmsg)
end if
call report_error(local_stat, local_errmsg, &
stat, errmsg, errmsg_alloc)
return ! NOTE: We no longer have guarantees that coarrays are in consistent state
end if
# else
! TODO: issue a warning that we are ignoring the final_func?
Expand All @@ -164,6 +201,7 @@ subroutine coarray_cleanup_i(handle, stat, errmsg) bind(C)

module procedure prif_deallocate
call caf_deallocate(non_symmetric_heap_mspace, mem)
if (present(stat)) stat = 0
end procedure

subroutine add_to_team_list(coarray_handle)
Expand Down
40 changes: 30 additions & 10 deletions src/caffeine/caffeine.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ int caf_image_from_initial(gex_TM_t tm, int image_num) {
// ---------------------------------------------------
// NOTE: gex_TM_T is a typedef to a C pointer, so the `gex_TM_t* initial_team` arg in the C signature matches the BIND(C) interface of an `intent(out)` arg of type `c_ptr` for the same argument
void caf_caffeinate(
intptr_t* total_heap_size,
mspace* symmetric_heap,
intptr_t* symmetric_heap_start,
intptr_t* symmetric_heap_size,
Expand All @@ -90,41 +91,63 @@ void caf_caffeinate(
numprocs = gex_TM_QuerySize(myworldteam);
*initial_team = myworldteam;

#define PAGE_ALIGNUP(sz) ((sz + GASNET_PAGESIZE - 1) & ~(GASNET_PAGESIZE-1))

// query largest possible segment GASNet can give us of the same size across all processes:
size_t max_seg = gasnet_getMaxGlobalSegmentSize();
uintptr_t max_seg = gasnet_getMaxGlobalSegmentSize();
// impose a reasonable default size
#ifndef CAF_DEFAULT_HEAP_SIZE
#define CAF_DEFAULT_HEAP_SIZE (128*1024*1024) // 128 MiB
#endif
size_t default_seg = MIN(max_seg, CAF_DEFAULT_HEAP_SIZE);
uintptr_t default_seg = MIN(max_seg, CAF_DEFAULT_HEAP_SIZE);
// retrieve user preference, defaulting to the above and units of MiB
size_t segsz = gasnett_getenv_int_withdefault("CAF_HEAP_SIZE",
uintptr_t segsz = gasnett_getenv_int_withdefault("CAF_HEAP_SIZE",
default_seg, 1024*1024);
// ensure at least two full pages
segsz = MAX(segsz,2*GASNET_PAGESIZE);
// round-up to closest page size
segsz = PAGE_ALIGNUP(segsz);
// cap user request to the largest available:
// TODO: issue a console warning here instead of silently capping
segsz = MIN(segsz,max_seg);
assert(segsz % GASNET_PAGESIZE == 0);

GASNET_SAFE(gex_Segment_Attach(&mysegment, myworldteam, segsz));

*symmetric_heap_start = (intptr_t)gex_Segment_QueryAddr(mysegment);
size_t total_heap_size = gex_Segment_QuerySize(mysegment);
*total_heap_size = gex_Segment_QuerySize(mysegment);
assert(*total_heap_size >= 2*GASNET_PAGESIZE);

#ifndef CAF_DEFAULT_COMP_FRAC
#define CAF_DEFAULT_COMP_FRAC 0.1f // 10%
#endif
float default_comp_frac = MAX(MIN(0.99f, CAF_DEFAULT_COMP_FRAC), 0.01f);
float non_symmetric_fraction = gasnett_getenv_dbl_withdefault("CAF_COMP_FRAC", default_comp_frac);
assert(non_symmetric_fraction > 0 && non_symmetric_fraction < 1); // TODO: real error reporting
if (non_symmetric_fraction <= 0 || non_symmetric_fraction >= 1) {
gasnett_fatalerror_nopos("If used, environment variable 'CAF_COMP_FRAC' must be a valid floating point value or fraction between 0 and 1.");
}

size_t non_symmetric_heap_size = total_heap_size * non_symmetric_fraction;
*symmetric_heap_size = total_heap_size - non_symmetric_heap_size;
uintptr_t non_symmetric_heap_size = *total_heap_size * non_symmetric_fraction;
non_symmetric_heap_size = PAGE_ALIGNUP(non_symmetric_heap_size);
*symmetric_heap_size = *total_heap_size - non_symmetric_heap_size;
if (*symmetric_heap_size == 0) {
assert(non_symmetric_heap_size > GASNET_PAGESIZE);
non_symmetric_heap_size -= GASNET_PAGESIZE;
*symmetric_heap_size += GASNET_PAGESIZE;
}
assert(non_symmetric_heap_size > 0);
assert(non_symmetric_heap_size % GASNET_PAGESIZE == 0);
assert(*symmetric_heap_size > 0);
assert(*symmetric_heap_size % GASNET_PAGESIZE == 0);
intptr_t non_symmetric_heap_start = *symmetric_heap_start + *symmetric_heap_size;

if (myproc == 0) {
*symmetric_heap = create_mspace_with_base((void*)*symmetric_heap_start, *symmetric_heap_size, 0);
assert(*symmetric_heap);
mspace_set_footprint_limit(*symmetric_heap, *symmetric_heap_size);
}
*non_symmetric_heap = create_mspace_with_base((void*)non_symmetric_heap_start, non_symmetric_heap_size, 0);
assert(*non_symmetric_heap);
mspace_set_footprint_limit(*non_symmetric_heap, non_symmetric_heap_size);

// init various subsystems:
Expand Down Expand Up @@ -160,9 +183,6 @@ void caf_fatal_error( const CFI_cdesc_t* Fstr )
void* caf_allocate(mspace heap, size_t bytes)
{
void* allocated_space = mspace_memalign(heap, 8, bytes);
if (!allocated_space) // uh-oh, something went wrong..
gasnett_fatalerror("caf_allocate failed to mspace_memalign(%"PRIuSZ")",
bytes);
return allocated_space;
}

Expand Down
2 changes: 2 additions & 0 deletions src/caffeine/coarray_access_s.F90
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@
dest = current_image_buffer, &
src = remote_ptr, &
size = size_in_bytes)

if (present(stat)) stat = 0
end procedure

! _______________________ Strided Get RMA ____________________________
Expand Down
8 changes: 7 additions & 1 deletion src/caffeine/coarray_queries_s.F90
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,14 @@ subroutine initial_index_helper(coarray_handle, sub, team, initial_team_index)

module procedure prif_initial_team_index
call initial_index_helper(coarray_handle, sub, current_team, initial_team_index)

if (present(stat)) stat = 0
end procedure

module procedure prif_initial_team_index_with_team
call initial_index_helper(coarray_handle, sub, team, initial_team_index)

if (present(stat)) stat = 0
end procedure

module procedure prif_initial_team_index_with_team_number
Expand All @@ -152,7 +156,9 @@ subroutine initial_index_helper(coarray_handle, sub, team, initial_team_index)
call initial_index_helper(coarray_handle, sub, current_team, initial_team_index)
else
call unimplemented("prif_initial_team_index_with_team_number: no support for sibling teams")
end if
end if

if (present(stat)) stat = 0
end procedure

!---------------------------------------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions src/caffeine/critical_s.F90
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

module procedure prif_critical
call unimplemented("prif_critical")

if (present(stat)) stat = 0
end procedure

module procedure prif_end_critical
Expand Down
8 changes: 8 additions & 0 deletions src/caffeine/locks_s.F90
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,26 @@

module procedure prif_lock
call unimplemented("prif_lock")

if (present(stat)) stat = 0
end procedure

module procedure prif_lock_indirect
call unimplemented("prif_lock_indirect")

if (present(stat)) stat = 0
end procedure

module procedure prif_unlock
call unimplemented("prif_unlock")

if (present(stat)) stat = 0
end procedure

module procedure prif_unlock_indirect
call unimplemented("prif_unlock_indirect")

if (present(stat)) stat = 0
end procedure

end submodule locks_s
Loading