 #ifdef cl_khr_int64_base_atomics
 #pragma OPENCL EXTENSION cl_khr_int64_base_atomics: enable

-long numba_dppl_atomic_add_i64(volatile __generic long *p, long val, int type) {
+long numba_dppl_atomic_add_i64_local(volatile __generic long *p, long val) {
     long found = *p;
     long expected;
     do {
         expected = found;
-        if (type == 1) { /* The address qualifier should be __local */
-            found = atom_cmpxchg((volatile __local ulong *)p, expected, expected + val);
-        } else {
-            found = atom_cmpxchg((volatile __global ulong *)p, expected, expected + val);
-        }
+        found = atom_cmpxchg((volatile __local ulong *)p, expected, expected + val);
     } while (found != expected);
     return found;
 }

-long numba_dppl_atomic_sub_i64(volatile __generic long *p, long val, int type) {
+long numba_dppl_atomic_add_i64_global(volatile __generic long *p, long val) {
     long found = *p;
     long expected;
     do {
         expected = found;
-        if (type == 1) { /* The address qualifier should be __local */
-            found = atom_cmpxchg((volatile __local ulong *)p, expected, expected - val);
-        } else {
-            found = atom_cmpxchg((volatile __global ulong *)p, expected, expected - val);
-        }
+        found = atom_cmpxchg((volatile __global ulong *)p, expected, expected + val);
+    } while (found != expected);
+    return found;
+}
+
+long numba_dppl_atomic_sub_i64_local(volatile __generic long *p, long val) {
+    long found = *p;
+    long expected;
+    do {
+        expected = found;
+        found = atom_cmpxchg((volatile __local ulong *)p, expected, expected - val);
+    } while (found != expected);
+    return found;
+}
+
+long numba_dppl_atomic_sub_i64_global(volatile __generic long *p, long val) {
+    long found = *p;
+    long expected;
+    do {
+        expected = found;
+        found = atom_cmpxchg((volatile __global ulong *)p, expected, expected - val);
     } while (found != expected);
     return found;
 }

 #ifdef cl_khr_fp64
 #pragma OPENCL EXTENSION cl_khr_fp64: enable

-double numba_dppl_atomic_cmpxchg_f64(volatile __generic double *p, double cmp, double val, int type) {
+double numba_dppl_atomic_cmpxchg_f64_local(volatile __generic double *p, double cmp, double val) {
     union {
         ulong u64;
         double f64;
     } cmp_union, val_union, old_union;

     cmp_union.f64 = cmp;
     val_union.f64 = val;
-    if (type == 1) { /* The address qualifier should be __local */
-        old_union.u64 = atom_cmpxchg((volatile __local ulong *) p, cmp_union.u64, val_union.u64);
-    } else {
-        old_union.u64 = atom_cmpxchg((volatile __global ulong *) p, cmp_union.u64, val_union.u64);
-    }
+    old_union.u64 = atom_cmpxchg((volatile __local ulong *) p, cmp_union.u64, val_union.u64);
     return old_union.f64;
 }

-double numba_dppl_atomic_add_f64(volatile __generic double *p, double val, int type) {
+double numba_dppl_atomic_cmpxchg_f64_global(volatile __generic double *p, double cmp, double val) {
+    union {
+        ulong u64;
+        double f64;
+    } cmp_union, val_union, old_union;
+
+    cmp_union.f64 = cmp;
+    val_union.f64 = val;
+    old_union.u64 = atom_cmpxchg((volatile __global ulong *) p, cmp_union.u64, val_union.u64);
+    return old_union.f64;
+}
+
+double numba_dppl_atomic_add_f64_local(volatile __generic double *p, double val) {
+    double found = *p;
+    double expected;
+    do {
+        expected = found;
+        found = numba_dppl_atomic_cmpxchg_f64_local(p, expected, expected + val);
+    } while (found != expected);
+    return found;
+}
+
+double numba_dppl_atomic_add_f64_global(volatile __generic double *p, double val) {
     double found = *p;
     double expected;
     do {
         expected = found;
-        found = numba_dppl_atomic_cmpxchg_f64(p, expected, expected + val, type);
+        found = numba_dppl_atomic_cmpxchg_f64_global(p, expected, expected + val);
     } while (found != expected);
     return found;
 }

-double numba_dppl_atomic_sub_f64(volatile __generic double *p, double val, int type) {
+
+double numba_dppl_atomic_sub_f64_local(volatile __generic double *p, double val) {
     double found = *p;
     double expected;
     do {
         expected = found;
-        found = numba_dppl_atomic_cmpxchg_f64(p, expected, expected - val, type);
+        found = numba_dppl_atomic_cmpxchg_f64_local(p, expected, expected - val);
+    } while (found != expected);
+    return found;
+}
+
+double numba_dppl_atomic_sub_f64_global(volatile __generic double *p, double val) {
+    double found = *p;
+    double expected;
+    do {
+        expected = found;
+        found = numba_dppl_atomic_cmpxchg_f64_global(p, expected, expected - val);
     } while (found != expected);
     return found;
 }
 #endif
 #endif

-float numba_dppl_atomic_cmpxchg_f32(volatile __generic float *p, float cmp, float val, int type) {
+float numba_dppl_atomic_cmpxchg_f32_local(volatile __generic float *p, float cmp, float val) {
     union {
         unsigned int u32;
         float f32;
     } cmp_union, val_union, old_union;

     cmp_union.f32 = cmp;
     val_union.f32 = val;
-    if (type == 1) { /* The address qualifier should be __local */
-        old_union.u32 = atomic_cmpxchg((volatile __local unsigned int *) p, cmp_union.u32, val_union.u32);
-    } else {
-        old_union.u32 = atomic_cmpxchg((volatile __global unsigned int *) p, cmp_union.u32, val_union.u32);
-    }
+    old_union.u32 = atomic_cmpxchg((volatile __local unsigned int *) p, cmp_union.u32, val_union.u32);
+    return old_union.f32;
+}
+
+float numba_dppl_atomic_cmpxchg_f32_global(volatile __generic float *p, float cmp, float val) {
+    union {
+        unsigned int u32;
+        float f32;
+    } cmp_union, val_union, old_union;
+
+    cmp_union.f32 = cmp;
+    val_union.f32 = val;
+    old_union.u32 = atomic_cmpxchg((volatile __global unsigned int *) p, cmp_union.u32, val_union.u32);
     return old_union.f32;
 }

-float numba_dppl_atomic_add_f32(volatile __generic float *p, float val, int type) {
+float numba_dppl_atomic_add_f32_local(volatile __generic float *p, float val) {
     float found = *p;
     float expected;
     do {
         expected = found;
-        found = numba_dppl_atomic_cmpxchg_f32(p, expected, expected + val, type);
+        found = numba_dppl_atomic_cmpxchg_f32_local(p, expected, expected + val);
     } while (found != expected);
     return found;
 }

-int numba_dppl_atomic_add_i32(volatile __generic int *p, int val, int type) {
-    int found = *p;
-    int expected;
+float numba_dppl_atomic_add_f32_global(volatile __generic float *p, float val) {
+    float found = *p;
+    float expected;
     do {
         expected = found;
-        if (type == 1) { /* The address qualifier should be __local */
-            found = atomic_cmpxchg((volatile __local unsigned int *)p, expected, expected + val);
-        } else {
-            found = atomic_cmpxchg((volatile __global unsigned int *)p, expected, expected + val);
-        }
+        found = numba_dppl_atomic_cmpxchg_f32_global(p, expected, expected + val);
     } while (found != expected);
     return found;
 }

-float numba_dppl_atomic_sub_f32(volatile __generic float *p, float val, int type) {
+float numba_dppl_atomic_sub_f32_local(volatile __generic float *p, float val) {
     float found = *p;
     float expected;
     do {
         expected = found;
-        found = numba_dppl_atomic_cmpxchg_f32(p, expected, expected - val, type);
+        found = numba_dppl_atomic_cmpxchg_f32_local(p, expected, expected - val);
+    } while (found != expected);
+    return found;
+}
+
+float numba_dppl_atomic_sub_f32_global(volatile __generic float *p, float val) {
+    float found = *p;
+    float expected;
+    do {
+        expected = found;
+        found = numba_dppl_atomic_cmpxchg_f32_global(p, expected, expected - val);
+    } while (found != expected);
+    return found;
+}
+
+int numba_dppl_atomic_add_i32_local(volatile __generic int *p, int val) {
+    int found = *p;
+    int expected;
+    do {
+        expected = found;
+        found = atomic_cmpxchg((volatile __local unsigned int *)p, expected, expected + val);
+    } while (found != expected);
+    return found;
+}
+
+int numba_dppl_atomic_add_i32_global(volatile __generic int *p, int val) {
+    int found = *p;
+    int expected;
+    do {
+        expected = found;
+        found = atomic_cmpxchg((volatile __global unsigned int *)p, expected, expected + val);
+    } while (found != expected);
+    return found;
+}
+
+int numba_dppl_atomic_sub_i32_local(volatile __generic int *p, int val) {
+    int found = *p;
+    int expected;
+    do {
+        expected = found;
+        found = atomic_cmpxchg((volatile __local unsigned int *)p, expected, expected - val);
     } while (found != expected);
     return found;
 }

-int numba_dppl_atomic_sub_i32(volatile __generic int *p, int val, int type) {
+int numba_dppl_atomic_sub_i32_global(volatile __generic int *p, int val) {
     int found = *p;
     int expected;
     do {
         expected = found;
-        if (type == 1) { /* The address qualifier should be __local */
-            found = atomic_cmpxchg((volatile __local unsigned int *)p, expected, expected - val);
-        } else {
-            found = atomic_cmpxchg((volatile __global unsigned int *)p, expected, expected - val);
-        }
+        found = atomic_cmpxchg((volatile __global unsigned int *)p, expected, expected - val);
     } while (found != expected);
     return found;
 }
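
The change replaces the runtime `type` switch with separate `_local` and `_global` entry points, so the address-space cast is fixed when the helper is chosen rather than branched on inside the CAS loop. As a minimal illustration (not part of this commit), the sketch below shows how a hypothetical kernel might call the two f32 variants once these helpers are compiled into the same program. The kernel name and its arguments are invented for the example, and it assumes OpenCL 2.0 generic address-space support so that `__local` and `__global` pointers convert implicitly to the `__generic` parameters.

__kernel void hypothetical_group_sum(__global float *result,
                                     __local float *scratch,
                                     __global const float *input) {
    int lid = get_local_id(0);
    int gid = get_global_id(0);

    /* Initialize the per-work-group accumulator once. */
    if (lid == 0)
        scratch[0] = 0.0f;
    barrier(CLK_LOCAL_MEM_FENCE);

    /* Accumulate into local memory via the __local variant. */
    numba_dppl_atomic_add_f32_local(&scratch[0], input[gid]);
    barrier(CLK_LOCAL_MEM_FENCE);

    /* Fold each work-group's partial sum into global memory via the __global variant. */
    if (lid == 0)
        numba_dppl_atomic_add_f32_global(result, scratch[0]);
}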