Skip to content

Commit 2770dcd

Browse files
committed
Remove branch and have different functions for different addrspaces
1 parent d75b22b commit 2770dcd

File tree

1 file changed

+127
-46
lines changed

1 file changed

+127
-46
lines changed
Lines changed: 127 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -1,135 +1,216 @@
11
#ifdef cl_khr_int64_base_atomics
22
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics: enable
33

4-
long numba_dppl_atomic_add_i64(volatile __generic long *p, long val, int type) {
4+
long numba_dppl_atomic_add_i64_local(volatile __generic long *p, long val) {
55
long found = *p;
66
long expected;
77
do {
88
expected = found;
9-
if (type == 1) { /* The address qualifier should be __local */
10-
found = atom_cmpxchg((volatile __local ulong *)p, expected, expected + val);
11-
} else {
12-
found = atom_cmpxchg((volatile __global ulong *)p, expected, expected + val);
13-
}
9+
found = atom_cmpxchg((volatile __local ulong *)p, expected, expected + val);
1410
} while (found != expected);
1511
return found;
1612
}
1713

18-
long numba_dppl_atomic_sub_i64(volatile __generic long *p, long val, int type) {
14+
long numba_dppl_atomic_add_i64_global(volatile __generic long *p, long val) {
1915
long found = *p;
2016
long expected;
2117
do {
2218
expected = found;
23-
if (type == 1) { /* The address qualifier should be __local */
24-
found = atom_cmpxchg((volatile __local ulong *)p, expected, expected - val);
25-
} else {
26-
found = atom_cmpxchg((volatile __global ulong *)p, expected, expected - val);
27-
}
19+
found = atom_cmpxchg((volatile __global ulong *)p, expected, expected + val);
20+
} while (found != expected);
21+
return found;
22+
}
23+
24+
long numba_dppl_atomic_sub_i64_local(volatile __generic long *p, long val) {
25+
long found = *p;
26+
long expected;
27+
do {
28+
expected = found;
29+
found = atom_cmpxchg((volatile __local ulong *)p, expected, expected - val);
30+
} while (found != expected);
31+
return found;
32+
}
33+
34+
long numba_dppl_atomic_sub_i64_global(volatile __generic long *p, long val) {
35+
long found = *p;
36+
long expected;
37+
do {
38+
expected = found;
39+
found = atom_cmpxchg((volatile __global ulong *)p, expected, expected - val);
2840
} while (found != expected);
2941
return found;
3042
}
3143

3244
#ifdef cl_khr_fp64
3345
#pragma OPENCL EXTENSION cl_khr_fp64: enable
3446

35-
double numba_dppl_atomic_cmpxchg_f64(volatile __generic double *p, double cmp, double val, int type) {
47+
double numba_dppl_atomic_cmpxchg_f64_local(volatile __generic double *p, double cmp, double val) {
3648
union {
3749
ulong u64;
3850
double f64;
3951
} cmp_union, val_union, old_union;
4052

4153
cmp_union.f64 = cmp;
4254
val_union.f64 = val;
43-
if (type == 1) { /* The address qualifier should be __local */
44-
old_union.u64 = atom_cmpxchg((volatile __local ulong *) p, cmp_union.u64, val_union.u64);
45-
} else {
46-
old_union.u64 = atom_cmpxchg((volatile __global ulong *) p, cmp_union.u64, val_union.u64);
47-
}
55+
old_union.u64 = atom_cmpxchg((volatile __local ulong *) p, cmp_union.u64, val_union.u64);
4856
return old_union.f64;
4957
}
5058

51-
double numba_dppl_atomic_add_f64(volatile __generic double *p, double val, int type) {
59+
double numba_dppl_atomic_cmpxchg_f64_global(volatile __generic double *p, double cmp, double val) {
60+
union {
61+
ulong u64;
62+
double f64;
63+
} cmp_union, val_union, old_union;
64+
65+
cmp_union.f64 = cmp;
66+
val_union.f64 = val;
67+
old_union.u64 = atom_cmpxchg((volatile __global ulong *) p, cmp_union.u64, val_union.u64);
68+
return old_union.f64;
69+
}
70+
71+
double numba_dppl_atomic_add_f64_local(volatile __generic double *p, double val) {
72+
double found = *p;
73+
double expected;
74+
do {
75+
expected = found;
76+
found = numba_dppl_atomic_cmpxchg_f64_local(p, expected, expected + val);
77+
} while (found != expected);
78+
return found;
79+
}
80+
81+
double numba_dppl_atomic_add_f64_global(volatile __generic double *p, double val) {
5282
double found = *p;
5383
double expected;
5484
do {
5585
expected = found;
56-
found = numba_dppl_atomic_cmpxchg_f64(p, expected, expected + val, type);
86+
found = numba_dppl_atomic_cmpxchg_f64_global(p, expected, expected + val);
5787
} while (found != expected);
5888
return found;
5989
}
6090

61-
double numba_dppl_atomic_sub_f64(volatile __generic double *p, double val, int type) {
91+
92+
double numba_dppl_atomic_sub_f64_local(volatile __generic double *p, double val) {
6293
double found = *p;
6394
double expected;
6495
do {
6596
expected = found;
66-
found = numba_dppl_atomic_cmpxchg_f64(p, expected, expected - val, type);
97+
found = numba_dppl_atomic_cmpxchg_f64_local(p, expected, expected - val);
98+
} while (found != expected);
99+
return found;
100+
}
101+
102+
double numba_dppl_atomic_sub_f64_global(volatile __generic double *p, double val) {
103+
double found = *p;
104+
double expected;
105+
do {
106+
expected = found;
107+
found = numba_dppl_atomic_cmpxchg_f64_global(p, expected, expected - val);
67108
} while (found != expected);
68109
return found;
69110
}
70111
#endif
71112
#endif
72113

73-
float numba_dppl_atomic_cmpxchg_f32(volatile __generic float *p, float cmp, float val, int type) {
114+
float numba_dppl_atomic_cmpxchg_f32_local(volatile __generic float *p, float cmp, float val) {
74115
union {
75116
unsigned int u32;
76117
float f32;
77118
} cmp_union, val_union, old_union;
78119

79120
cmp_union.f32 = cmp;
80121
val_union.f32 = val;
81-
if (type == 1) { /* The address qualifier should be __local */
82-
old_union.u32 = atomic_cmpxchg((volatile __local unsigned int *) p, cmp_union.u32, val_union.u32);
83-
} else {
84-
old_union.u32 = atomic_cmpxchg((volatile __global unsigned int *) p, cmp_union.u32, val_union.u32);
85-
}
122+
old_union.u32 = atomic_cmpxchg((volatile __local unsigned int *) p, cmp_union.u32, val_union.u32);
123+
return old_union.f32;
124+
}
125+
126+
float numba_dppl_atomic_cmpxchg_f32_global(volatile __generic float *p, float cmp, float val) {
127+
union {
128+
unsigned int u32;
129+
float f32;
130+
} cmp_union, val_union, old_union;
131+
132+
cmp_union.f32 = cmp;
133+
val_union.f32 = val;
134+
old_union.u32 = atomic_cmpxchg((volatile __global unsigned int *) p, cmp_union.u32, val_union.u32);
86135
return old_union.f32;
87136
}
88137

89-
float numba_dppl_atomic_add_f32(volatile __generic float *p, float val, int type) {
138+
float numba_dppl_atomic_add_f32_local(volatile __generic float *p, float val) {
90139
float found = *p;
91140
float expected;
92141
do {
93142
expected = found;
94-
found = numba_dppl_atomic_cmpxchg_f32(p, expected, expected + val, type);
143+
found = numba_dppl_atomic_cmpxchg_f32_local(p, expected, expected + val);
95144
} while (found != expected);
96145
return found;
97146
}
98147

99-
int numba_dppl_atomic_add_i32(volatile __generic int *p, int val, int type) {
100-
int found = *p;
101-
int expected;
148+
float numba_dppl_atomic_add_f32_global(volatile __generic float *p, float val) {
149+
float found = *p;
150+
float expected;
102151
do {
103152
expected = found;
104-
if (type == 1) { /* The address qualifier should be __local */
105-
found = atomic_cmpxchg((volatile __local unsigned int *)p, expected, expected + val);
106-
} else {
107-
found = atomic_cmpxchg((volatile __global unsigned int *)p, expected, expected + val);
108-
}
153+
found = numba_dppl_atomic_cmpxchg_f32_global(p, expected, expected + val);
109154
} while (found != expected);
110155
return found;
111156
}
112157

113-
float numba_dppl_atomic_sub_f32(volatile __generic float *p, float val, int type) {
158+
float numba_dppl_atomic_sub_f32_local(volatile __generic float *p, float val) {
114159
float found = *p;
115160
float expected;
116161
do {
117162
expected = found;
118-
found = numba_dppl_atomic_cmpxchg_f32(p, expected, expected - val, type);
163+
found = numba_dppl_atomic_cmpxchg_f32_local(p, expected, expected - val);
164+
} while (found != expected);
165+
return found;
166+
}
167+
168+
float numba_dppl_atomic_sub_f32_global(volatile __generic float *p, float val) {
169+
float found = *p;
170+
float expected;
171+
do {
172+
expected = found;
173+
found = numba_dppl_atomic_cmpxchg_f32_global(p, expected, expected - val);
174+
} while (found != expected);
175+
return found;
176+
}
177+
178+
int numba_dppl_atomic_add_i32_local(volatile __generic int *p, int val) {
179+
int found = *p;
180+
int expected;
181+
do {
182+
expected = found;
183+
found = atomic_cmpxchg((volatile __local unsigned int *)p, expected, expected + val);
184+
} while (found != expected);
185+
return found;
186+
}
187+
188+
int numba_dppl_atomic_add_i32_global(volatile __generic int *p, int val) {
189+
int found = *p;
190+
int expected;
191+
do {
192+
expected = found;
193+
found = atomic_cmpxchg((volatile __global unsigned int *)p, expected, expected + val);
194+
} while (found != expected);
195+
return found;
196+
}
197+
198+
int numba_dppl_atomic_sub_i32_local(volatile __generic int *p, int val) {
199+
int found = *p;
200+
int expected;
201+
do {
202+
expected = found;
203+
found = atomic_cmpxchg((volatile __local unsigned int *)p, expected, expected - val);
119204
} while (found != expected);
120205
return found;
121206
}
122207

123-
int numba_dppl_atomic_sub_i32(volatile __generic int *p, int val, int type) {
208+
int numba_dppl_atomic_sub_i32_global(volatile __generic int *p, int val) {
124209
int found = *p;
125210
int expected;
126211
do {
127212
expected = found;
128-
if (type == 1) { /* The address qualifier should be __local */
129-
found = atomic_cmpxchg((volatile __local unsigned int *)p, expected, expected - val);
130-
} else {
131-
found = atomic_cmpxchg((volatile __global unsigned int *)p, expected, expected - val);
132-
}
213+
found = atomic_cmpxchg((volatile __global unsigned int *)p, expected, expected - val);
133214
} while (found != expected);
134215
return found;
135216
}

0 commit comments

Comments
 (0)