Skip to content

Commit 809e1cb

Browse files
committed
Better FP16 vectorized GEMV - 20% faster.
1 parent e07a9ae commit 809e1cb

File tree

1 file changed

+9
-2
lines changed

1 file changed

+9
-2
lines changed

kernel/riscv64/sbgemv_t_vector.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3838
#define IFLOAT_V_T vfloat16m4_t
3939
#define VLEV_IFLOAT RISCV_RVV(vle16_v_f16m4)
4040
#define VLSEV_IFLOAT RISCV_RVV(vlse16_v_f16m4)
41-
#define VFMACCVV_FLOAT RISCV_RVV(vfwmacc_vv_f32m8)
41+
/*
 * NOTE: despite its name, this variant is NOT a multiply-accumulate.
 * It expands to the widening multiply intrinsic vfwmul_vv_f32m8 applied to
 * (b, c, d); the first argument `a` is accepted only to keep the
 * four-argument call shape and is never expanded, so whatever is passed
 * there is discarded by the preprocessor (it does not even need to name a
 * declared variable).
 *
 * NOTE(review): this is only equivalent to the previous vfwmacc_vv_f32m8
 * mapping if every call site passes an all-zero accumulator as `a` --
 * presumably the case here, but confirm before reusing this macro with a
 * non-zero accumulator.
 */
#define VFMACCVV_FLOAT(a,b,c,d) RISCV_RVV(vfwmul_vv_f32m8)(b,c,d)
4242
#else
4343
#define IFLOAT_V_T vbfloat16m4_t
4444
#define VLEV_IFLOAT RISCV_RVV(vle16_v_bf16m4)
@@ -62,7 +62,10 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASL
6262
FLOAT temp;
6363

6464
IFLOAT_V_T va, vx;
65-
FLOAT_V_T vr, vz;
65+
#if !defined(HFLOAT16)
66+
FLOAT_V_T vz;
67+
#endif
68+
FLOAT_V_T vr;
6669
BLASLONG gvl = 0;
6770
FLOAT_V_T_M1 v_res;
6871

@@ -71,7 +74,9 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASL
7174
v_res = VFMVVF_FLOAT_M1(0, 1);
7275
gvl = VSETVL(m);
7376
j = 0;
77+
#if !defined(HFLOAT16)
7478
vz = VFMVVF_FLOAT(0, gvl);
79+
#endif
7580
for (k = 0; k < m/gvl; k++) {
7681
va = VLEV_IFLOAT(&a_ptr[j], gvl);
7782
vx = VLEV_IFLOAT(&x[j], gvl);
@@ -99,7 +104,9 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASL
99104
gvl = VSETVL(m);
100105
j = 0;
101106
ix = 0;
107+
#if !defined(HFLOAT16)
102108
vz = VFMVVF_FLOAT(0, gvl);
109+
#endif
103110
for (k = 0; k < m/gvl; k++) {
104111
va = VLEV_IFLOAT(&a_ptr[j], gvl);
105112
vx = VLSEV_IFLOAT(&x[ix], stride_x, gvl);

0 commit comments

Comments
 (0)