From 35d076881ed7b71947aa588f04325d25ae1c63c3 Mon Sep 17 00:00:00 2001 From: Dan Shechter Date: Thu, 11 May 2023 19:38:47 +0300 Subject: [PATCH 01/21] bench: add missing u16 benchmarks --- bench/fullsort/BM_fullsort.vxsort.avx2.u.cpp | 5 +++++ bench/fullsort/BM_fullsort.vxsort.avx512.u.cpp | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/bench/fullsort/BM_fullsort.vxsort.avx2.u.cpp b/bench/fullsort/BM_fullsort.vxsort.avx2.u.cpp index 2c783d4..72884ca 100644 --- a/bench/fullsort/BM_fullsort.vxsort.avx2.u.cpp +++ b/bench/fullsort/BM_fullsort.vxsort.avx2.u.cpp @@ -12,6 +12,11 @@ using namespace vxsort::types; using benchmark::TimeUnit; using vm = vxsort::vector_machine; +BENCHMARK_TEMPLATE(BM_vxsort, u16, vm::AVX2, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); +BENCHMARK_TEMPLATE(BM_vxsort, u16, vm::AVX2, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); +BENCHMARK_TEMPLATE(BM_vxsort, u16, vm::AVX2, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); +BENCHMARK_TEMPLATE(BM_vxsort, u16, vm::AVX2, 8)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); + BENCHMARK_TEMPLATE(BM_vxsort, u32, vm::AVX2, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); BENCHMARK_TEMPLATE(BM_vxsort, u32, vm::AVX2, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); BENCHMARK_TEMPLATE(BM_vxsort, u32, vm::AVX2, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); diff --git a/bench/fullsort/BM_fullsort.vxsort.avx512.u.cpp b/bench/fullsort/BM_fullsort.vxsort.avx512.u.cpp index 29e337f..8e6ec74 100644 --- a/bench/fullsort/BM_fullsort.vxsort.avx512.u.cpp +++ b/bench/fullsort/BM_fullsort.vxsort.avx512.u.cpp @@ -12,6 +12,11 @@ 
using namespace vxsort::types; using benchmark::TimeUnit; using vm = vxsort::vector_machine; +BENCHMARK_TEMPLATE(BM_vxsort, u16, vm::AVX512, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); +BENCHMARK_TEMPLATE(BM_vxsort, u16, vm::AVX512, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); +BENCHMARK_TEMPLATE(BM_vxsort, u16, vm::AVX512, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); +BENCHMARK_TEMPLATE(BM_vxsort, u16, vm::AVX512, 8)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); + BENCHMARK_TEMPLATE(BM_vxsort, u32, vm::AVX512, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); BENCHMARK_TEMPLATE(BM_vxsort, u32, vm::AVX512, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); BENCHMARK_TEMPLATE(BM_vxsort, u32, vm::AVX512, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); From d73da6babdae42767fcac7d2f0fc65de9049811f Mon Sep 17 00:00:00 2001 From: Dan Shechter Date: Thu, 11 May 2023 19:39:18 +0300 Subject: [PATCH 02/21] README: update with relevant documentation --- README.md | 184 +++++++++--------------------------------------------- 1 file changed, 30 insertions(+), 154 deletions(-) diff --git a/README.md b/README.md index adc895f..f6a7563 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # vxsort-cpp +## Tests + [![Build and Test](https://github.com/damageboy/vxsort-cpp/actions/workflows/build-and-test.yml/badge.svg)](https://github.com/damageboy/vxsort-cpp/actions/workflows/build-and-test.yml) ![Latest Test Status](https://gist.githubusercontent.com/damageboy/dfd9d01f2c710f96b444532b92539321/raw/vxsort-suites-badge.svg) ![Latest Test 
Status](https://gist.githubusercontent.com/damageboy/dfd9d01f2c710f96b444532b92539321/raw/vxsort-tests-badge.svg) @@ -7,26 +9,45 @@ ## What -This is a port of the C# [VxSort](https://github.com/damageboy/VxSort/) to high-perf C++. +vxsort is a fast, somewhat novel, hybrid, vectorized quicksort+bitonic primitive sorter implemented in C++. +The name vxsort stands for vectorized 10x sort. +It currently supports the following combination of vector ISA and primitive types: + +| | i64 | i32 | i16 | u64 | u32 | u16 | f64 | f32 | f16 | +|--------|-----|-----|---------------|-----|-----|---------------|-----|-----|-----| +| AVX2 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| AVX512 | ✅ | ✅ | ✅1 | ✅ | ✅ | ✅1 | ✅ | ✅ | ❌ | +| ARM-Neon| ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| ARM-SVE2| ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| RiscV-V 1.0 | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | + +1 - Requires AVX512/VBMI2 support which is available for all Intel AVX512 CPUs post Icelake, AMD post Zen4 + + +## Benchmark Results ## Building -```bash -mkdir build-release -cd build-release +```shell +mkdir build +cd build # For better code-gen -export CC=clang -export CXX=clang++ -cmake .. -make -j 4 +export CC=clang CXX=clang++ +cmake .. -G Ninja +ninja ``` ## Testing +To run tests, use ctest, preferably with `-j $(nproc)` to avoid waiting for a long time: + ```bash -./test/vxsort_test +ctest -j $(nproc) ``` +Tests are built into 3 executables (signed integers, unsigned integer, floating-point) per supported vector ISA. +This allows for easy to exploit parallelization both when building and executing the tests. + ## Benchmarking 1. 
Plug in to a power-source if on laptop @@ -42,149 +63,4 @@ make -j 4 ./bench/run.sh ``` -## Results (Ryzen 3950X, 3.8Ghz) - -### int64 - -Compared to Introspective Sort, we can hit: -* For 1M `int64` elements, roughly 4.8x improvement with 8x unroll (`55ns` per element -> `11.5ns`) -* For 128K `int64` elements, roughly 4.5x improvement with 8x unroll (`45ns` per element -> `10.5ns`) - -#### Introspective Sort (Scalar Baseline): - -```bash ------------------------------------------------------------------------------------------ -Benchmark Time CPU Iterations Time/N ------------------------------------------------------------------------------------------ -BM_full_introsort/4096 1.18 ms 1.18 ms 586 28.8719ns -BM_full_introsort/8192 2.78 ms 2.77 ms 262 33.8639ns -BM_full_introsort/16384 5.86 ms 5.86 ms 120 35.7374ns -BM_full_introsort/32768 12.4 ms 12.4 ms 54 37.948ns -BM_full_introsort/65536 27.7 ms 27.6 ms 25 42.1669ns -BM_full_introsort/131072 59.3 ms 59.3 ms 12 45.2203ns -BM_full_introsort/262144 124 ms 124 ms 6 47.261ns -BM_full_introsort/524288 268 ms 268 ms 3 51.0427ns -BM_full_introsort/1048576 557 ms 557 ms 1 53.0751ns -``` - -#### VxSort No Unroll, Bitonic Sort 64-elements - -```bash ------------------------------------------------------------------------------------------ -Benchmark Time CPU Iterations Time/N ------------------------------------------------------------------------------------------ -BM_vxsort/4096 0.486 ms 0.485 ms 1457 11.8505ns -BM_vxsort/8192 1.29 ms 1.29 ms 561 15.7416ns -BM_vxsort/16384 2.76 ms 2.75 ms 253 16.8082ns -BM_vxsort/32768 6.03 ms 6.03 ms 116 18.3878ns -BM_vxsort/65536 13.1 ms 13.1 ms 53 19.927ns -BM_vxsort/131072 27.7 ms 27.7 ms 25 21.1131ns -BM_vxsort/262144 60.1 ms 60.1 ms 12 22.9112ns -BM_vxsort/524288 127 ms 126 ms 5 24.1048ns -BM_vxsort/1048576 269 ms 269 ms 3 25.6178ns -``` - -#### VxSort Unroll x 4, Bitonic Sort 64-elements - -```bash 
------------------------------------------------------------------------------------------ -Benchmark Time CPU Iterations Time/N ------------------------------------------------------------------------------------------ -BM_vxsort/4096 0.279 ms 0.279 ms 2462 6.79957ns -BM_vxsort/8192 0.673 ms 0.672 ms 1000 8.20411ns -BM_vxsort/16384 1.52 ms 1.52 ms 455 9.25887ns -BM_vxsort/32768 3.37 ms 3.36 ms 210 10.2602ns -BM_vxsort/65536 7.20 ms 7.20 ms 96 10.982ns -BM_vxsort/131072 15.1 ms 15.1 ms 46 11.4838ns -BM_vxsort/262144 32.5 ms 32.5 ms 21 12.3887ns -BM_vxsort/524288 67.4 ms 67.3 ms 10 12.8354ns -BM_vxsort/1048576 144 ms 144 ms 5 13.689ns -``` -#### VxSort Unroll x 8, Bitonic Sort 64-elements - -```bash ------------------------------------------------------------------------------------------ -Benchmark Time CPU Iterations Time/N ------------------------------------------------------------------------------------------ -BM_vxsort/4096 0.271 ms 0.271 ms 2601 6.61364ns -BM_vxsort/8192 0.603 ms 0.603 ms 1190 7.35612ns -BM_vxsort/16384 1.35 ms 1.35 ms 517 8.23185ns -BM_vxsort/32768 2.96 ms 2.96 ms 232 9.02835ns -BM_vxsort/65536 6.32 ms 6.31 ms 111 9.634ns -BM_vxsort/131072 13.3 ms 13.3 ms 52 10.1321ns -BM_vxsort/262144 27.8 ms 27.8 ms 25 10.61ns -BM_vxsort/524288 59.7 ms 59.6 ms 12 11.3669ns -BM_vxsort/1048576 121 ms 121 ms 6 11.5373ns -``` - -#### VxSort Unroll x 12, Bitonic Sort 64-elements - -```bash ------------------------------------------------------------------------------------------ -Benchmark Time CPU Iterations Time/N ------------------------------------------------------------------------------------------ -BM_vxsort/4096 0.275 ms 0.275 ms 2504 6.70556ns -BM_vxsort/8192 0.593 ms 0.593 ms 1140 7.23399ns -BM_vxsort/16384 1.38 ms 1.37 ms 496 8.38849ns -BM_vxsort/32768 2.95 ms 2.95 ms 235 8.9886ns -BM_vxsort/65536 6.39 ms 6.38 ms 111 9.73922ns -BM_vxsort/131072 13.2 ms 13.2 ms 53 10.0404ns -BM_vxsort/262144 28.4 ms 28.4 ms 25 10.833ns -BM_vxsort/524288 58.9 ms 58.8 
ms 12 11.2206ns -BM_vxsort/1048576 125 ms 124 ms 6 11.8665ns -``` - -### int32 - -#### VxSort No Unroll, Bitonic Sort 128-elements - -``` -```bash ----------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations Time/N ----------------------------------------------------------------------------------------- -BM_vxsort/4096 0.169 ms 0.169 ms 4261 4.11785ns -BM_vxsort/8192 0.459 ms 0.459 ms 1516 5.60347ns -BM_vxsort/16384 1.19 ms 1.19 ms 596 7.27952ns -BM_vxsort/32768 2.61 ms 2.61 ms 269 7.96259ns -BM_vxsort/65536 5.70 ms 5.69 ms 120 8.68616ns -BM_vxsort/131072 12.6 ms 12.6 ms 57 9.62647ns -BM_vxsort/262144 26.4 ms 26.4 ms 26 10.0556ns -BM_vxsort/524288 56.9 ms 56.8 ms 12 10.8417ns -BM_vxsort/1048576 120 ms 120 ms 6 11.407ns -``` - -#### VxSort Unroll x 4, Bitonic Sort 128-elements - -```bash ----------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations Time/N ----------------------------------------------------------------------------------------- -BM_vxsort/4096 0.135 ms 0.135 ms 4836 3.29119ns -BM_vxsort/8192 0.291 ms 0.291 ms 2372 3.55463ns -BM_vxsort/16384 0.657 ms 0.656 ms 1061 4.00521ns -BM_vxsort/32768 1.57 ms 1.57 ms 462 4.79389ns -BM_vxsort/65536 3.35 ms 3.34 ms 205 5.10211ns -BM_vxsort/131072 7.20 ms 7.19 ms 98 5.48511ns -BM_vxsort/262144 15.4 ms 15.4 ms 45 5.87159ns -BM_vxsort/524288 34.0 ms 34.0 ms 22 6.48067ns -BM_vxsort/1048576 68.6 ms 68.5 ms 10 6.53424ns -``` - -#### VxSort Unroll x 8, Bitonic Sort 128-elements - -```bash ----------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations Time/N ----------------------------------------------------------------------------------------- -BM_vxsort/4096 0.132 ms 0.132 ms 5341 3.21375ns -BM_vxsort/8192 0.292 ms 0.292 ms 2495 3.56416ns -BM_vxsort/16384 0.631 ms 0.631 ms 1145 3.84954ns -BM_vxsort/32768 1.43 ms 1.43 ms 524 4.35009ns 
-BM_vxsort/65536 3.21 ms 3.21 ms 232 4.89271ns -BM_vxsort/131072 6.41 ms 6.40 ms 108 4.88355ns -BM_vxsort/262144 13.8 ms 13.8 ms 51 5.26214ns -BM_vxsort/524288 29.1 ms 29.0 ms 24 5.53438ns -BM_vxsort/1048576 59.8 ms 59.7 ms 11 5.69466ns -``` From 2b57bf3756d431f8ba1c04c935459dd8d09748d9 Mon Sep 17 00:00:00 2001 From: damageboy <125730+damageboy@users.noreply.github.com> Date: Sun, 14 May 2023 20:24:37 +0300 Subject: [PATCH 03/21] Update make-figure to generate better charts --- bench/make-figure.py | 168 +++++++++++++++++++++++++++++++++-------- bench/requirements.txt | 5 ++ 2 files changed, 140 insertions(+), 33 deletions(-) diff --git a/bench/make-figure.py b/bench/make-figure.py index 46e9e12..4ab82c2 100755 --- a/bench/make-figure.py +++ b/bench/make-figure.py @@ -6,19 +6,41 @@ import pandas as pd import plotly.express as px import argparse +import math def make_vxsort_types_frame(df_orig): df = df_orig[df_orig['name'].str.startswith('BM_vxsort<')] df = pd.concat( - [df, df['name'].str.extract(r'BM_vxsort<(?P[^,]+), vm::(?P[^,]+), (?P\d+)>.*/(?P\d+)/')], + [df, df['name'].str.extract( + r'BM_vxsort<(?P[^,]+), vm::(?P[^,]+), (?P\d+)>.*/(?P\d+)/')], axis="columns") - df = pd.concat([df, df['type'].str.extract(r'(?P.)(?P\d+)')], axis="columns") + df = pd.concat([df, df['type'].str.extract( + r'(?P.)(?P\d+)')], axis="columns") df = df.astype({"width": int}, errors='raise') df = df.astype({"unroll": int}, errors='raise') df = df.astype({"len": int}, errors='raise') + df['len_bytes'] = df['len'] * df['width'] / 8 + + return df + + +def make_bitonic_types_frame(df_orig): + df = df_orig[df_orig['name'].str.startswith('BM_bitonic_sort<')] + + df = pd.concat( + [df, df['name'].str.extract( + r'BM_bitonic_sort<(?P[^,]+), vm::(?P[^,]+)>.*/(?P\d+)/')], + axis="columns") + df = pd.concat([df, df['type'].str.extract( + r'(?P.)(?P\d+)')], axis="columns") + df = df.astype({"width": int}, errors='raise') + df = df.astype({"len": int}, errors='raise') + + df['len_bytes'] = 
df['len'] * df['width'] / 8 + return df @@ -30,18 +52,62 @@ def make_title(title: str): } -def plot_vxsort_types_frame(df): - fig = px.line(df, x='len', y='rdtsc-cycles/N', color='type', symbol='vm', +def add_cache_vline(fig, cache, name, color, len_min, len_max): + if cache < len_min or cache > len_max: + return + + fig.add_vline(cache, line_width=2, + line_dash="dash", + line_color=color) + + fig.add_annotation(x=(math.log(cache)) / math.log(10), y=2, + showarrow=False, + xshift=-15, + font=dict( + family="sans serif", + size=14, + color=color), + text=name, + textangle=-30, ) + + +def make_log2_ticks(min, max): + ticks = [] + tick_labels = [] + while min <= max: + ticks.append(min) + tick_labels.append(humanize.naturalsize(int(min), gnu=True, + binary=True).replace('B', '')) + min *= 2 + return ticks, tick_labels + + +def plot_sort_types_frame(df, title, args, caches): + fig = px.line(df, + x='len_bytes', + y='rdtsc-cycles/N', + color='type', + symbol='vm', width=1000, height=600, log_x=True, labels={ - "len_title": "Problem size", "len": "Problem size", + "len_bytes": "Problem size (bytes)", "rdtsc-cycles/N": "cycles per element", }, - template='plotly_dark') + template=args.template) + + len_min, len_max = df['len_bytes'].min(), df['len_bytes'].max() + add_cache_vline(fig, caches[0], "L1", "green", len_min, len_max) + add_cache_vline(fig, caches[1], "L2", "gold", len_min, len_max) + add_cache_vline(fig, caches[2], "L3", "red", len_min, len_max) + + tick_values, tick_labels = make_log2_ticks( + df['len_bytes'].min(), df['len_bytes'].max()) - fig.update_layout(title=make_title("vxsort full-sorting"), + fig.update_xaxes(tickvals=tick_values, ticktext=tick_labels) + + fig.update_layout(title=make_title(title), yaxis_tickangle=-30) return fig @@ -52,25 +118,29 @@ def make_vxsort_vs_all_frame(df_orig): df = pd.concat([df_orig, df_orig['name'].str.extract( r'BM_(?Pvxsort|pdqsort_branchless|stdsort)<(?P[^,]+).*>/(?P\d+)/')], axis="columns") - df = pd.concat([df, 
df['name'].str.extract(r'BM_vxsort<.*vm::(?P[^,]+), (?P\d+)>/')], axis="columns") - df = pd.concat([df, df['type'].str.extract(r'(?P.)(?P\d+)')], axis="columns") + df = pd.concat([df, df['name'].str.extract( + r'BM_vxsort<.*vm::(?P[^,]+), (?P\d+)>/')], axis="columns") + df = pd.concat([df, df['type'].str.extract( + r'(?P.)(?P\d+)')], axis="columns") df.fillna(0, inplace=True) df = df.astype({"width": int}, errors='raise') df = df.astype({"unroll": int}, errors='raise') df = df.astype({"len": int}, errors='raise') - df['sorter_title'] = df.apply(lambda x: f"{x['sorter']}{'/' + x['vm'] if x['vm'] != 0 else ''}", axis=1) + df['sorter_title'] = df.apply( + lambda x: f"{x['sorter']}{'/' + x['vm'] if x['vm'] != 0 else ''}", axis=1) df.dropna(axis=0, subset=['sorter'], inplace=True) return df -def plot_vxsort_vs_all_frame(df, speedup_baseline): - - df['len_title'] = df.apply(lambda x: f"{humanize.naturalsize(x['len'], gnu=True, binary=True).replace('B', '')}", axis=1) +def plot_vxsort_vs_all_frame(df, args): + df['len_title'] = df.apply( + lambda x: f"{humanize.naturalsize(x['len'], gnu=True, binary=True).replace('B', '')}", axis=1) - cardinality = df[['len_title', 'type', 'sorter_title']].nunique(dropna=True) + cardinality = df[['len_title', 'type', + 'sorter_title']].nunique(dropna=True) if cardinality['sorter_title'] == 1: raise ValueError("Only one sorter in the frame") @@ -82,35 +152,42 @@ def plot_vxsort_vs_all_frame(df, speedup_baseline): title_suffix = f"({df['len_title'].unique()[0]} elements)" y_column = 'type' else: - raise ValueError(f"Can't figure out the comparison axis for the plot: {cardinality}") - - if speedup_baseline: - baseline_df = df[df['sorter_title'] == speedup_baseline] - df['speedup'] = df.groupby(y_column)['rdtsc-cycles/N'].\ - transform(lambda x: baseline_df[baseline_df[y_column] == x.name]['rdtsc-cycles/N'].values[0] / x) + raise ValueError( + f"Can't figure out the comparison axis for the plot: {cardinality}") + + if args.speedup: + 
baseline_df = df[df['sorter_title'] == args.speedup] + df['speedup'] = df.groupby(y_column)['rdtsc-cycles/N']. \ + transform(lambda x: baseline_df[baseline_df[y_column] + == x.name]['rdtsc-cycles/N'].values[0] / x) x_column = 'speedup' else: x_column = 'rdtsc-cycles/N' + df.sort_values([x_column], ascending=[False], inplace=True) + fig = px.bar(df, barmode='group', orientation='h', color='sorter_title', - y=y_column, x=x_column, + y=y_column, width=1000, height=600, labels={ "len_title": "Problem size", "len": "Problem size", - "rdtsc-cycles/N": "cycles per element", - "speedup": f"speedup over {speedup_baseline}", + "sorter_title": "Sorter", + "rdtsc-cycles/N": "Cycles/element", + "speedup": f"speedup over {args.speedup}", }, - template='plotly_dark') + template=args.template) fig.update_layout(title=make_title(f"vxsort vs. others {title_suffix}"), bargap=0.3, bargroupgap=0.2, yaxis_tickangle=-30, ) + if format == 'html': + fig.update_layout(margin=dict(t=100, b=0, l=0, r=0)) return fig @@ -122,35 +199,54 @@ def parse_args(): parser.add_argument('filename') parser.add_argument('--mode', - choices=('vxsort-types', 'vxsort-vs-all'), + choices=('vxsort-types', 'vxsort-vs-all', 'bitonic-types'), const='vxsort-types', default='vxsort-types', nargs='?', help='which figure to generate (default: %(const)s)') - parser.add_argument('--format', choices=['svg', 'png', 'html'], default='svg') + parser.add_argument( + '--format', choices=['svg', 'png', 'html'], default='svg') parser.add_argument('--query', action='append', help='pandas query to filter the data-frame with before plotting') - parser.add_argument('--speedup', help='plot speedup vs. supplied baseline sorter') + parser.add_argument( + '--speedup', help='plot speedup vs. 
supplied baseline sorter') parser.add_argument('--debug-df', action='store_true', help='just show the last data-frame before generating a figure and quit') parser.add_argument('-o', '--output', default=sys.stdout.buffer) + parser.add_argument('--template', default='plotly_dark') args = parser.parse_args() return args +def parse_cache_tidbit(cache_type, text): + m = re.search(cache_type + ' (\d+) (KiB|MiB)', text) + if m: + cachesize = int(m.group(1)) + unit = m.group(2) + cachesize *= 1024 if unit == 'KiB' else 1024 * 1024 + return cachesize + return None + + def parse_csv_into_dataframe(filename): with open(filename) as f: m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) for match in re.finditer(b'name,iterations,real_time,cpu_time,time_unit', m): + header = f.read(match.start()) f.seek(match.start()) break + + l1d_size = parse_cache_tidbit('L1 Data', header) + l2_size = parse_cache_tidbit('L2 Unified', header) + l3_size = parse_cache_tidbit('L3 Unified', header) + df = pd.read_csv(f) # drop some commonly useless columns df.drop(['iterations', 'real_time', 'cpu_time', 'time_unit', 'label', 'items_per_second', 'error_occurred', 'error_message'], axis=1, inplace=True) - return df + return ((l1d_size, l2_size, l3_size), df) def apply_queries(df, queries): @@ -166,21 +262,27 @@ def apply_queries(df, queries): def make_figures(): args = parse_args() - df = parse_csv_into_dataframe(args.filename) + caches, df = parse_csv_into_dataframe(args.filename) if args.mode == 'vxsort-types': if args.speedup: - raise argparse.ArgumentError("Speedup mode is not supported for vxsort-types mode") + raise argparse.ArgumentError( + "Speedup mode is not supported for vxsort-types mode") plot_df = make_vxsort_types_frame(df) plot_df = apply_queries(plot_df, args.query) - fig = plot_vxsort_types_frame(plot_df) + fig = plot_sort_types_frame(plot_df, "vxsort full-sorting", args, caches) elif args.mode == 'vxsort-vs-all': plot_df = make_vxsort_vs_all_frame(df) if not args.query or 
len(args.query) == 0: - args.query = ["len <= 1048576 & width == 32 & typecat == 'i' & (sorter != 'vxsort' | unroll == 8)"] + args.query = [ + "len <= 1048576 & width == 32 & typecat == 'i' & (sorter != 'vxsort' | unroll == 8)"] plot_df = apply_queries(plot_df, args.query) - fig = plot_vxsort_vs_all_frame(plot_df, args.speedup) + fig = plot_vxsort_vs_all_frame(plot_df, args) + elif args.mode == 'bitonic-types': + plot_df = make_bitonic_types_frame(df) + plot_df = apply_queries(plot_df, args.query) + fig = plot_sort_types_frame(plot_df, "vxsort bitonic-sorting", args, caches) if args.debug_df: print(plot_df) diff --git a/bench/requirements.txt b/bench/requirements.txt index a898531..6d8f090 100644 --- a/bench/requirements.txt +++ b/bench/requirements.txt @@ -3,3 +3,8 @@ plotly pandas humanize ipython +humanize==4.4.0 +ipython==8.6.0 +kaleido==0.2.1 +pandas==1.5.1 +plotly==5.11.0 From 2f8c5a7373be265657ded9d059b20b5ebc03b9ac Mon Sep 17 00:00:00 2001 From: damageboy <125730+damageboy@users.noreply.github.com> Date: Sun, 14 May 2023 20:28:57 +0300 Subject: [PATCH 04/21] Formatting --- vxsort/vxsort.h | 95 ++++++++++++++++++++++--------------------------- 1 file changed, 42 insertions(+), 53 deletions(-) diff --git a/vxsort/vxsort.h b/vxsort/vxsort.h index 6988a66..f2f6f99 100644 --- a/vxsort/vxsort.h +++ b/vxsort/vxsort.h @@ -3,14 +3,14 @@ #include +#include #include "alignment.h" #include "defs.h" #include "isa_detection.h" -#include "vector_machine/machine_traits.h" -#include "partition_machine.h" #include "pack_machine.h" +#include "partition_machine.h" #include "smallsort/bitonic_sort.h" -#include +#include "vector_machine/machine_traits.h" #ifdef VXSORT_STATS #include "stats/vxsort_stats.h" @@ -28,7 +28,7 @@ using namespace vxsort::types; * @tparam Shift Optional; specify how many LSB bits are known to be zero in the original input. Can be used * to further speed up sorting. 
*/ -template +template class vxsort { static_assert(Unroll >= 1, "Unroll can be in the range [1..12]"); static_assert(Unroll <= 12, "Unroll can be in the range [1..12]"); @@ -61,13 +61,11 @@ class vxsort { // In other words, while we allocated this much temp memory, the actual amount of elements inside said memory // is smaller by 8 elements + 1 for each alignment (max alignment is actually N-1, I just round up to N...) // This long sense just means that we over-allocate N+2 elements... - static const i32 PARTITION_SPILL_SIZE_IN_ELEMENTS = - (2 * SLACK_PER_SIDE_IN_ELEMENTS + N + 4*N); + static const i32 PARTITION_SPILL_SIZE_IN_ELEMENTS = (2 * SLACK_PER_SIDE_IN_ELEMENTS + N + 4 * N); static_assert(PARTITION_SPILL_SIZE_IN_ELEMENTS < SMALL_SORT_THRESHOLD_ELEMENTS, "Unroll-level must match small-sorting threshold"); static const i32 PackUnroll = (Unroll / 2 > 0) ? Unroll / 2 : 1; - void reset(T* start, T* end) { _depth = 0; _start = start; @@ -125,9 +123,7 @@ class vxsort { *(lo + i - 1) = d; } - void sort(T* left, T* right, - T left_hint, T right_hint, - AH alignment, i32 depth_limit) { + void sort(T* left, T* right, T left_hint, T right_hint, AH alignment, i32 depth_limit) { auto length = static_cast(right - left + 1); T* mid; @@ -153,7 +149,7 @@ class vxsort { vxsort_stats::record_small_sort_size(length); #endif - auto* const aligned_left = reinterpret_cast(reinterpret_cast(left) & ~(N - 1)); + auto* const aligned_left = reinterpret_cast(reinterpret_cast(left) & ~(N - 1)); if (aligned_left < _start) { smallsort::bitonic::sort(left, length); return; @@ -349,14 +345,14 @@ class vxsort { // Broadcast the selected pivot const auto P = VMT::broadcast(pivot); - auto * RESTRICT spill_read_left = _spill; - auto * RESTRICT spill_write_left = spill_read_left; - auto * RESTRICT spill_read_right = _spill + PARTITION_SPILL_SIZE_IN_ELEMENTS; - auto * RESTRICT spill_write_right = spill_read_right; + auto* RESTRICT spill_read_left = _spill; + auto* RESTRICT spill_write_left = 
spill_read_left; + auto* RESTRICT spill_read_right = _spill + PARTITION_SPILL_SIZE_IN_ELEMENTS; + auto* RESTRICT spill_write_right = spill_read_right; // mutable pointer copies of the originals - auto * RESTRICT read_left = left; - auto * RESTRICT read_right = right; + auto* RESTRICT read_left = left; + auto* RESTRICT read_right = right; // the read heads always advance by N elements towards te middle, // It would be wise to spend some extra effort here to align the read @@ -365,17 +361,12 @@ class vxsort { // is close, for example, assuming 64-byte cache-line: // * unaligned 256-bit loads create split-line loads 50% of the time // * unaligned 512-bit loads create a split-line loads 100% of the time - PMT::align_vectorized(alignment.left_masked_amount, - alignment.right_unmasked_amount, - P, - read_left, read_right, - spill_read_left, spill_write_left, + PMT::align_vectorized(alignment.left_masked_amount, alignment.right_unmasked_amount, P, read_left, read_right, spill_read_left, spill_write_left, spill_read_right, spill_write_right); - assert((right - left) == - ((read_right + N) - read_left) + // Unpartitioned elements (+N for right-side vec reads) - (spill_write_left - spill_read_left) + // partitioned to left-spill - (spill_read_right - (spill_write_right + N))); // partitioned to right-spill (+N for right-side vec reads) + assert((right - left) == ((read_right + N) - read_left) + // Unpartitioned elements (+N for right-side vec reads) + (spill_write_left - spill_read_left) + // partitioned to left-spill + (spill_read_right - (spill_write_right + N))); // partitioned to right-spill (+N for right-side vec reads) assert(((usize)read_left & ALIGN_MASK) == 0); assert(((usize)read_right & ALIGN_MASK) == 0); @@ -384,8 +375,8 @@ class vxsort { // From now on, we are fully aligned // and all reading is done in full vector units - auto * RESTRICT read_left_v = reinterpret_cast(read_left); - auto * RESTRICT read_right_v = reinterpret_cast(read_right); + auto* RESTRICT 
read_left_v = reinterpret_cast(read_left); + auto* RESTRICT read_right_v = reinterpret_cast(read_right); #ifndef NDEBUG read_left = nullptr; @@ -405,14 +396,14 @@ class vxsort { // Adjust for the reading that was made above read_left_v += InnerUnroll; read_right_v += 1; - read_right_v -= InnerUnroll*2; + read_right_v -= InnerUnroll * 2; TV* nextPtr; - auto * RESTRICT write_left = left; - auto * RESTRICT write_right = right - N; + auto* RESTRICT write_left = left; + auto* RESTRICT write_right = right - N; while (read_left_v < read_right_v) { - if (write_right - ((T *)read_right_v) < (2 * (InnerUnroll * N) - N)) { + if (write_right - ((T*)read_right_v) < (2 * (InnerUnroll * N) - N)) { nextPtr = read_right_v; read_right_v -= InnerUnroll; } else { @@ -458,7 +449,7 @@ class vxsort { read_right_v += (InnerUnroll - 1); while (read_left_v <= read_right_v) { - if (write_right - (T *)read_right_v < N) { + if (write_right - (T*)read_right_v < N) { nextPtr = read_right_v; read_right_v -= 1; } else { @@ -483,7 +474,7 @@ class vxsort { *write_left++ = pivot; assert(write_left > left); - assert(write_left <= right+1); + assert(write_left <= right + 1); return write_left; } @@ -501,17 +492,15 @@ class vxsort { /// the nearest vector-alignment left+right of the partition /// is situated. 
/// \return The amount of elements partitioned to the left side - size_t vectorized_packed_partition(T* const left, T* const right, - T min_bounding, const AH alignment) { + size_t vectorized_packed_partition(T* const left, T* const right, T min_bounding, const AH alignment) { assert(right - left >= SMALL_SORT_THRESHOLD_ELEMENTS); assert((reinterpret_cast(left) & ELEMENT_ALIGN) == 0); assert((reinterpret_cast(right) & ELEMENT_ALIGN) == 0); #ifndef NDEBUG - memset((void *)_spill, 0, PARTITION_SPILL_SIZE_IN_ELEMENTS * sizeof(T)); + memset((void*)_spill, 0, PARTITION_SPILL_SIZE_IN_ELEMENTS * sizeof(T)); #endif - #ifdef VXSORT_STATS vxsort_stats::bump_partitions((right - left) + 1); #endif @@ -527,13 +516,13 @@ class vxsort { const TV offset_v = VMT::broadcast(offset); //const TV offset_v = PKM::prepare_offset(min_bounding); - auto * RESTRICT read_left = left; - auto * RESTRICT read_right = right; + auto* RESTRICT read_left = left; + auto* RESTRICT read_right = right; - auto * RESTRICT spill_read_left = _spill; - auto * RESTRICT spill_write_left = spill_read_left; - auto * RESTRICT spill_read_right = _spill + PARTITION_SPILL_SIZE_IN_ELEMENTS; - auto * RESTRICT spill_write_right = spill_read_right; + auto* RESTRICT spill_read_left = _spill; + auto* RESTRICT spill_write_left = spill_read_left; + auto* RESTRICT spill_read_right = _spill + PARTITION_SPILL_SIZE_IN_ELEMENTS; + auto* RESTRICT spill_write_right = spill_read_right; // the read heads always advance by N elements towards te middle, // It would be wise to spend some extra effort here to align the read @@ -557,15 +546,15 @@ class vxsort { // From now on, we are fully aligned // and all reading is done in full vector units - auto * RESTRICT read_left_v = reinterpret_cast(read_left); - auto * RESTRICT read_right_v = reinterpret_cast(read_right); + auto* RESTRICT read_left_v = reinterpret_cast(read_left); + auto* RESTRICT read_right_v = reinterpret_cast(read_right); #ifndef NDEBUG read_left = nullptr; read_right = 
nullptr; #endif auto* RESTRICT write_left = reinterpret_cast(left); - auto* RESTRICT write_right = reinterpret_cast(right+1) - 2*N; + auto* RESTRICT write_right = reinterpret_cast(right + 1) - 2 * N; // We will be packing before partitioning, so // We must generate a pre-packed pivot @@ -586,7 +575,7 @@ class vxsort { auto dl = VMT::load_vec(read_left_v + i); auto dr = VMT::load_vec(read_right_v - i); - auto packed_data = PKM::pack_vectors(dl, dr, offset_v); + auto packed_data = PKM::pack_vectors(dl, dr, offset_v); vxsort::PMT::partition_block(packed_data, PPP, write_left, write_right); } @@ -594,20 +583,20 @@ class vxsort { // We might have one more vector worth of stuff to partition, so we'll do it with // scalar partitioning into the tmp space if (len_v > 0) { - auto slack = VMT::load_vec((TV *) (read_left_v + len_dv)); + auto slack = VMT::load_vec((TV*)(read_left_v + len_dv)); PMT::partition_block(slack, P, spill_write_left, spill_write_right); } // Fix-up spill_write_right after the last vector operation // potentially *writing* through it is done spill_write_right += N; - write_right += 2*N; + write_right += 2 * N; - for (auto *p = spill_read_left; p < spill_write_left; p++) { + for (auto* p = spill_read_left; p < spill_write_left; p++) { *(write_left++) = static_cast(VMT::template shift_n_sub(*p, offset)); } - for (auto *p = spill_write_right; p < spill_read_right; p++) { + for (auto* p = spill_write_right; p < spill_read_right; p++) { *(--write_right) = static_cast(VMT::template shift_n_sub(*p, offset)); } @@ -797,7 +786,7 @@ class vxsort { T offset = VMT::template shift_n_sub(base, MIN); auto mem_read = mem_end - len; - auto mem_write = reinterpret_cast(mem_end) - len; + auto mem_write = reinterpret_cast(mem_end) - len; // Include a "special" pass to handle very short lengths if (len < 2 * N) { From fe26d6ee2d4e952fedc41fe058e301dfb55df2b5 Mon Sep 17 00:00:00 2001 From: damageboy <125730+damageboy@users.noreply.github.com> Date: Sun, 14 May 2023 20:29:36 
+0300 Subject: [PATCH 05/21] vxsort: make small-sort cutoff point in bytes, translated to # of elements per-type --- vxsort/vxsort.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vxsort/vxsort.h b/vxsort/vxsort.h index f2f6f99..4f5e4e9 100644 --- a/vxsort/vxsort.h +++ b/vxsort/vxsort.h @@ -47,7 +47,8 @@ class vxsort { static const i32 N = sizeof(TV) / sizeof(T); static_assert(is_powerof2(N), "vector-size / element-size must be a power of 2"); - static const i32 SMALL_SORT_THRESHOLD_ELEMENTS = 1024; + static const i32 SMALL_SORT_THRESHOLD_BYTES = 4096; + static const i32 SMALL_SORT_THRESHOLD_ELEMENTS = SMALL_SORT_THRESHOLD_BYTES / sizeof(T); static const i32 SMALL_SORT_THRESHOLD_VECTORS = SMALL_SORT_THRESHOLD_ELEMENTS / N; static const i32 SLACK_PER_SIDE_IN_VECTORS = Unroll; static const size_t ALIGN = AH::ALIGN; From 8c31e81820ea5c02f34a908314aa83fc26c668bf Mon Sep 17 00:00:00 2001 From: damageboy <125730+damageboy@users.noreply.github.com> Date: Thu, 25 May 2023 20:25:08 +0300 Subject: [PATCH 06/21] bench: move array creation into generate_unique_values --- bench/fullsort/BM_fullsort.pdqsort.cpp | 3 +-- bench/fullsort/BM_fullsort.stdsort.cpp | 3 +-- bench/fullsort/BM_fullsort.vxsort.h | 6 ++---- bench/smallsort/BM_blacher.avx2.cpp | 3 +-- bench/smallsort/BM_smallsort.h | 6 ++---- bench/util.h | 27 +++++++++++++------------- 6 files changed, 21 insertions(+), 27 deletions(-) diff --git a/bench/fullsort/BM_fullsort.pdqsort.cpp b/bench/fullsort/BM_fullsort.pdqsort.cpp index c07da5a..dc55cba 100644 --- a/bench/fullsort/BM_fullsort.pdqsort.cpp +++ b/bench/fullsort/BM_fullsort.pdqsort.cpp @@ -13,10 +13,9 @@ using namespace vxsort::types; template static void BM_pdqsort_branchless(benchmark::State& state) { auto n = state.range(0); - auto v = std::vector((i32)n); const auto ITERATIONS = 10; - generate_unique_values_vec(v, (Q)0x1000, (Q)8); + auto v = generate_unique_values_vec(n, (Q)0x1000, (Q)8); auto copies = generate_copies(ITERATIONS, n, v); 
auto begins = generate_array_beginnings(copies); auto ends = generate_array_beginnings(copies); diff --git a/bench/fullsort/BM_fullsort.stdsort.cpp b/bench/fullsort/BM_fullsort.stdsort.cpp index 7e877dc..3594118 100644 --- a/bench/fullsort/BM_fullsort.stdsort.cpp +++ b/bench/fullsort/BM_fullsort.stdsort.cpp @@ -13,10 +13,9 @@ using namespace vxsort::types; template static void BM_stdsort(benchmark::State& state) { auto n = state.range(0); - auto v = std::vector((i32)n); const auto ITERATIONS = 10; - generate_unique_values_vec(v, (Q)0x1000, (Q)8); + auto v = generate_unique_values_vec(n, (Q)0x1000, (Q)8); auto copies = generate_copies(ITERATIONS, n, v); auto begins = generate_array_beginnings(copies); auto ends = generate_array_beginnings(copies); diff --git a/bench/fullsort/BM_fullsort.vxsort.h b/bench/fullsort/BM_fullsort.vxsort.h index 1d31c25..91ca638 100644 --- a/bench/fullsort/BM_fullsort.vxsort.h +++ b/bench/fullsort/BM_fullsort.vxsort.h @@ -21,10 +21,9 @@ static void BM_vxsort(benchmark::State& state) { VXSORT_BENCH_ISA(); auto n = state.range(0); - auto v = std::vector((i32)n); const auto ITERATIONS = 10; - generate_unique_values_vec(v, (Q)0x1000, (Q)0x8); + auto v = generate_unique_values_vec(n, (Q)0x1000, (Q)0x8); auto copies = generate_copies(ITERATIONS, n, v); auto begins = generate_array_beginnings(copies); auto ends = generate_array_beginnings(copies); @@ -62,13 +61,12 @@ static void BM_vxsort_strided(benchmark::State& state) { auto n = StridedSortSize; auto stride = state.range(0); - auto v = std::vector(n); const auto ITERATIONS = 10; const auto min_value = StridedSortMinValue; const auto max_value = min_value + StridedSortSize * stride; - generate_unique_values_vec(v, (Q) 0x80000000, (Q) stride); + auto v = generate_unique_values_vec(n, (Q) 0x80000000, (Q) stride); auto copies = generate_copies(ITERATIONS, n, v); auto begins = generate_array_beginnings(copies); auto ends = generate_array_beginnings(copies); diff --git 
a/bench/smallsort/BM_blacher.avx2.cpp b/bench/smallsort/BM_blacher.avx2.cpp index cd88e43..31e6297 100644 --- a/bench/smallsort/BM_blacher.avx2.cpp +++ b/bench/smallsort/BM_blacher.avx2.cpp @@ -93,8 +93,7 @@ void BM_blacher(benchmark::State& state) static const i32 ITERATIONS = 1024; auto n = 16; - auto v = std::vector(n); - generate_unique_values_vec(v, (i32)0x1000, (i32)0x8); + auto v = generate_unique_values_vec(n, (i32)0x1000, (i32)0x8); auto copies = generate_copies(ITERATIONS, n, v); auto begins = generate_array_beginnings(copies); diff --git a/bench/smallsort/BM_smallsort.h b/bench/smallsort/BM_smallsort.h index 6fadcc9..d8d5748 100644 --- a/bench/smallsort/BM_smallsort.h +++ b/bench/smallsort/BM_smallsort.h @@ -23,8 +23,7 @@ static void BM_bitonic_sort(benchmark::State& state) { static const i32 ITERATIONS = 1024; auto n = state.range(0); - auto v = std::vector(n); - generate_unique_values_vec(v, (Q)0x1000, (Q)0x8); + auto v = generate_unique_values_vec(n, (Q)0x1000, (Q)0x8); auto copies = generate_copies(ITERATIONS, n, v); auto begins = generate_array_beginnings(copies); @@ -59,8 +58,7 @@ static void BM_bitonic_machine(benchmark::State& state) { static const i32 ITERATIONS = 1024; auto n = N * BM::N; - auto v = std::vector(n); - generate_unique_values_vec(v, (Q)0x1000, (Q)0x8); + auto v = generate_unique_values_vec(n, (Q)0x1000, (Q)0x8); auto copies = generate_copies(ITERATIONS, n, v); auto begins = generate_array_beginnings(copies); diff --git a/bench/util.h b/bench/util.h index def6832..27fbdb4 100644 --- a/bench/util.h +++ b/bench/util.h @@ -29,13 +29,14 @@ void process_perf_counters(UserCounters &counters, i64 num_elements); extern std::random_device::result_type global_bench_random_seed; template -void generate_unique_values_vec(std::vector& vec, T start, T stride) { - for (usize i = 0; i < vec.size(); i++, start += stride) - vec[i] = start; +std::vector generate_unique_values_vec(usize size, T start, T stride) { + std::vector v(size); + for (usize i 
= 0; i < v.size(); i++, start += stride) + v[i] = start; std::mt19937_64 g(global_bench_random_seed); - - std::shuffle(vec.begin(), vec.end(), g); + std::shuffle(v.begin(), v.end(), g); + return v; } template @@ -67,7 +68,7 @@ std::vector> generate_copies(usize num_copies, i64 n, std::vector template std::vector shuffled_seq(usize size, T start, T stride, std::mt19937_64& rng) { - std::vector v; v.reserve(size); + std::vector v(size); for (usize i = 0; i < size; ++i) v.push_back(start + stride * i); std::shuffle(v.begin(), v.end(), rng); @@ -76,7 +77,7 @@ std::vector shuffled_seq(usize size, T start, T stride, std::mt19937_64& rng) template std::vector shuffled_16_values(usize size, T start, T stride, std::mt19937_64& rng) { - std::vector v; v.reserve(size); + std::vector v(size); for (usize i = 0; i < size; ++i) v.push_back(start + stride * (i % 16)); std::shuffle(v.begin(), v.end(), rng); @@ -85,7 +86,7 @@ std::vector shuffled_16_values(usize size, T start, T stride, std::mt19937_ template std::vector all_equal(isize size, T start) { - std::vector v; v.reserve(size); + std::vector v(size); for (i32 i = 0; i < size; ++i) v.push_back(start); return v; @@ -93,7 +94,7 @@ std::vector all_equal(isize size, T start) { template std::vector ascending_int(isize size, T start, T stride) { - std::vector v; v.reserve(size); + std::vector v(size); for (isize i = 0; i < size; ++i) v.push_back(start + stride * i); return v; @@ -101,7 +102,7 @@ std::vector ascending_int(isize size, T start, T stride) { template std::vector descending_int(isize size, T start, T stride) { - std::vector v; v.reserve(size); + std::vector v(size); for (isize i = size - 1; i >= 0; --i) v.push_back(start + stride * i); return v; @@ -109,7 +110,7 @@ std::vector descending_int(isize size, T start, T stride) { template std::vector pipe_organ(isize size, T start, T stride, std::mt19937_64&) { - std::vector v; v.reserve(size); + std::vector v(size); for (isize i = 0; i < size/2; ++i) v.push_back(start + 
stride * i); for (isize i = size/2; i < size; ++i) @@ -119,7 +120,7 @@ std::vector pipe_organ(isize size, T start, T stride, std::mt19937_64&) { template std::vector push_front(isize size, T start, T stride, std::mt19937_64&) { - std::vector v; v.reserve(size); + std::vector v(size); for (isize i = 1; i < size; ++i) v.push_back(start + stride * i); v.push_back(start); @@ -128,7 +129,7 @@ std::vector push_front(isize size, T start, T stride, std::mt19937_64&) { template std::vector push_middle(isize size, T start, T stride, std::mt19937_64&) { - std::vector v; v.reserve(size); + std::vector v(size); for (isize i = 0; i < size; ++i) { if (i != size/2) v.push_back(start + stride * i); From 0c3a9a099b4d3ef54e4aa8137f0d9c800903e833 Mon Sep 17 00:00:00 2001 From: damageboy <125730+damageboy@users.noreply.github.com> Date: Mon, 26 Jun 2023 09:51:05 +0300 Subject: [PATCH 07/21] Update .clang-format --- .clang-format | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.clang-format b/.clang-format index ee94745..1ee7ab6 100644 --- a/.clang-format +++ b/.clang-format @@ -3,7 +3,7 @@ BasedOnStyle: Chromium --- Language: Cpp -ColumnLimit: 160 +ColumnLimit: 100 IndentWidth: 4 ... 
From c446b67cb972a079c568d0ef0d28f9847bba34c4 Mon Sep 17 00:00:00 2001 From: damageboy <125730+damageboy@users.noreply.github.com> Date: Mon, 26 Jun 2023 09:51:39 +0300 Subject: [PATCH 08/21] Whitespace removal --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 96381d9..0cc3604 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(VXSORT_USE_LINKER "" CACHE STRING "Custom linker for -fuse-ld=...") find_program(CCACHE_PROGRAM ccache) if(CCACHE_PROGRAM AND ${VXSORT_CCACHE}) message("ccache detected - using ccache to cache object files across compilations") - set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}") + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}") endif() # Make sure we can import out CMake functions From 510c9b2f878cd84e5ccc0c85ed6786dd37d2aa82 Mon Sep 17 00:00:00 2001 From: damageboy <125730+damageboy@users.noreply.github.com> Date: Mon, 26 Jun 2023 09:55:32 +0300 Subject: [PATCH 09/21] Rework benchmark project: * Add a new dimension of test-pattern to the mix * Remove much of the copy-paste related to registering the existing benchmark by using RegisterBenchmark directly with template meta-programming --- CMakeLists.txt | 3 +- bench/CMakeLists.txt | 2 + bench/bench.cpp | 29 ++++- bench/fullsort/BM_fullsort.pdqsort.cpp | 2 +- bench/fullsort/BM_fullsort.stdsort.cpp | 2 +- bench/fullsort/BM_fullsort.vxsort.avx2.f.cpp | 19 +-- bench/fullsort/BM_fullsort.vxsort.avx2.i.cpp | 24 +--- bench/fullsort/BM_fullsort.vxsort.avx2.u.cpp | 24 +--- .../fullsort/BM_fullsort.vxsort.avx512.f.cpp | 18 +-- .../fullsort/BM_fullsort.vxsort.avx512.i.cpp | 22 +--- .../fullsort/BM_fullsort.vxsort.avx512.u.cpp | 22 +--- bench/fullsort/BM_fullsort.vxsort.h | 119 +++++++++++++++++- bench/smallsort/BM_blacher.avx2.cpp | 2 +- bench/smallsort/BM_smallsort.h | 4 +- bench/util.h | 62 ++++----- 15 files changed, 207 insertions(+), 147 deletions(-) diff --git 
a/CMakeLists.txt b/CMakeLists.txt index 0cc3604..87b3304 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -214,7 +214,8 @@ CPMAddPackage( GIT_TAG main OPTIONS "BUILD_TESTING OFF" ) -CPMAddPackage("gh:fmtlib/fmt#9.1.0") +CPMAddPackage("gh:fmtlib/fmt#10.0.0") +CPMAddPackage("gh:Neargye/magic_enum#v0.9.2") CPMAddPackage("gh:okdshin/PicoSHA2#master") enable_testing() diff --git a/bench/CMakeLists.txt b/bench/CMakeLists.txt index 920b86c..0e75b31 100644 --- a/bench/CMakeLists.txt +++ b/bench/CMakeLists.txt @@ -11,6 +11,8 @@ target_link_libraries(${TARGET_NAME} ${CMAKE_PROJECT_NAME}_lib benchmark picosha2 + fmt::fmt + magic_enum::magic_enum ${CMAKE_THREAD_LIBS_INIT}) configure_file(run.sh run.sh COPYONLY) diff --git a/bench/bench.cpp b/bench/bench.cpp index 4b4753c..4dc009e 100644 --- a/bench/bench.cpp +++ b/bench/bench.cpp @@ -1,9 +1,28 @@ #include "benchmark/benchmark.h" +namespace vxsort_bench { + +void register_fullsort_avx2_i_benchmarks(); +void register_fullsort_avx2_u_benchmarks(); +void register_fullsort_avx2_f_benchmarks(); +void register_fullsort_avx512_i_benchmarks(); +void register_fullsort_avx512_u_benchmarks(); +void register_fullsort_avx512_f_benchmarks(); + +void register_benchmarks() { + register_fullsort_avx2_i_benchmarks(); + register_fullsort_avx2_u_benchmarks(); + register_fullsort_avx2_f_benchmarks(); + register_fullsort_avx512_i_benchmarks(); + register_fullsort_avx512_u_benchmarks(); + register_fullsort_avx512_f_benchmarks(); +} +} // namespace vxsort_bench + using namespace std; -int main(int argc, char** argv) -{ - ::benchmark::Initialize(&argc, argv); - ::benchmark::RunSpecifiedBenchmarks(); -} \ No newline at end of file +int main(int argc, char** argv) { + vxsort_bench::register_benchmarks(); + ::benchmark::Initialize(&argc, argv); + ::benchmark::RunSpecifiedBenchmarks(); +} diff --git a/bench/fullsort/BM_fullsort.pdqsort.cpp b/bench/fullsort/BM_fullsort.pdqsort.cpp index dc55cba..7d88f81 100644 --- a/bench/fullsort/BM_fullsort.pdqsort.cpp 
+++ b/bench/fullsort/BM_fullsort.pdqsort.cpp @@ -15,7 +15,7 @@ static void BM_pdqsort_branchless(benchmark::State& state) { auto n = state.range(0); const auto ITERATIONS = 10; - auto v = generate_unique_values_vec(n, (Q)0x1000, (Q)8); + auto v = unique_values(n, (Q) 0x1000, (Q) 8); auto copies = generate_copies(ITERATIONS, n, v); auto begins = generate_array_beginnings(copies); auto ends = generate_array_beginnings(copies); diff --git a/bench/fullsort/BM_fullsort.stdsort.cpp b/bench/fullsort/BM_fullsort.stdsort.cpp index 3594118..b2c7ecc 100644 --- a/bench/fullsort/BM_fullsort.stdsort.cpp +++ b/bench/fullsort/BM_fullsort.stdsort.cpp @@ -15,7 +15,7 @@ static void BM_stdsort(benchmark::State& state) { auto n = state.range(0); const auto ITERATIONS = 10; - auto v = generate_unique_values_vec(n, (Q)0x1000, (Q)8); + auto v = unique_values(n, (Q) 0x1000, (Q) 8); auto copies = generate_copies(ITERATIONS, n, v); auto begins = generate_array_beginnings(copies); auto ends = generate_array_beginnings(copies); diff --git a/bench/fullsort/BM_fullsort.vxsort.avx2.f.cpp b/bench/fullsort/BM_fullsort.vxsort.avx2.f.cpp index 9d05df6..6642f66 100644 --- a/bench/fullsort/BM_fullsort.vxsort.avx2.f.cpp +++ b/bench/fullsort/BM_fullsort.vxsort.avx2.f.cpp @@ -1,7 +1,7 @@ - #include "vxsort_targets_enable_avx2.h" +#include "vxsort_targets_enable_avx2.h" -#include #include +#include #include @@ -9,19 +9,12 @@ namespace vxsort_bench { using namespace vxsort::types; -using benchmark::TimeUnit; using vm = vxsort::vector_machine; -BENCHMARK_TEMPLATE(BM_vxsort, f32, vm::AVX2, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, f32, vm::AVX2, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, f32, vm::AVX2, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); 
-BENCHMARK_TEMPLATE(BM_vxsort, f32, vm::AVX2, 8)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); - -BENCHMARK_TEMPLATE(BM_vxsort, f64, vm::AVX2, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, f64, vm::AVX2, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, f64, vm::AVX2, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, f64, vm::AVX2, 8)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); - +void register_fullsort_avx2_f_benchmarks() { + register_fullsort_benchmarks(); } +} // namespace vxsort_bench + #include "vxsort_targets_disable.h" diff --git a/bench/fullsort/BM_fullsort.vxsort.avx2.i.cpp b/bench/fullsort/BM_fullsort.vxsort.avx2.i.cpp index 32c5a4a..baca040 100644 --- a/bench/fullsort/BM_fullsort.vxsort.avx2.i.cpp +++ b/bench/fullsort/BM_fullsort.vxsort.avx2.i.cpp @@ -1,31 +1,19 @@ - #include "vxsort_targets_enable_avx2.h" +#include "vxsort_targets_enable_avx2.h" -#include #include - #include +#include #include "BM_fullsort.vxsort.h" namespace vxsort_bench { using namespace vxsort::types; -using benchmark::TimeUnit; using vm = vxsort::vector_machine; -BENCHMARK_TEMPLATE(BM_vxsort, i16, vm::AVX2, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, i16, vm::AVX2, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, i16, vm::AVX2, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, i16, vm::AVX2, 8)->RangeMultiplier(2)->Range(MIN_SORT, 
MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); - -BENCHMARK_TEMPLATE(BM_vxsort, i32, vm::AVX2, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, i32, vm::AVX2, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, i32, vm::AVX2, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, i32, vm::AVX2, 8)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); - -BENCHMARK_TEMPLATE(BM_vxsort, i64, vm::AVX2, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, i64, vm::AVX2, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, i64, vm::AVX2, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, i64, vm::AVX2, 8)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); +void register_fullsort_avx2_i_benchmarks() { + register_fullsort_benchmarks(); } +} // namespace vxsort_bench + #include "vxsort_targets_disable.h" diff --git a/bench/fullsort/BM_fullsort.vxsort.avx2.u.cpp b/bench/fullsort/BM_fullsort.vxsort.avx2.u.cpp index 72884ca..23dbfe3 100644 --- a/bench/fullsort/BM_fullsort.vxsort.avx2.u.cpp +++ b/bench/fullsort/BM_fullsort.vxsort.avx2.u.cpp @@ -1,7 +1,7 @@ - #include "vxsort_targets_enable_avx2.h" +#include "vxsort_targets_enable_avx2.h" -#include #include +#include #include @@ -9,24 +9,12 @@ namespace vxsort_bench { using namespace vxsort::types; -using benchmark::TimeUnit; using vm = vxsort::vector_machine; -BENCHMARK_TEMPLATE(BM_vxsort, u16, vm::AVX2, 
1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, u16, vm::AVX2, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, u16, vm::AVX2, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, u16, vm::AVX2, 8)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); - -BENCHMARK_TEMPLATE(BM_vxsort, u32, vm::AVX2, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, u32, vm::AVX2, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, u32, vm::AVX2, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, u32, vm::AVX2, 8)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); - -BENCHMARK_TEMPLATE(BM_vxsort, u64, vm::AVX2, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, u64, vm::AVX2, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, u64, vm::AVX2, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, u64, vm::AVX2, 8)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); - +void register_fullsort_avx2_u_benchmarks() { + register_fullsort_benchmarks(); } +} // namespace vxsort_bench + #include "vxsort_targets_disable.h" diff --git a/bench/fullsort/BM_fullsort.vxsort.avx512.f.cpp 
b/bench/fullsort/BM_fullsort.vxsort.avx512.f.cpp index 62c3f62..97ddb37 100644 --- a/bench/fullsort/BM_fullsort.vxsort.avx512.f.cpp +++ b/bench/fullsort/BM_fullsort.vxsort.avx512.f.cpp @@ -1,7 +1,7 @@ #include "vxsort_targets_enable_avx512.h" -#include #include +#include #include @@ -9,20 +9,12 @@ namespace vxsort_bench { using namespace vxsort::types; -using benchmark::TimeUnit; using vm = vxsort::vector_machine; -BENCHMARK_TEMPLATE(BM_vxsort, i16, vm::AVX512, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, f32, vm::AVX512, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, f32, vm::AVX512, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, f32, vm::AVX512, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, f32, vm::AVX512, 8)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); - -BENCHMARK_TEMPLATE(BM_vxsort, f64, vm::AVX512, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, f64, vm::AVX512, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, f64, vm::AVX512, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, f64, vm::AVX512, 8)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); - +void register_fullsort_avx512_f_benchmarks() { + register_fullsort_benchmarks(); } +} // namespace vxsort_bench + #include "vxsort_targets_disable.h" diff --git 
a/bench/fullsort/BM_fullsort.vxsort.avx512.i.cpp b/bench/fullsort/BM_fullsort.vxsort.avx512.i.cpp index 1ffaf2d..0554ea6 100644 --- a/bench/fullsort/BM_fullsort.vxsort.avx512.i.cpp +++ b/bench/fullsort/BM_fullsort.vxsort.avx512.i.cpp @@ -1,7 +1,7 @@ #include "vxsort_targets_enable_avx512.h" -#include #include +#include #include @@ -9,24 +9,12 @@ namespace vxsort_bench { using namespace vxsort::types; -using benchmark::TimeUnit; using vm = vxsort::vector_machine; -BENCHMARK_TEMPLATE(BM_vxsort, i16, vm::AVX512, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, i16, vm::AVX512, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, i16, vm::AVX512, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, i16, vm::AVX512, 8)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); - -BENCHMARK_TEMPLATE(BM_vxsort, i32, vm::AVX512, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, i32, vm::AVX512, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, i32, vm::AVX512, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, i32, vm::AVX512, 8)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); - -BENCHMARK_TEMPLATE(BM_vxsort, i64, vm::AVX512, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, i64, vm::AVX512, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); 
-BENCHMARK_TEMPLATE(BM_vxsort, i64, vm::AVX512, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, i64, vm::AVX512, 8)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); - +void register_fullsort_avx512_i_benchmarks() { + register_fullsort_benchmarks(); } +} // namespace vxsort_bench + #include "vxsort_targets_disable.h" diff --git a/bench/fullsort/BM_fullsort.vxsort.avx512.u.cpp b/bench/fullsort/BM_fullsort.vxsort.avx512.u.cpp index 8e6ec74..0dfedc4 100644 --- a/bench/fullsort/BM_fullsort.vxsort.avx512.u.cpp +++ b/bench/fullsort/BM_fullsort.vxsort.avx512.u.cpp @@ -1,7 +1,7 @@ #include "vxsort_targets_enable_avx512.h" -#include #include +#include #include @@ -9,24 +9,12 @@ namespace vxsort_bench { using namespace vxsort::types; -using benchmark::TimeUnit; using vm = vxsort::vector_machine; -BENCHMARK_TEMPLATE(BM_vxsort, u16, vm::AVX512, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, u16, vm::AVX512, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, u16, vm::AVX512, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, u16, vm::AVX512, 8)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); - -BENCHMARK_TEMPLATE(BM_vxsort, u32, vm::AVX512, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, u32, vm::AVX512, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, u32, vm::AVX512, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, 
processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, u32, vm::AVX512, 8)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); - -BENCHMARK_TEMPLATE(BM_vxsort, u64, vm::AVX512, 1)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, u64, vm::AVX512, 2)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, u64, vm::AVX512, 4)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); -BENCHMARK_TEMPLATE(BM_vxsort, u64, vm::AVX512, 8)->RangeMultiplier(2)->Range(MIN_SORT, MAX_SORT)->Unit(kMillisecond)->ThreadRange(1, processor_count); - +void register_fullsort_avx512_u_benchmarks() { + register_fullsort_benchmarks(); } +} // namespace vxsort_bench + #include "vxsort_targets_disable.h" diff --git a/bench/fullsort/BM_fullsort.vxsort.h b/bench/fullsort/BM_fullsort.vxsort.h index 91ca638..8b9979b 100644 --- a/bench/fullsort/BM_fullsort.vxsort.h +++ b/bench/fullsort/BM_fullsort.vxsort.h @@ -2,11 +2,14 @@ #define VXSORT_BM_FULLSORT_VXSORT_H #include +#include +#include #include +#include #include #include -#include "../util.h" #include "../bench_isa.h" +#include "../util.h" #include @@ -14,8 +17,44 @@ namespace vxsort_bench { using namespace vxsort::types; +using benchmark::TimeUnit; using vxsort::vector_machine; +enum class SortPattern { + unique_values, + shuffled_16_values, + all_equal, + ascending_int, + descending_int, + pipe_organ, + push_front, + push_middle +}; + +template +std::vector generate_pattern(SortPattern pattern, usize size, Q start, Q stride) { + switch (pattern) { + case SortPattern::unique_values: + return unique_values(size, start, stride); + case SortPattern::shuffled_16_values: + return shuffled_16_values(size, start, stride); + case SortPattern::all_equal: + return all_equal(size, start, stride); + case 
SortPattern::ascending_int: + return ascending_int(size, start, stride); + case SortPattern::descending_int: + return descending_int(size, start, stride); + case SortPattern::pipe_organ: + return pipe_organ(size, start, stride); + case SortPattern::push_front: + return push_front(size, start, stride); + case SortPattern::push_middle: + return push_middle(size, start, stride); + default: + return unique_values(size, start, stride); + } +} + template static void BM_vxsort(benchmark::State& state) { VXSORT_BENCH_ISA(); @@ -23,7 +62,7 @@ static void BM_vxsort(benchmark::State& state) { auto n = state.range(0); const auto ITERATIONS = 10; - auto v = generate_unique_values_vec(n, (Q)0x1000, (Q)0x8); + auto v = unique_values(n, (Q)0x1000, (Q)0x8); auto copies = generate_copies(ITERATIONS, n, v); auto begins = generate_array_beginnings(copies); auto ends = generate_array_beginnings(copies); @@ -49,7 +88,45 @@ static void BM_vxsort(benchmark::State& state) { state.SetBytesProcessed(state.iterations() * n * ITERATIONS * sizeof(Q)); process_perf_counters(state.counters, n * ITERATIONS); if (!state.counters.contains("cycles/N")) - state.counters["rdtsc-cycles/N"] = make_cycle_per_n_counter((f64)total_cycles / (f64)(n * ITERATIONS * state.iterations())); + state.counters["rdtsc-cycles/N"] = make_cycle_per_n_counter( + (f64)total_cycles / (f64)(n * ITERATIONS * state.iterations())); +} + +template +static void BM_vxsort_pattern(benchmark::State& state, i64 n, SortPattern pattern) { + VXSORT_BENCH_ISA(); + + auto v = generate_pattern(pattern, n, (Q)0x1000, (Q)0x8); + + const auto ITERATIONS = 10; + + auto copies = generate_copies(ITERATIONS, n, v); + auto begins = generate_array_beginnings(copies); + auto ends = generate_array_beginnings(copies); + for (usize i = 0; i < copies.size(); i++) + ends[i] = begins[i] + n - 1; + + auto sorter = ::vxsort::vxsort(); + + u64 total_cycles = 0; + for (auto _ : state) { + state.PauseTiming(); + refresh_copies(copies, v); + 
state.ResumeTiming(); + auto start = cycleclock::Now(); + for (auto i = 0; i < ITERATIONS; i++) { + sorter.sort(begins[i], ends[i]); + } + total_cycles += (cycleclock::Now() - start); + } + + state.SetLabel(get_crypto_hash(begins[0], ends[0])); + state.counters["Time/N"] = make_time_per_n_counter(n * ITERATIONS); + state.SetBytesProcessed(state.iterations() * n * ITERATIONS * sizeof(Q)); + process_perf_counters(state.counters, n * ITERATIONS); + if (!state.counters.contains("cycles/N")) + state.counters["rdtsc-cycles/N"] = make_cycle_per_n_counter( + (f64)total_cycles / (f64)(n * ITERATIONS * state.iterations())); } const i32 StridedSortSize = 1000000; @@ -66,7 +143,7 @@ static void BM_vxsort_strided(benchmark::State& state) { const auto min_value = StridedSortMinValue; const auto max_value = min_value + StridedSortSize * stride; - auto v = generate_unique_values_vec(n, (Q) 0x80000000, (Q) stride); + auto v = unique_values(n, (Q)0x80000000, (Q)stride); auto copies = generate_copies(ITERATIONS, n, v); auto begins = generate_array_beginnings(copies); auto ends = generate_array_beginnings(copies); @@ -90,8 +167,40 @@ static void BM_vxsort_strided(benchmark::State& state) { state.counters["Time/N"] = make_time_per_n_counter(n * ITERATIONS); process_perf_counters(state.counters, n * ITERATIONS); if (!state.counters.contains("cycles/N")) - state.counters["rdtsc-cycles/N"] = make_cycle_per_n_counter((f64)total_cycles / (f64)(n * ITERATIONS * state.iterations())); + state.counters["rdtsc-cycles/N"] = make_cycle_per_n_counter( + (f64)total_cycles / (f64)(n * ITERATIONS * state.iterations())); } + +static inline std::vector test_patterns() { + return { + SortPattern::unique_values, + SortPattern::shuffled_16_values, + SortPattern::all_equal, + }; +}; + +template +void register_type(i64 s, SortPattern p) { + if constexpr (U >= 2) { + register_type(s, p); + } + auto realname = abi::__cxa_demangle(typeid(T).name(), nullptr, nullptr, nullptr); + auto bench_name = 
fmt::format("BM_vxsort_pattern<{}, {}, {}>/{}/{}", realname, U, s, + magic_enum::enum_name(M), magic_enum::enum_name(p)); + ::benchmark::RegisterBenchmark(bench_name.c_str(), BM_vxsort_pattern, s, p) + ->Unit(kMillisecond) + ->ThreadRange(1, processor_count); } +template +void register_fullsort_benchmarks() { + for (auto s : ::benchmark::CreateRange(MIN_SORT, MAX_SORT, 2)) { + for (auto p : test_patterns()) { + (register_type(s, p), ...); + } + } +} + +} // namespace vxsort_bench + #endif // VXSORT_BM_FULLSORT_VXSORT_H diff --git a/bench/smallsort/BM_blacher.avx2.cpp b/bench/smallsort/BM_blacher.avx2.cpp index 31e6297..3189b4a 100644 --- a/bench/smallsort/BM_blacher.avx2.cpp +++ b/bench/smallsort/BM_blacher.avx2.cpp @@ -93,7 +93,7 @@ void BM_blacher(benchmark::State& state) static const i32 ITERATIONS = 1024; auto n = 16; - auto v = generate_unique_values_vec(n, (i32)0x1000, (i32)0x8); + auto v = unique_values(n, (i32) 0x1000, (i32) 0x8); auto copies = generate_copies(ITERATIONS, n, v); auto begins = generate_array_beginnings(copies); diff --git a/bench/smallsort/BM_smallsort.h b/bench/smallsort/BM_smallsort.h index d8d5748..1e4d14b 100644 --- a/bench/smallsort/BM_smallsort.h +++ b/bench/smallsort/BM_smallsort.h @@ -23,7 +23,7 @@ static void BM_bitonic_sort(benchmark::State& state) { static const i32 ITERATIONS = 1024; auto n = state.range(0); - auto v = generate_unique_values_vec(n, (Q)0x1000, (Q)0x8); + auto v = unique_values(n, (Q) 0x1000, (Q) 0x8); auto copies = generate_copies(ITERATIONS, n, v); auto begins = generate_array_beginnings(copies); @@ -58,7 +58,7 @@ static void BM_bitonic_machine(benchmark::State& state) { static const i32 ITERATIONS = 1024; auto n = N * BM::N; - auto v = generate_unique_values_vec(n, (Q)0x1000, (Q)0x8); + auto v = unique_values(n, (Q) 0x1000, (Q) 0x8); auto copies = generate_copies(ITERATIONS, n, v); auto begins = generate_array_beginnings(copies); diff --git a/bench/util.h b/bench/util.h index 27fbdb4..003d0d6 100644 --- 
a/bench/util.h +++ b/bench/util.h @@ -28,26 +28,6 @@ void process_perf_counters(UserCounters &counters, i64 num_elements); extern std::random_device::result_type global_bench_random_seed; -template -std::vector generate_unique_values_vec(usize size, T start, T stride) { - std::vector v(size); - for (usize i = 0; i < v.size(); i++, start += stride) - v[i] = start; - - std::mt19937_64 g(global_bench_random_seed); - std::shuffle(v.begin(), v.end(), g); - return v; -} - -template -std::vector generate_array_beginnings(std::vector> &copies) { - const auto num_copies = copies.size(); - std::vector begins(num_copies); - for (usize i = 0; i < num_copies; i++) - begins[i] = (U*)copies[i].data(); - return begins; -} - template void refresh_copies(std::vector> &copies, std::vector& orig) { const auto begin = orig.begin(); @@ -66,26 +46,38 @@ std::vector> generate_copies(usize num_copies, i64 n, std::vector return copies; } +template +std::vector generate_array_beginnings(std::vector> &copies) { + const auto num_copies = copies.size(); + std::vector begins(num_copies); + for (usize i = 0; i < num_copies; i++) + begins[i] = (U*)copies[i].data(); + return begins; +} + template -std::vector shuffled_seq(usize size, T start, T stride, std::mt19937_64& rng) { +std::vector unique_values(usize size, T start, T stride) { std::vector v(size); - for (usize i = 0; i < size; ++i) - v.push_back(start + stride * i); + for (usize i = 0; i < v.size(); i++, start += stride) + v[i] = start; + + std::mt19937_64 rng(global_bench_random_seed); std::shuffle(v.begin(), v.end(), rng); return v; } template -std::vector shuffled_16_values(usize size, T start, T stride, std::mt19937_64& rng) { +std::vector shuffled_16_values(usize size, T start, T stride) { std::vector v(size); for (usize i = 0; i < size; ++i) v.push_back(start + stride * (i % 16)); + std::mt19937_64 rng(global_bench_random_seed); std::shuffle(v.begin(), v.end(), rng); return v; } template -std::vector all_equal(isize size, T start) { 
+std::vector all_equal(usize size, T start , T stride) { std::vector v(size); for (i32 i = 0; i < size; ++i) v.push_back(start); @@ -93,15 +85,15 @@ std::vector all_equal(isize size, T start) { } template -std::vector ascending_int(isize size, T start, T stride) { +std::vector ascending_int(usize size, T start, T stride) { std::vector v(size); - for (isize i = 0; i < size; ++i) + for (usize i = 0; i < size; ++i) v.push_back(start + stride * i); return v; } template -std::vector descending_int(isize size, T start, T stride) { +std::vector descending_int(usize size, T start, T stride) { std::vector v(size); for (isize i = size - 1; i >= 0; --i) v.push_back(start + stride * i); @@ -109,28 +101,28 @@ std::vector descending_int(isize size, T start, T stride) { } template -std::vector pipe_organ(isize size, T start, T stride, std::mt19937_64&) { +std::vector pipe_organ(usize size, T start, T stride) { std::vector v(size); - for (isize i = 0; i < size/2; ++i) + for (usize i = 0; i < size/2; ++i) v.push_back(start + stride * i); - for (isize i = size/2; i < size; ++i) + for (usize i = size/2; i < size; ++i) v.push_back(start + (size - i) * stride); return v; } template -std::vector push_front(isize size, T start, T stride, std::mt19937_64&) { +std::vector push_front(usize size, T start, T stride) { std::vector v(size); - for (isize i = 1; i < size; ++i) + for (usize i = 1; i < size; ++i) v.push_back(start + stride * i); v.push_back(start); return v; } template -std::vector push_middle(isize size, T start, T stride, std::mt19937_64&) { +std::vector push_middle(usize size, T start, T stride) { std::vector v(size); - for (isize i = 0; i < size; ++i) { + for (usize i = 0; i < size; ++i) { if (i != size/2) v.push_back(start + stride * i); } From 3d346eb257b2329cc36854eb4c6bf88e1ea09c25 Mon Sep 17 00:00:00 2001 From: damageboy <125730+damageboy@users.noreply.github.com> Date: Thu, 31 Aug 2023 18:33:10 +0300 Subject: [PATCH 10/21] remove redundant using namespace --- 
bench/fullsort/BM_fullsort.vxsort.avx2.i.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/bench/fullsort/BM_fullsort.vxsort.avx2.i.cpp b/bench/fullsort/BM_fullsort.vxsort.avx2.i.cpp index baca040..cbccbce 100644 --- a/bench/fullsort/BM_fullsort.vxsort.avx2.i.cpp +++ b/bench/fullsort/BM_fullsort.vxsort.avx2.i.cpp @@ -7,7 +7,6 @@ #include "BM_fullsort.vxsort.h" namespace vxsort_bench { -using namespace vxsort::types; using vm = vxsort::vector_machine; void register_fullsort_avx2_i_benchmarks() { From 3f165abf38a7c74658605ebd86895da1d3c92e3e Mon Sep 17 00:00:00 2001 From: Dan Shechter Date: Sun, 10 Sep 2023 20:22:51 +0300 Subject: [PATCH 11/21] tests: copy-paste of test generators from the benchmark project, in preperation for testing with a matrix of patterns --- bench/util.h | 2 +- tests/sort_fixtures.h | 13 +++--- tests/util.h | 95 ++++++++++++++++++++++++++++++++++++------- 3 files changed, 89 insertions(+), 21 deletions(-) diff --git a/bench/util.h b/bench/util.h index 003d0d6..0ece72a 100644 --- a/bench/util.h +++ b/bench/util.h @@ -79,7 +79,7 @@ std::vector shuffled_16_values(usize size, T start, T stride) { template std::vector all_equal(usize size, T start , T stride) { std::vector v(size); - for (i32 i = 0; i < size; ++i) + for (usize i = 0; i < size; ++i) v.push_back(start); return v; } diff --git a/tests/sort_fixtures.h b/tests/sort_fixtures.h index e0d4deb..27b1891 100644 --- a/tests/sort_fixtures.h +++ b/tests/sort_fixtures.h @@ -23,8 +23,7 @@ struct SortFixture : public testing::TestWithParam { public: virtual void SetUp() { - V = std::vector(GetParam()); - generate_unique_values_vec(V, (T)0x1000, (T)0x1); + auto v = unique_values(GetParam(), (T)0x1000, (T)0x1); } virtual void TearDown() { } @@ -89,8 +88,11 @@ struct SortWithSlackFixture : public testing::TestWithParam> { virtual void SetUp() { testing::TestWithParam>::SetUp(); auto p = this->GetParam(); - V = std::vector(p.Size + p.Slack); - generate_unique_values_vec(V, p.FirstValue, 
p.ValueStride, p.Randomize); + //V = std::vector(p.Size + p.Slack); + //generate_unique_values_vec(V, p.FirstValue, p.ValueStride, p.Randomize); + auto v = unique_values(p.Size + p.Slack, p.FirstValue, p.ValueStride); + + } virtual void TearDown() { #ifdef VXSORT_STATS @@ -138,8 +140,7 @@ struct SortWithStrideFixture : public testing::TestWithParam> { virtual void SetUp() { testing::TestWithParam>::SetUp(); auto p = this->GetParam(); - V = std::vector(p.Size); - generate_unique_values_vec(V, p.FirstValue, p.ValueStride, p.Randomize); + auto v = unique_values(p.Size, p.FirstValue, p.ValueStride); MinValue = p.FirstValue; MaxValue = MinValue + p.Size * p.ValueStride; if (MinValue > MaxValue) diff --git a/tests/util.h b/tests/util.h index 09527cf..a14e5d5 100644 --- a/tests/util.h +++ b/tests/util.h @@ -6,22 +6,12 @@ #include #include -template -void generate_unique_values_vec(std::vector& vec, T start, T stride= 0x1, bool randomize = true) { - for (size_t i = 0; i < vec.size(); i++) { - vec[i] = start; - start += stride; - } +#include - if (!randomize) - return; +namespace vxsort_tests { +using namespace vxsort::types; - std::random_device rd; - // std::mt19937 g(rd()); - std::mt19937 g(666); - - std::shuffle(vec.begin(), vec.end(), g); -} +const std::random_device::result_type global_bench_random_seed = 666; template std::vector range(IntType start, IntType stop, IntType step) { @@ -55,4 +45,81 @@ std::vector multiply_range(IntType start, IntType stop, IntType step) { return result; } +template +std::vector unique_values(usize size, T start, T stride) { + std::vector v(size); + for (usize i = 0; i < v.size(); i++, start += stride) + v[i] = start; + + std::mt19937_64 rng(global_bench_random_seed); + std::shuffle(v.begin(), v.end(), rng); + return v; +} + +template +std::vector shuffled_16_values(usize size, T start, T stride) { + std::vector v(size); + for (usize i = 0; i < size; ++i) + v.push_back(start + stride * (i % 16)); + std::mt19937_64 
rng(global_bench_random_seed); + std::shuffle(v.begin(), v.end(), rng); + return v; +} + +template +std::vector all_equal(usize size, T start , T stride) { + std::vector v(size); + for (i32 i = 0; i < size; ++i) + v.push_back(start); + return v; +} + +template +std::vector ascending_int(usize size, T start, T stride) { + std::vector v(size); + for (usize i = 0; i < size; ++i) + v.push_back(start + stride * i); + return v; +} + +template +std::vector descending_int(usize size, T start, T stride) { + std::vector v(size); + for (isize i = size - 1; i >= 0; --i) + v.push_back(start + stride * i); + return v; +} + +template +std::vector pipe_organ(usize size, T start, T stride) { + std::vector v(size); + for (usize i = 0; i < size/2; ++i) + v.push_back(start + stride * i); + for (usize i = size/2; i < size; ++i) + v.push_back(start + (size - i) * stride); + return v; +} + +template +std::vector push_front(usize size, T start, T stride) { + std::vector v(size); + for (usize i = 1; i < size; ++i) + v.push_back(start + stride * i); + v.push_back(start); + return v; +} + +template +std::vector push_middle(usize size, T start, T stride) { + std::vector v(size); + for (usize i = 0; i < size; ++i) { + if (i != size/2) + v.push_back(start + stride * i); + } + v.push_back(start + stride * (size/2)); + return v; +} + +} + #endif From 5036e03fd260d65fe9c903e949c542fdaa0d1f2c Mon Sep 17 00:00:00 2001 From: Dan Shechter <125730+damageboy@users.noreply.github.com> Date: Mon, 11 Sep 2023 18:46:22 +0300 Subject: [PATCH 12/21] fix MSVC breakage --- bench/fullsort/BM_fullsort.vxsort.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bench/fullsort/BM_fullsort.vxsort.h b/bench/fullsort/BM_fullsort.vxsort.h index 8b9979b..fed3bc6 100644 --- a/bench/fullsort/BM_fullsort.vxsort.h +++ b/bench/fullsort/BM_fullsort.vxsort.h @@ -2,7 +2,6 @@ #define VXSORT_BM_FULLSORT_VXSORT_H #include -#include #include #include #include @@ -11,6 +10,10 @@ #include "../bench_isa.h" 
#include "../util.h" +#ifndef VXSORT_COMPILER_MSVC +#include +#endif + #include #include "fullsort_params.h" @@ -184,7 +187,11 @@ void register_type(i64 s, SortPattern p) { if constexpr (U >= 2) { register_type(s, p); } +#ifdef VXSORT_COMPILER_MSVC + auto realname = typeid(T).name(); +#else auto realname = abi::__cxa_demangle(typeid(T).name(), nullptr, nullptr, nullptr); +#endif auto bench_name = fmt::format("BM_vxsort_pattern<{}, {}, {}>/{}/{}", realname, U, s, magic_enum::enum_name(M), magic_enum::enum_name(p)); ::benchmark::RegisterBenchmark(bench_name.c_str(), BM_vxsort_pattern, s, p) From 4ed505d3e39aeff6638db9975ae5b1bdc2a1863d Mon Sep 17 00:00:00 2001 From: Dan Shechter <125730+damageboy@users.noreply.github.com> Date: Sun, 17 Sep 2023 18:46:49 +0300 Subject: [PATCH 13/21] Unify all different parametrized testing fixtures to one unified fixture that accepts various sorting patterns --- tests/fullsort/fullsort.avx2.cpp | 48 +++---- tests/fullsort/fullsort.avx512.cpp | 48 +++---- tests/smallsort/smallsort.avx2.cpp | 88 +++++++------ tests/smallsort/smallsort.avx512.cpp | 87 +++++++------ tests/sort_fixtures.h | 187 ++++++++++++++------------- 5 files changed, 240 insertions(+), 218 deletions(-) diff --git a/tests/fullsort/fullsort.avx2.cpp b/tests/fullsort/fullsort.avx2.cpp index 19c623a..2322bb2 100644 --- a/tests/fullsort/fullsort.avx2.cpp +++ b/tests/fullsort/fullsort.avx2.cpp @@ -14,44 +14,44 @@ using VM = vxsort::vector_machine; using namespace vxsort; #ifdef VXSORT_TEST_AVX2_I16 -struct VxSortAVX2_i16 : public SortWithSlackFixture {}; -auto vxsort_i16_params_avx2 = ValuesIn(SizeAndSlack::generate(10, 10000, 10, 32, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_i16, vxsort_i16_params_avx2, PrintSizeAndSlack()); +struct VxSortAVX2_i16 : public ParametrizedSortFixture {}; +auto vxsort_i16_params_avx2 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 10000, 10, 32, 0x1000, 0x1)); +INSTANTIATE_TEST_SUITE_P(VxSort, 
VxSortAVX2_i16, vxsort_i16_params_avx2, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX2_I32 -struct VxSortAVX2_i32 : public SortWithSlackFixture {}; -auto vxsort_i32_params_avx2 = ValuesIn(SizeAndSlack::generate(10, 1000000, 10, 16, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_i32, vxsort_i32_params_avx2, PrintSizeAndSlack()); +struct VxSortAVX2_i32 : public ParametrizedSortFixture {}; +auto vxsort_i32_params_avx2 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 16, 0x1000, 0x1)); +INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_i32, vxsort_i32_params_avx2, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX2_I64 -struct VxSortAVX2_i64 : public SortWithSlackFixture {}; -auto vxsort_i64_params_avx2 = ValuesIn(SizeAndSlack::generate(10, 1000000, 10, 8, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_i64, vxsort_i64_params_avx2, PrintSizeAndSlack()); +struct VxSortAVX2_i64 : public ParametrizedSortFixture {}; +auto vxsort_i64_params_avx2 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 8, 0x1000, 0x1)); +INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_i64, vxsort_i64_params_avx2, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX2_U16 -struct VxSortAVX2_u16 : public SortWithSlackFixture {}; -auto vxsort_u16_params_avx2 = ValuesIn(SizeAndSlack::generate(10, 1000000, 10, 16, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_u16, vxsort_u16_params_avx2, PrintSizeAndSlack()); +struct VxSortAVX2_u16 : public ParametrizedSortFixture {}; +auto vxsort_u16_params_avx2 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 16, 0x1000, 0x1)); +INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_u16, vxsort_u16_params_avx2, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX2_U32 -struct VxSortAVX2_u32 : public SortWithSlackFixture {}; -auto vxsort_u32_params_avx2 = ValuesIn(SizeAndSlack::generate(10, 1000000, 10, 16, 0x1000, 0x1)); 
-INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_u32, vxsort_u32_params_avx2, PrintSizeAndSlack()); +struct VxSortAVX2_u32 : public ParametrizedSortFixture {}; +auto vxsort_u32_params_avx2 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 16, 0x1000, 0x1)); +INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_u32, vxsort_u32_params_avx2, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX2_U64 -struct VxSortAVX2_u64 : public SortWithSlackFixture {}; -auto vxsort_u64_params_avx2 = ValuesIn(SizeAndSlack::generate(10, 1000000, 10, 8, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_u64, vxsort_u64_params_avx2, PrintSizeAndSlack()); +struct VxSortAVX2_u64 : public ParametrizedSortFixture {}; +auto vxsort_u64_params_avx2 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 8, 0x1000, 0x1)); +INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_u64, vxsort_u64_params_avx2, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX2_F32 -struct VxSortAVX2_f32 : public SortWithSlackFixture {}; -auto vxsort_f32_params_avx2 = ValuesIn(SizeAndSlack::generate(10, 1000000, 10, 16, 1234.5, 0.1f)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_f32, vxsort_f32_params_avx2, PrintSizeAndSlack()); +struct VxSortAVX2_f32 : public ParametrizedSortFixture {}; +auto vxsort_f32_params_avx2 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 16, 1234.5f, 0.1f)); +INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_f32, vxsort_f32_params_avx2, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX2_F64 -struct VxSortAVX2_f64 : public SortWithSlackFixture {}; -auto vxsort_f64_params_avx2 = ValuesIn(SizeAndSlack::generate(10, 1000000, 10, 8, 1234.5, 0.1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_f64, vxsort_f64_params_avx2, PrintSizeAndSlack()); +struct VxSortAVX2_f64 : public ParametrizedSortFixture {}; +auto vxsort_f64_params_avx2 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 8, 1234.5, 
0.1)); +INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_f64, vxsort_f64_params_avx2, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX2_I16 diff --git a/tests/fullsort/fullsort.avx512.cpp b/tests/fullsort/fullsort.avx512.cpp index 15eba06..db23ddb 100644 --- a/tests/fullsort/fullsort.avx512.cpp +++ b/tests/fullsort/fullsort.avx512.cpp @@ -14,51 +14,51 @@ using VM = vxsort::vector_machine; using namespace vxsort; #ifdef VXSORT_TEST_AVX512_I16 -struct VxSortAVX512_i16 : public SortWithSlackFixture {}; -auto vxsort_i16_params_avx512 = ValuesIn(SizeAndSlack::generate(10, 10000, 10, 32, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_i16, vxsort_i16_params_avx512, PrintSizeAndSlack()); +struct VxSortAVX512_i16 : public ParametrizedSortFixture {}; +auto vxsort_i16_params_avx512 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 10000, 10, 32, 0x1000, 0x1)); +INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_i16, vxsort_i16_params_avx512, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX512_I32 -struct VxSortAVX512_i32 : public SortWithSlackFixture {}; -auto vxsort_i32_params_avx512 = ValuesIn(SizeAndSlack::generate(10, 1000000, 10, 32, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_i32, vxsort_i32_params_avx512, PrintSizeAndSlack()); +struct VxSortAVX512_i32 : public ParametrizedSortFixture {}; +auto vxsort_i32_params_avx512 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 32, 0x1000, 0x1)); +INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_i32, vxsort_i32_params_avx512, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX512_I64 -struct VxSortAVX512_i64 : public SortWithSlackFixture {}; -auto vxsort_i64_params_avx512 = ValuesIn(SizeAndSlack::generate(10, 1000000, 10, 16, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_i64, vxsort_i64_params_avx512, PrintSizeAndSlack()); +struct VxSortAVX512_i64 : public ParametrizedSortFixture {}; +auto vxsort_i64_params_avx512 = 
ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 16, 0x1000, 0x1)); +INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_i64, vxsort_i64_params_avx512, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX512_U16 -struct VxSortAVX512_u16 : public SortWithSlackFixture {}; -auto vxsort_u16_params_avx512 = ValuesIn(SizeAndSlack::generate(10, 10000, 10, 32, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_u16, vxsort_u16_params_avx512, PrintSizeAndSlack()); +struct VxSortAVX512_u16 : public ParametrizedSortFixture {}; +auto vxsort_u16_params_avx512 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 10000, 10, 32, 0x1000, 0x1)); +INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_u16, vxsort_u16_params_avx512, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX512_U32 -struct VxSortAVX512_u32 : public SortWithSlackFixture {}; -auto vxsort_u32_params_avx512 = ValuesIn(SizeAndSlack::generate(10, 1000000, 10, 32, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_u32, vxsort_u32_params_avx512, PrintSizeAndSlack()); +struct VxSortAVX512_u32 : public ParametrizedSortFixture {}; +auto vxsort_u32_params_avx512 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 32, 0x1000, 0x1)); +INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_u32, vxsort_u32_params_avx512, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX512_U64 -struct VxSortAVX512_u64 : public SortWithSlackFixture {}; -auto vxsort_u64_params_avx512 = ValuesIn(SizeAndSlack::generate(10, 1000000, 10, 16, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_u64, vxsort_u64_params_avx512, PrintSizeAndSlack()); +struct VxSortAVX512_u64 : public ParametrizedSortFixture {}; +auto vxsort_u64_params_avx512 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 16, 0x1000, 0x1)); +INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_u64, vxsort_u64_params_avx512, PrintSortTestParams()); #endif #ifdef 
VXSORT_TEST_AVX512_F32 -struct VxSortAVX512_f32 : public SortWithSlackFixture {}; -auto vxsort_f32_params_avx512 = ValuesIn(SizeAndSlack::generate(10, 1000000, 10, 32, 1234.5, 0.1f)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_f32, vxsort_f32_params_avx512, PrintSizeAndSlack()); +struct VxSortAVX512_f32 : public ParametrizedSortFixture {}; +auto vxsort_f32_params_avx512 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 32, 1234.5f, 0.1f)); +INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_f32, vxsort_f32_params_avx512, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX512_F64 -struct VxSortAVX512_f64 : public SortWithSlackFixture {}; -auto vxsort_f64_params_avx512 = ValuesIn(SizeAndSlack::generate(10, 1000000, 10, 16, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_f64, vxsort_f64_params_avx512, PrintSizeAndSlack()); +struct VxSortAVX512_f64 : public ParametrizedSortFixture {}; +auto vxsort_f64_params_avx512 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 16, 1234.5, 0.1)); +INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_f64, vxsort_f64_params_avx512, PrintSortTestParams()); #endif diff --git a/tests/smallsort/smallsort.avx2.cpp b/tests/smallsort/smallsort.avx2.cpp index 7616fba..34b7870 100644 --- a/tests/smallsort/smallsort.avx2.cpp +++ b/tests/smallsort/smallsort.avx2.cpp @@ -11,68 +11,76 @@ namespace vxsort_tests { using namespace vxsort::types; using VM = vxsort::vector_machine; -auto bitonic_machine_allvalues_avx2_16 = ValuesIn(range(16, 64, 16)); -auto bitonic_machine_allvalues_avx2_32 = ValuesIn(range(8, 32, 8)); -auto bitonic_machine_allvalues_avx2_64 = ValuesIn(range(4, 16, 4)); - -auto bitonic_allvalues_avx2_16 = ValuesIn(range(1, 8192, 1)); -auto bitonic_allvalues_avx2_32 = ValuesIn(range(1, 4096, 1)); -auto bitonic_allvalues_avx2_64 = ValuesIn(range(1, 2048, 1)); - #ifdef VXSORT_TEST_AVX2_I16 -struct BitonicMachineAVX2_i16 : public SortFixture {}; -struct BitonicAVX2_i16 : 
public SortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_i16, bitonic_machine_allvalues_avx2_16, PrintValue()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_i16, bitonic_allvalues_avx2_16, PrintValue()); +auto bitonic_machine_allvalues_avx2_i16 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 16, 64, 16, 0, 0x1000, 0x1)); +auto bitonic_allvalues_avx2_i16 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 8192, 1, 0, 0x1000, 0x1)); +struct BitonicMachineAVX2_i16 : public ParametrizedSortFixture {}; +struct BitonicAVX2_i16 : public ParametrizedSortFixture {}; +INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_i16, bitonic_machine_allvalues_avx2_i16, PrintSortTestParams()); +INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_i16, bitonic_allvalues_avx2_i16, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX2_I32 -struct BitonicMachineAVX2_i32 : public SortFixture {}; -struct BitonicAVX2_i32 : public SortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_i32, bitonic_machine_allvalues_avx2_32, PrintValue()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_i32, bitonic_allvalues_avx2_32, PrintValue()); +auto bitonic_machine_allvalues_avx2_i32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 8, 32, 8, 0, 0x1000, 0x1)); +auto bitonic_allvalues_avx2_i32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 4096, 1, 0, 0x1000, 0x1)); +struct BitonicMachineAVX2_i32: public ParametrizedSortFixture {}; +struct BitonicAVX2_i32 : public ParametrizedSortFixture {}; +INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_i32, bitonic_machine_allvalues_avx2_i32, PrintSortTestParams()); +INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_i32, bitonic_allvalues_avx2_i32, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX2_I64 -struct BitonicMachineAVX2_i64 : public SortFixture {}; -struct BitonicAVX2_i64 : public SortFixture {}; 
-INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_i64, bitonic_machine_allvalues_avx2_64, PrintValue()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_i64, bitonic_allvalues_avx2_64, PrintValue()); +auto bitonic_machine_allvalues_avx2_i64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 4, 16, 4, 0, 0x1000, 0x1)); +auto bitonic_allvalues_avx2_i64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 2048, 1, 0, 0x1000, 0x1)); +struct BitonicMachineAVX2_i64 : public ParametrizedSortFixture {}; +struct BitonicAVX2_i64 : public ParametrizedSortFixture {}; +INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_i64, bitonic_machine_allvalues_avx2_i64, PrintSortTestParams()); +INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_i64, bitonic_allvalues_avx2_i64, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX2_U16 -struct BitonicMachineAVX2_u16 : public SortFixture {}; -struct BitonicAVX2_u16 : public SortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_u16, bitonic_machine_allvalues_avx2_16, PrintValue()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_u16, bitonic_allvalues_avx2_16, PrintValue()); +auto bitonic_machine_allvalues_avx2_u16 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 16, 64, 16, 0, 0x1000, 0x1)); +auto bitonic_allvalues_avx2_u16 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 8192, 1, 0, 0x1000, 0x1)); +struct BitonicMachineAVX2_u16 : public ParametrizedSortFixture {}; +struct BitonicAVX2_u16 : public ParametrizedSortFixture {}; +INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_u16, bitonic_machine_allvalues_avx2_u16, PrintSortTestParams()); +INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_u16, bitonic_allvalues_avx2_u16, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX2_U32 -struct BitonicMachineAVX2_u32 : public SortFixture {}; -struct BitonicAVX2_u32 : public SortFixture {}; 
-INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_u32, bitonic_machine_allvalues_avx2_32, PrintValue()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_u32, bitonic_allvalues_avx2_32, PrintValue()); +auto bitonic_machine_allvalues_avx2_u32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 8, 32, 8, 0, 0x1000, 0x1)); +auto bitonic_allvalues_avx2_u32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 4096, 1, 0, 0x1000, 0x1)); +struct BitonicMachineAVX2_u32 : public ParametrizedSortFixture {}; +struct BitonicAVX2_u32 : public ParametrizedSortFixture {}; +INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_u32, bitonic_machine_allvalues_avx2_u32, PrintSortTestParams()); +INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_u32, bitonic_allvalues_avx2_u32, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX2_U64 -struct BitonicMachineAVX2_u64 : public SortFixture {}; -struct BitonicAVX2_u64 : public SortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_u64, bitonic_machine_allvalues_avx2_64, PrintValue()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_u64, bitonic_allvalues_avx2_64, PrintValue()); +auto bitonic_machine_allvalues_avx2_u64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 4, 16, 4, 0, 0x1000, 0x1)); +auto bitonic_allvalues_avx2_u64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 2048, 1, 0, 0x1000, 0x1)); +struct BitonicMachineAVX2_u64 : public ParametrizedSortFixture {}; +struct BitonicAVX2_u64 : public ParametrizedSortFixture {}; +INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_u64, bitonic_machine_allvalues_avx2_u64, PrintSortTestParams()); +INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_u64, bitonic_allvalues_avx2_u64, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX2_F32 -struct BitonicMachineAVX2_f32 : public SortFixture {}; -struct BitonicAVX2_f32 : public SortFixture {}; 
-INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_f32, bitonic_machine_allvalues_avx2_32, PrintValue()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_f32, bitonic_allvalues_avx2_32, PrintValue()); +auto bitonic_machine_allvalues_avx2_f32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 8, 32, 8, 0, 1234.5f, 0.1f)); +auto bitonic_allvalues_avx2_f32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 4096, 1, 0, 1234.5f, 0.1f)); +struct BitonicMachineAVX2_f32 : public ParametrizedSortFixture {}; +struct BitonicAVX2_f32 : public ParametrizedSortFixture {}; +INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_f32, bitonic_machine_allvalues_avx2_f32, PrintSortTestParams()); +INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_f32, bitonic_allvalues_avx2_f32, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX2_F64 -struct BitonicMachineAVX2_f64 : public SortFixture {}; -struct BitonicAVX2_f64 : public SortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_f64, bitonic_machine_allvalues_avx2_64, PrintValue()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_f64, bitonic_allvalues_avx2_64, PrintValue()); +auto bitonic_machine_allvalues_avx2_f64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 4, 16, 4, 0, 1234.5, 0.1)); +auto bitonic_allvalues_avx2_f64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 2048, 1, 0, 1234.5, 0.1)); +struct BitonicMachineAVX2_f64 : public ParametrizedSortFixture {}; +struct BitonicAVX2_f64 : public ParametrizedSortFixture {}; +INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_f64, bitonic_machine_allvalues_avx2_f64, PrintSortTestParams()); +INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_f64, bitonic_allvalues_avx2_f64, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX2_I16 diff --git a/tests/smallsort/smallsort.avx512.cpp b/tests/smallsort/smallsort.avx512.cpp index 432a8d5..9aa0648 100644 --- 
a/tests/smallsort/smallsort.avx512.cpp +++ b/tests/smallsort/smallsort.avx512.cpp @@ -13,67 +13,76 @@ using testing::Types; using VM = vxsort::vector_machine; -auto bitonic_machine_allvalues_avx512_16 = ValuesIn(range(32, 128, 32)); -auto bitonic_machine_allvalues_avx512_32 = ValuesIn(range(16, 64, 16)); -auto bitonic_machine_allvalues_avx512_64 = ValuesIn(range(8, 32, 8)); -auto bitonic_allvalues_avx512_16 = ValuesIn(range(1, 8192, 1)); -auto bitonic_allvalues_avx512_32 = ValuesIn(range(1, 4096, 1)); -auto bitonic_allvalues_avx512_64 = ValuesIn(range(1, 2048, 1)); - #ifdef VXSORT_TEST_AVX512_I16 -struct BitonicMachineAVX512_i16 : public SortFixture {}; -struct BitonicAVX512_i16 : public SortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_i16, bitonic_machine_allvalues_avx512_16, PrintValue()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_i16, bitonic_allvalues_avx512_16, PrintValue()); +auto bitonic_machine_allvalues_avx512_i16 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 32, 128, 32, 0, 0x1000, 0x1)); +auto bitonic_allvalues_avx512_i16 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 8192, 1, 0, 0x1000, 0x1)); +struct BitonicMachineAVX512_i16 : public ParametrizedSortFixture {}; +struct BitonicAVX512_i16 : public ParametrizedSortFixture {}; +INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_i16, bitonic_machine_allvalues_avx512_i16, PrintSortTestParams()); +INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_i16, bitonic_allvalues_avx512_i16, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX512_I32 -struct BitonicMachineAVX512_i32 : public SortFixture {}; -struct BitonicAVX512_i32 : public SortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_i32, bitonic_machine_allvalues_avx512_32, PrintValue()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_i32, bitonic_allvalues_avx512_32, PrintValue()); +auto 
bitonic_machine_allvalues_avx512_i32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 16, 64, 16, 0, 0x1000, 0x1)); +auto bitonic_allvalues_avx512_i32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 4096, 1, 0, 0x1000, 0x1)); +struct BitonicMachineAVX512_i32 : public ParametrizedSortFixture {}; +struct BitonicAVX512_i32 : public ParametrizedSortFixture {}; +INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_i32, bitonic_machine_allvalues_avx512_i32, PrintSortTestParams()); +INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_i32, bitonic_allvalues_avx512_i32, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX512_I64 -struct BitonicMachineAVX512_i64 : public SortFixture {}; -struct BitonicAVX512_i64 : public SortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_i64, bitonic_machine_allvalues_avx512_64, PrintValue()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_i64, bitonic_allvalues_avx512_64, PrintValue()); +auto bitonic_machine_allvalues_avx512_i64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 8, 32, 8, 0, 0x1000, 0x1)); +auto bitonic_allvalues_avx512_i64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 2048, 1, 0, 0x1000, 0x1)); +struct BitonicMachineAVX512_i64 : public ParametrizedSortFixture {}; +struct BitonicAVX512_i64 : public ParametrizedSortFixture {}; +INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_i64, bitonic_machine_allvalues_avx512_i64, PrintSortTestParams()); +INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_i64, bitonic_allvalues_avx512_i64, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX512_U16 -struct BitonicMachineAVX512_u16 : public SortFixture {}; -struct BitonicAVX512_u16 : public SortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_u16, bitonic_machine_allvalues_avx512_16, PrintValue()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_u16, 
bitonic_allvalues_avx512_16, PrintValue()); +auto bitonic_machine_allvalues_avx512_u16 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 32, 128, 32, 0, 0x1000, 0x1)); +auto bitonic_allvalues_avx512_u16 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 8192, 1, 0, 0x1000, 0x1)); +struct BitonicMachineAVX512_u16 : public ParametrizedSortFixture {}; +struct BitonicAVX512_u16 : public ParametrizedSortFixture {}; +INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_u16, bitonic_machine_allvalues_avx512_u16, PrintSortTestParams()); +INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_u16, bitonic_allvalues_avx512_u16, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX512_U32 -struct BitonicMachineAVX512_u32 : public SortFixture {}; -struct BitonicAVX512_u32 : public SortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_u32, bitonic_machine_allvalues_avx512_32, PrintValue()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_u32, bitonic_allvalues_avx512_32, PrintValue()); +auto bitonic_machine_allvalues_avx512_u32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 16, 64, 16, 0, 0x1000, 0x1)); +auto bitonic_allvalues_avx512_u32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 4096, 1, 0, 0x1000, 0x1)); +struct BitonicMachineAVX512_u32 : public ParametrizedSortFixture {}; +struct BitonicAVX512_u32 : public ParametrizedSortFixture {}; +INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_u32, bitonic_machine_allvalues_avx512_u32, PrintSortTestParams()); +INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_u32, bitonic_allvalues_avx512_u32, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX512_U64 -struct BitonicMachineAVX512_u64 : public SortFixture {}; -struct BitonicAVX512_u64 : public SortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_u64, bitonic_machine_allvalues_avx512_64, PrintValue()); 
-INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_u64, bitonic_allvalues_avx512_64, PrintValue()); +auto bitonic_machine_allvalues_avx512_u64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 8, 32, 8, 0, 0x1000, 0x1)); +auto bitonic_allvalues_avx512_u64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 2048, 1, 0, 0x1000, 0x1)); +struct BitonicMachineAVX512_u64 : public ParametrizedSortFixture {}; +struct BitonicAVX512_u64 : public ParametrizedSortFixture {}; +INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_u64, bitonic_machine_allvalues_avx512_u64, PrintSortTestParams()); +INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_u64, bitonic_allvalues_avx512_u64, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX512_F32 -struct BitonicMachineAVX512_f32 : public SortFixture {}; -struct BitonicAVX512_f32 : public SortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_f32, bitonic_machine_allvalues_avx512_32, PrintValue()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_f32, bitonic_allvalues_avx512_32, PrintValue()); +auto bitonic_machine_allvalues_avx512_f32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 16, 64, 16, 0, 1234.5f, 0.1f)); +auto bitonic_allvalues_avx512_f32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 4096, 1, 0, 1234.5f, 0.1f)); +struct BitonicMachineAVX512_f32 : public ParametrizedSortFixture {}; +struct BitonicAVX512_f32 : public ParametrizedSortFixture {}; +INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_f32, bitonic_machine_allvalues_avx512_f32, PrintSortTestParams()); +INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_f32, bitonic_allvalues_avx512_f32, PrintSortTestParams()); #endif #ifdef VXSORT_TEST_AVX512_F64 -struct BitonicMachineAVX512_f64 : public SortFixture {}; -struct BitonicAVX512_f64 : public SortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_f64, 
bitonic_machine_allvalues_avx512_64, PrintValue()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_f64, bitonic_allvalues_avx512_64, PrintValue()); +auto bitonic_machine_allvalues_avx512_f64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 8, 32, 8, 0, 1234.5, 0.1)); +auto bitonic_allvalues_avx512_f64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 2048, 1, 0, 1234.5, 0.1)); +struct BitonicMachineAVX512_f64 : public ParametrizedSortFixture {}; +struct BitonicAVX512_f64 : public ParametrizedSortFixture {}; +INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX512_f64, bitonic_machine_allvalues_avx512_f64, PrintSortTestParams()); +INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_f64, bitonic_allvalues_avx512_f64, PrintSortTestParams()); #endif diff --git a/tests/sort_fixtures.h b/tests/sort_fixtures.h index 27b1891..5595e9c 100644 --- a/tests/sort_fixtures.h +++ b/tests/sort_fixtures.h @@ -16,135 +16,139 @@ using namespace vxsort::types; using testing::ValuesIn; using testing::Types; -template -struct SortFixture : public testing::TestWithParam { -protected: - std::vector V; - -public: - virtual void SetUp() { - auto v = unique_values(GetParam(), (T)0x1000, (T)0x1); - } - virtual void TearDown() { - } -}; -struct PrintValue { - template - std::string operator()(const testing::TestParamInfo& info) const { - auto v = static_cast(info.param); - return std::to_string(v); - } +enum class SortPattern { + unique_values, + shuffled_16_values, + all_equal, + ascending_int, + descending_int, + pipe_organ, + push_front, + push_middle }; +/// @brief This sort fixture +/// @tparam T +/// @tparam AlignTo template -struct SizeAndSlack { +struct SortTestParams { public: + SortPattern Pattern; usize Size; i32 Slack; T FirstValue; T ValueStride; - bool Randomize; - SizeAndSlack(size_t size, int slack, T first_value, T value_stride, bool randomize) - : Size(size), Slack(slack), FirstValue(first_value), ValueStride(value_stride), 
Randomize(randomize) {} + + SortTestParams(SortPattern pattern, size_t size, int slack, T first_value, T value_stride) + : Pattern(pattern), Size(size), Slack(slack), FirstValue(first_value), ValueStride(value_stride) {} /** * Generate sorting problems "descriptions" - * @param start - * @param stop - * @param step - * @param slack + * @param patterns - the sort patterns to test with + * @param start - start value for the size parameter + * @param stop - stop value for the size paraameter + * @param step - the step/multiplier for the size parameter + * @param slack - the slack parameter used to generate ranges of problem sized around a base value * @param first_value - the smallest value in each test array * @param value_stride - the minimal jump between array elements - * @param randomize - should the problem array contents be randomized, defaults to true * @return */ - static std::vector generate(size_t start, size_t stop, size_t step, int slack, T first_value, T value_stride, bool randomize = true) { + static std::vector gen_mult(std::vector patterns, usize start, usize stop, usize step, i32 slack, T first_value, T value_stride) { if (step == 0) { throw std::invalid_argument("step for range must be non-zero"); } - std::vector result; + std::vector result; size_t i = start; - while ((step > 0) ? (i <= stop) : (i > stop)) { - for (auto j : range(-slack, slack, 1)) { - if ((i64)i + j <= 0) - continue; - result.push_back(SizeAndSlack(i, j, first_value, value_stride, randomize)); + for (auto p : patterns) { + while ((step > 0) ? 
(i <= stop) : (i > stop)) { + for (auto j : range(-slack, slack, 1)) { + if ((i64)i + j <= 0) + continue; + result.push_back(SortTestParams(p, i, j, first_value, value_stride)); + } + i *= step; } - i *= step; } return result; } -}; -template -struct SortWithSlackFixture : public testing::TestWithParam> { -protected: - std::vector V; - -public: - virtual void SetUp() { - testing::TestWithParam>::SetUp(); - auto p = this->GetParam(); - //V = std::vector(p.Size + p.Slack); - //generate_unique_values_vec(V, p.FirstValue, p.ValueStride, p.Randomize); - auto v = unique_values(p.Size + p.Slack, p.FirstValue, p.ValueStride); - - - } - virtual void TearDown() { -#ifdef VXSORT_STATS - vxsort::print_all_stats(); - vxsort::reset_all_stats(); -#endif - } -}; - -template -struct PrintSizeAndSlack { - std::string operator()(const testing::TestParamInfo>& info) const { - return std::to_string(info.param.Size + info.param.Slack); + /** + * Generate sorting problems "descriptions" + * @param pattern - the sort pattern to test with + * @param start - start value for the size parameter + * @param stop - stop value for the size paraameter + * @param step - the step/multiplier for the size parameter + * @param slack - the slack parameter used to generate ranges of problem sized around a base value + * @param first_value - the smallest value in each test array + * @param value_stride - the minimal jump between array elements + * @return + */ + static auto gen_mult(SortPattern pattern, usize start, usize stop, usize step, i32 slack, T first_value, T value_stride) { + return gen_mult(std::vector{pattern}, start, stop, step, slack, + first_value, value_stride); } -}; - -template -struct SizeAndStride { -public: - usize Size; - T FirstValue; - T ValueStride; - bool Randomize; - SizeAndStride(size_t size, T first_value, T value_stride, bool randomize) - : Size(size), FirstValue(first_value), ValueStride(value_stride), Randomize(randomize) {} + /** + * Generate sorting problems "descriptions" 
+ * @param patterns - the sort patterns to test with + * @param start - start value for the size parameter + * @param stop - stop value for the size paraameter + * @param step - the step/multiplier for the size parameter + * @param slack - the slack parameter used to generate ranges of problem sized around a base value + * @param first_value - the smallest value in each test array + * @param value_stride - the minimal jump between array elements + * @return + */ + static std::vector gen_step(std::vector patterns, usize start, usize stop, usize step, i32 slack, T first_value, T value_stride) { + if (step == 0) { + throw std::invalid_argument("step for range must be non-zero"); + } - static std::vector generate(size_t size, T stride_start, T stride_stop, T first_value, bool randomize = true) { - std::vector result; - for (auto j : multiply_range(stride_start, stride_stop, 2)) { - result.push_back(SizeAndStride(size, first_value, j, randomize)); + std::vector result; + size_t i = start; + for (auto p : patterns) { + while ((step > 0) ? 
(i <= stop) : (i > stop)) { + for (auto j : range(-slack, slack, 1)) { + if ((i64)i + j <= 0) + continue; + result.push_back(SortTestParams(p, i, j, first_value, value_stride)); + } + i += step; + } } return result; } + + /** + * Generate sorting problems "descriptions" + * @param pattern - the sort pattern to test with + * @param start - start value for the size parameter + * @param stop - stop value for the size paraameter + * @param step - the step for the size parameter + * @param slack - the slack parameter used to generate ranges of problem sized around a base value + * @param first_value - the smallest value in each test array + * @param value_stride - the minimal jump between array elements + * @return + */ + static auto gen_step(SortPattern pattern, usize start, usize stop, usize step, i32 slack, T first_value, T value_stride) { + return gen_step(std::vector{pattern}, start, stop, step, slack, + first_value, value_stride); + } }; -template -struct SortWithStrideFixture : public testing::TestWithParam> { +template +struct ParametrizedSortFixture : public testing::TestWithParam> { protected: std::vector V; - T MinValue; - T MaxValue; public: virtual void SetUp() { - testing::TestWithParam>::SetUp(); + testing::TestWithParam>::SetUp(); auto p = this->GetParam(); - auto v = unique_values(p.Size, p.FirstValue, p.ValueStride); - MinValue = p.FirstValue; - MaxValue = MinValue + p.Size * p.ValueStride; - if (MinValue > MaxValue) - throw std::invalid_argument("stride is generating an overflow"); + auto v = unique_values(p.Size + p.Slack, p.FirstValue, p.ValueStride); } virtual void TearDown() { #ifdef VXSORT_STATS @@ -155,11 +159,12 @@ struct SortWithStrideFixture : public testing::TestWithParam> { }; template -struct PrintSizeAndStride { - std::string operator()(const testing::TestParamInfo>& info) const { - return std::to_string(info.param.ValueStride); +struct PrintSortTestParams { + std::string operator()(const testing::TestParamInfo>& info) const { + return 
std::to_string(info.param.Size + info.param.Slack); } }; + } #endif // VXSORT_SORT_FIXTURES_H From bbf3ca93597fbead49e5f117d4f3a7f6ee0da7ec Mon Sep 17 00:00:00 2001 From: damageboy <125730+damageboy@users.noreply.github.com> Date: Sun, 1 Oct 2023 14:42:43 +0300 Subject: [PATCH 14/21] tests: rewrite fullsort tests, again - reduce code-bloat in tests - and chance of manual typeing errors - make slack computed from the type-system (e.g. up to one vector worth of slack) - introduce specific translation units for the i/u/f complilation+testing speed hack while keeping all of the logic in a templated header - still only uses one pattern (unique values) for now --- bench/fullsort/BM_fullsort.vxsort.h | 14 +- bench/util.cpp | 8 +- bench/util.h | 42 +++++- tests/CMakeLists.txt | 28 ++-- tests/fullsort/fullsort.avx2.cpp | 134 ----------------- tests/fullsort/fullsort.avx2.f.cpp | 23 +++ tests/fullsort/fullsort.avx2.i.cpp | 24 +++ tests/fullsort/fullsort.avx2.u.cpp | 24 +++ tests/fullsort/fullsort.avx512.cpp | 141 ------------------ tests/fullsort/fullsort.avx512.f.cpp | 23 +++ tests/fullsort/fullsort.avx512.i.cpp | 24 +++ tests/fullsort/fullsort.avx512.u.cpp | 24 +++ tests/fullsort/fullsort_test.h | 102 ++++++++++++- tests/gtest_main.cpp | 51 ++++--- tests/mini_tests/masked_load_store.avx2.cpp | 6 +- tests/mini_tests/masked_load_store.avx512.cpp | 6 +- tests/mini_tests/pack_machine.avx2.cpp | 6 +- tests/mini_tests/pack_machine.avx512.cpp | 6 +- tests/mini_tests/partition_machine.avx2.cpp | 6 +- tests/mini_tests/partition_machine.avx512.cpp | 6 +- tests/sort_fixtures.h | 12 -- tests/util.h | 51 ++++++- 22 files changed, 405 insertions(+), 356 deletions(-) delete mode 100644 tests/fullsort/fullsort.avx2.cpp create mode 100644 tests/fullsort/fullsort.avx2.f.cpp create mode 100644 tests/fullsort/fullsort.avx2.i.cpp create mode 100644 tests/fullsort/fullsort.avx2.u.cpp delete mode 100644 tests/fullsort/fullsort.avx512.cpp create mode 100644 tests/fullsort/fullsort.avx512.f.cpp 
create mode 100644 tests/fullsort/fullsort.avx512.i.cpp create mode 100644 tests/fullsort/fullsort.avx512.u.cpp diff --git a/bench/fullsort/BM_fullsort.vxsort.h b/bench/fullsort/BM_fullsort.vxsort.h index fed3bc6..f4cc127 100644 --- a/bench/fullsort/BM_fullsort.vxsort.h +++ b/bench/fullsort/BM_fullsort.vxsort.h @@ -10,10 +10,6 @@ #include "../bench_isa.h" #include "../util.h" -#ifndef VXSORT_COMPILER_MSVC -#include -#endif - #include #include "fullsort_params.h" @@ -187,13 +183,9 @@ void register_type(i64 s, SortPattern p) { if constexpr (U >= 2) { register_type(s, p); } -#ifdef VXSORT_COMPILER_MSVC - auto realname = typeid(T).name(); -#else - auto realname = abi::__cxa_demangle(typeid(T).name(), nullptr, nullptr, nullptr); -#endif - auto bench_name = fmt::format("BM_vxsort_pattern<{}, {}, {}>/{}/{}", realname, U, s, - magic_enum::enum_name(M), magic_enum::enum_name(p)); + auto *bench_type = get_canonical_typename(); + auto bench_name = fmt::format("BM_vxsort_pattern<{}, {}, {}>/{}/{}", bench_type, U, s, + magic_enum::enum_name(M), magic_enum::enum_name(p)); ::benchmark::RegisterBenchmark(bench_name.c_str(), BM_vxsort_pattern, s, p) ->Unit(kMillisecond) ->ThreadRange(1, processor_count); diff --git a/bench/util.cpp b/bench/util.cpp index 9f02f4a..ef91bb2 100644 --- a/bench/util.cpp +++ b/bench/util.cpp @@ -1,15 +1,14 @@ #include +#include +#include + #include "util.h" #include #include -#include - #include -#include -#include namespace vxsort_bench { using namespace vxsort::types; @@ -200,5 +199,4 @@ void process_perf_counters(UserCounters &counters, i64 num_elements) { counters.erase(k); } } - } diff --git a/bench/util.h b/bench/util.h index 0ece72a..75a9ee2 100644 --- a/bench/util.h +++ b/bench/util.h @@ -3,12 +3,17 @@ #include +#include + #include #include #include #include +#include +#ifndef VXSORT_COMPILER_MSVC +#include +#endif -#include #include "stolen-cycleclock.h" @@ -130,6 +135,41 @@ std::vector push_middle(usize size, T start, T stride) { return v; } 
+template +const char *get_canonical_typename() { +#ifdef VXSORT_COMPILER_MSVC + auto realname = typeid(T).name(); +#else + auto realname = abi::__cxa_demangle(typeid(T).name(), nullptr, nullptr, nullptr); +#endif + + if (realname == nullptr) { + return "unknown"; + } else if (std::strcmp(realname, "long") == 0) + return "i64"; + else if (std::strcmp(realname, "unsigned long") == 0) + return "u64"; + else if (std::strcmp(realname, "int") == 0) + return "i32"; + else if (std::strcmp(realname, "unsigned int") == 0) + return "u32"; + else if (std::strcmp(realname, "short") == 0) + return "i16"; + else if (std::strcmp(realname, "unsigned short") == 0) + return "u16"; + else if (std::strcmp(realname, "char") == 0) + return "i8"; + else if (std::strcmp(realname, "unsigned char") == 0) + return "u8"; + else if (std::strcmp(realname, "float") == 0) + return "f32"; + else if (std::strcmp(realname, "double") == 0) + return "f64"; + else + return realname; +} + + } #endif //VXSORT_BENCH_UTIL_H diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d392850..224872f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -10,6 +10,12 @@ set(test_HEADERS mini_tests/masked_load_store_test.h test_isa.h) +list(APPEND sort_types + i + u + f +) + list(APPEND i_sort_types i16 i32 @@ -27,12 +33,6 @@ list(APPEND f_sort_types f64 ) -list(APPEND sort_types - i - u - f -) - list(APPEND x86_isas avx2 avx512 @@ -47,7 +47,7 @@ if (${PROCESSOR_IS_X86}) set(test_avx2_SOURCES ${test_SOURCES}) list(APPEND test_avx2_SOURCES smallsort/smallsort.avx2.cpp - fullsort/fullsort.avx2.cpp + fullsort/fullsort.avx2.i.cpp mini_tests/masked_load_store.avx2.cpp mini_tests/partition_machine.avx2.cpp mini_tests/pack_machine.avx2.cpp @@ -56,7 +56,7 @@ if (${PROCESSOR_IS_X86}) set(test_avx512_SOURCES ${test_SOURCES}) list(APPEND test_avx512_SOURCES smallsort/smallsort.avx512.cpp - fullsort/fullsort.avx512.cpp + fullsort/fullsort.avx512.i.cpp mini_tests/masked_load_store.avx512.cpp 
mini_tests/partition_machine.avx512.cpp mini_tests/pack_machine.avx512.cpp @@ -67,15 +67,23 @@ if (${PROCESSOR_IS_X86}) foreach(v ${x86_isas}) foreach(tf ${sort_types}) string(TOUPPER ${v} vu) - add_executable(${TARGET_NAME}_${v}_${tf} ${test_${v}_SOURCES} ${test_HEADERS}) + + add_executable(${TARGET_NAME}_${v}_${tf} ${test_SOURCES} ${test_HEADERS} + smallsort/smallsort.${v}.cpp + fullsort/fullsort.${v}.${tf}.cpp + mini_tests/masked_load_store.${v}.cpp + mini_tests/partition_machine.${v}.cpp + mini_tests/pack_machine.${v}.cpp) foreach(t ${${tf}_sort_types}) + string(TOUPPER ${tf} tfu) string(TOUPPER ${t} tu) - target_compile_definitions(${TARGET_NAME}_${v}_${tf} PRIVATE VXSORT_TEST_${vu}_${tu}) + target_compile_definitions(${TARGET_NAME}_${v}_${tf} PRIVATE VXSORT_TEST_${vu}_${tu} VXSORT_TEST_${vu}_${tfu}) endforeach () target_link_libraries(${TARGET_NAME}_${v}_${tf} ${CMAKE_PROJECT_NAME}_lib + magic_enum::magic_enum Backward::Backward GTest::gtest ) diff --git a/tests/fullsort/fullsort.avx2.cpp b/tests/fullsort/fullsort.avx2.cpp deleted file mode 100644 index 2322bb2..0000000 --- a/tests/fullsort/fullsort.avx2.cpp +++ /dev/null @@ -1,134 +0,0 @@ -#include "vxsort_targets_enable_avx2.h" - -#include "gtest/gtest.h" - -#include -#include "fullsort_test.h" -#include "../sort_fixtures.h" - -namespace vxsort_tests { -using namespace vxsort::types; -using testing::Types; - -using VM = vxsort::vector_machine; -using namespace vxsort; - -#ifdef VXSORT_TEST_AVX2_I16 -struct VxSortAVX2_i16 : public ParametrizedSortFixture {}; -auto vxsort_i16_params_avx2 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 10000, 10, 32, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_i16, vxsort_i16_params_avx2, PrintSortTestParams()); -#endif -#ifdef VXSORT_TEST_AVX2_I32 -struct VxSortAVX2_i32 : public ParametrizedSortFixture {}; -auto vxsort_i32_params_avx2 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 16, 0x1000, 0x1)); 
-INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_i32, vxsort_i32_params_avx2, PrintSortTestParams()); -#endif -#ifdef VXSORT_TEST_AVX2_I64 -struct VxSortAVX2_i64 : public ParametrizedSortFixture {}; -auto vxsort_i64_params_avx2 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 8, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_i64, vxsort_i64_params_avx2, PrintSortTestParams()); -#endif -#ifdef VXSORT_TEST_AVX2_U16 -struct VxSortAVX2_u16 : public ParametrizedSortFixture {}; -auto vxsort_u16_params_avx2 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 16, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_u16, vxsort_u16_params_avx2, PrintSortTestParams()); -#endif -#ifdef VXSORT_TEST_AVX2_U32 -struct VxSortAVX2_u32 : public ParametrizedSortFixture {}; -auto vxsort_u32_params_avx2 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 16, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_u32, vxsort_u32_params_avx2, PrintSortTestParams()); -#endif -#ifdef VXSORT_TEST_AVX2_U64 -struct VxSortAVX2_u64 : public ParametrizedSortFixture {}; -auto vxsort_u64_params_avx2 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 8, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_u64, vxsort_u64_params_avx2, PrintSortTestParams()); -#endif -#ifdef VXSORT_TEST_AVX2_F32 -struct VxSortAVX2_f32 : public ParametrizedSortFixture {}; -auto vxsort_f32_params_avx2 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 16, 1234.5f, 0.1f)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_f32, vxsort_f32_params_avx2, PrintSortTestParams()); -#endif -#ifdef VXSORT_TEST_AVX2_F64 -struct VxSortAVX2_f64 : public ParametrizedSortFixture {}; -auto vxsort_f64_params_avx2 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 8, 1234.5, 0.1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX2_f64, 
vxsort_f64_params_avx2, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX2_I16 -TEST_P(VxSortAVX2_i16, VxSortAVX2_1) { vxsort_test(V); } -TEST_P(VxSortAVX2_i16, VxSortAVX2_2) { vxsort_test(V); } -TEST_P(VxSortAVX2_i16, VxSortAVX2_4) { vxsort_test(V); } -TEST_P(VxSortAVX2_i16, VxSortAVX2_8) { vxsort_test(V); } -TEST_P(VxSortAVX2_i16, VxSortAVX2_12) { vxsort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX2_I32 -TEST_P(VxSortAVX2_i32, VxSortAVX2_1) { vxsort_test(V); } -TEST_P(VxSortAVX2_i32, VxSortAVX2_2) { vxsort_test(V); } -TEST_P(VxSortAVX2_i32, VxSortAVX2_4) { vxsort_test(V); } -TEST_P(VxSortAVX2_i32, VxSortAVX2_8) { vxsort_test(V); } -TEST_P(VxSortAVX2_i32, VxSortAVX2_12) { vxsort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX2_U16 -TEST_P(VxSortAVX2_u16, VxSortAVX2_1) { vxsort_test(V); } -TEST_P(VxSortAVX2_u16, VxSortAVX2_2) { vxsort_test(V); } -TEST_P(VxSortAVX2_u16, VxSortAVX2_4) { vxsort_test(V); } -TEST_P(VxSortAVX2_u16, VxSortAVX2_8) { vxsort_test(V); } -TEST_P(VxSortAVX2_u16, VxSortAVX2_12) { vxsort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX2_U32 -TEST_P(VxSortAVX2_u32, VxSortAVX2_1) { vxsort_test(V); } -TEST_P(VxSortAVX2_u32, VxSortAVX2_2) { vxsort_test(V); } -TEST_P(VxSortAVX2_u32, VxSortAVX2_4) { vxsort_test(V); } -TEST_P(VxSortAVX2_u32, VxSortAVX2_8) { vxsort_test(V); } -TEST_P(VxSortAVX2_u32, VxSortAVX2_12) { vxsort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX2_F32 -TEST_P(VxSortAVX2_f32, VxSortAVX2_1) { vxsort_test(V); } -TEST_P(VxSortAVX2_f32, VxSortAVX2_2) { vxsort_test(V); } -TEST_P(VxSortAVX2_f32, VxSortAVX2_4) { vxsort_test(V); } -TEST_P(VxSortAVX2_f32, VxSortAVX2_8) { vxsort_test(V); } -TEST_P(VxSortAVX2_f32, VxSortAVX2_12) { vxsort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX2_I64 -TEST_P(VxSortAVX2_i64, VxSortAVX2_1) { vxsort_test(V); } -TEST_P(VxSortAVX2_i64, VxSortAVX2_2) { vxsort_test(V); } -TEST_P(VxSortAVX2_i64, VxSortAVX2_4) { vxsort_test(V); } -TEST_P(VxSortAVX2_i64, VxSortAVX2_8) { vxsort_test(V); } -TEST_P(VxSortAVX2_i64, 
VxSortAVX2_12) { vxsort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX2_U64 -TEST_P(VxSortAVX2_u64, VxSortAVX2_1) { vxsort_test(V); } -TEST_P(VxSortAVX2_u64, VxSortAVX2_2) { vxsort_test(V); } -TEST_P(VxSortAVX2_u64, VxSortAVX2_4) { vxsort_test(V); } -TEST_P(VxSortAVX2_u64, VxSortAVX2_8) { vxsort_test(V); } -TEST_P(VxSortAVX2_u64, VxSortAVX2_12) { vxsort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX2_F64 -TEST_P(VxSortAVX2_f64, VxSortAVX2_1) { vxsort_test(V); } -TEST_P(VxSortAVX2_f64, VxSortAVX2_2) { vxsort_test(V); } -TEST_P(VxSortAVX2_f64, VxSortAVX2_4) { vxsort_test(V); } -TEST_P(VxSortAVX2_f64, VxSortAVX2_8) { vxsort_test(V); } -TEST_P(VxSortAVX2_f64, VxSortAVX2_12) { vxsort_test(V); } -#endif - -/* -struct VxSortWithStridesAndHintsAVX2_i64 : public SortWithStrideFixture {}; -auto vxsort_i64_stride_params_avx2 = ValuesIn(SizeAndStride::generate(1000000, 0x8L, 0x4000000L, 0x80000000L)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortWithStridesAndHintsAVX2_i64, vxsort_i64_stride_params_avx2, PrintSizeAndStride()); - -TEST_P(VxSortWithStridesAndHintsAVX2_i64, VxSortStridesAndHintsAVX2_1) { vxsort_hinted_test(V, MinValue, MaxValue); } -TEST_P(VxSortWithStridesAndHintsAVX2_i64, VxSortStridesAndHintsAVX2_2) { vxsort_hinted_test(V, MinValue, MaxValue); } -TEST_P(VxSortWithStridesAndHintsAVX2_i64, VxSortStridesAndHintsAVX2_4) { vxsort_hinted_test(V, MinValue, MaxValue); } -TEST_P(VxSortWithStridesAndHintsAVX2_i64, VxSortStridesAndHintsAVX2_8) { vxsort_hinted_test(V, MinValue, MaxValue); } -TEST_P(VxSortWithStridesAndHintsAVX2_i64, VxSortStridesAndHintsAVX2_12) { vxsort_hinted_test(V, MinValue, MaxValue); } -*/ -} - -#include "vxsort_targets_disable.h" diff --git a/tests/fullsort/fullsort.avx2.f.cpp b/tests/fullsort/fullsort.avx2.f.cpp new file mode 100644 index 0000000..efb0fab --- /dev/null +++ b/tests/fullsort/fullsort.avx2.f.cpp @@ -0,0 +1,23 @@ +#include "vxsort_targets_enable_avx2.h" + +#include "gtest/gtest.h" + +#include +#include "fullsort_test.h" + +namespace 
vxsort_tests { +using namespace vxsort::types; +using testing::Types; + +using VM = vxsort::vector_machine; +using namespace vxsort; + +void register_fullsort_avx2_f_tests() { + register_fullsort_benchmarks(10, 1000000, 10, 1234.5, 0.1); + register_fullsort_benchmarks(10, 1000000, 10, 1234.5, 0.1); +} + +} + + +#include "vxsort_targets_disable.h" diff --git a/tests/fullsort/fullsort.avx2.i.cpp b/tests/fullsort/fullsort.avx2.i.cpp new file mode 100644 index 0000000..6c4efd1 --- /dev/null +++ b/tests/fullsort/fullsort.avx2.i.cpp @@ -0,0 +1,24 @@ +#include "vxsort_targets_enable_avx2.h" + +#include "gtest/gtest.h" + +#include +#include "fullsort_test.h" + +namespace vxsort_tests { +using namespace vxsort::types; +using testing::Types; + +using VM = vxsort::vector_machine; +using namespace vxsort; + +void register_fullsort_avx2_i_tests() { + register_fullsort_benchmarks(10, 10000, 10, 0x1000, 0x1); + register_fullsort_benchmarks(10, 1000000, 10, 0x1000, 0x1); + register_fullsort_benchmarks(10, 1000000, 10, 0x1000, 0x1); +} + +} + + +#include "vxsort_targets_disable.h" diff --git a/tests/fullsort/fullsort.avx2.u.cpp b/tests/fullsort/fullsort.avx2.u.cpp new file mode 100644 index 0000000..2d57965 --- /dev/null +++ b/tests/fullsort/fullsort.avx2.u.cpp @@ -0,0 +1,24 @@ +#include "vxsort_targets_enable_avx2.h" + +#include "gtest/gtest.h" + +#include +#include "fullsort_test.h" + +namespace vxsort_tests { +using namespace vxsort::types; +using testing::Types; + +using VM = vxsort::vector_machine; +using namespace vxsort; + +void register_fullsort_avx2_u_tests() { + register_fullsort_benchmarks(10, 10000, 10, 0x1000, 0x1); + register_fullsort_benchmarks(10, 1000000, 10, 0x1000, 0x1); + register_fullsort_benchmarks(10, 1000000, 10, 0x1000, 0x1); +} + +} + + +#include "vxsort_targets_disable.h" diff --git a/tests/fullsort/fullsort.avx512.cpp b/tests/fullsort/fullsort.avx512.cpp deleted file mode 100644 index db23ddb..0000000 --- a/tests/fullsort/fullsort.avx512.cpp +++ 
/dev/null @@ -1,141 +0,0 @@ -#include "vxsort_targets_enable_avx512.h" - -#include "gtest/gtest.h" - -#include -#include "fullsort_test.h" -#include "../sort_fixtures.h" - -namespace vxsort_tests { -using namespace vxsort::types; -using testing::Types; - -using VM = vxsort::vector_machine; -using namespace vxsort; - -#ifdef VXSORT_TEST_AVX512_I16 -struct VxSortAVX512_i16 : public ParametrizedSortFixture {}; -auto vxsort_i16_params_avx512 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 10000, 10, 32, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_i16, vxsort_i16_params_avx512, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX512_I32 -struct VxSortAVX512_i32 : public ParametrizedSortFixture {}; -auto vxsort_i32_params_avx512 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 32, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_i32, vxsort_i32_params_avx512, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX512_I64 -struct VxSortAVX512_i64 : public ParametrizedSortFixture {}; -auto vxsort_i64_params_avx512 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 16, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_i64, vxsort_i64_params_avx512, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX512_U16 -struct VxSortAVX512_u16 : public ParametrizedSortFixture {}; -auto vxsort_u16_params_avx512 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 10000, 10, 32, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_u16, vxsort_u16_params_avx512, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX512_U32 -struct VxSortAVX512_u32 : public ParametrizedSortFixture {}; -auto vxsort_u32_params_avx512 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 32, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_u32, vxsort_u32_params_avx512, PrintSortTestParams()); -#endif - 
-#ifdef VXSORT_TEST_AVX512_U64 -struct VxSortAVX512_u64 : public ParametrizedSortFixture {}; -auto vxsort_u64_params_avx512 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 16, 0x1000, 0x1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_u64, vxsort_u64_params_avx512, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX512_F32 -struct VxSortAVX512_f32 : public ParametrizedSortFixture {}; -auto vxsort_f32_params_avx512 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 32, 1234.5f, 0.1f)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_f32, vxsort_f32_params_avx512, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX512_F64 -struct VxSortAVX512_f64 : public ParametrizedSortFixture {}; -auto vxsort_f64_params_avx512 = ValuesIn(SortTestParams::gen_mult(SortPattern::unique_values, 10, 1000000, 10, 16, 1234.5, 0.1)); -INSTANTIATE_TEST_SUITE_P(VxSort, VxSortAVX512_f64, vxsort_f64_params_avx512, PrintSortTestParams()); -#endif - - -#ifdef VXSORT_TEST_AVX512_I16 -TEST_P(VxSortAVX512_i16, VxSortAVX512_1) { vxsort_test(V); } -TEST_P(VxSortAVX512_i16, VxSortAVX512_2) { vxsort_test(V); } -TEST_P(VxSortAVX512_i16, VxSortAVX512_4) { vxsort_test(V); } -TEST_P(VxSortAVX512_i16, VxSortAVX512_8) { vxsort_test(V); } -TEST_P(VxSortAVX512_i16, VxSortAVX512_12) { vxsort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX512_I32 -TEST_P(VxSortAVX512_i32, VxSortAVX512_1) { vxsort_test(V); } -TEST_P(VxSortAVX512_i32, VxSortAVX512_2) { vxsort_test(V); } -TEST_P(VxSortAVX512_i32, VxSortAVX512_4) { vxsort_test(V); } -TEST_P(VxSortAVX512_i32, VxSortAVX512_8) { vxsort_test(V); } -TEST_P(VxSortAVX512_i32, VxSortAVX512_12) { vxsort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX512_I64 -TEST_P(VxSortAVX512_i64, VxSortAVX512_1) { vxsort_test(V); } -TEST_P(VxSortAVX512_i64, VxSortAVX512_2) { vxsort_test(V); } -TEST_P(VxSortAVX512_i64, VxSortAVX512_4) { vxsort_test(V); } -TEST_P(VxSortAVX512_i64, VxSortAVX512_8) { vxsort_test(V); } 
-TEST_P(VxSortAVX512_i64, VxSortAVX512_12) { vxsort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX512_U16 -TEST_P(VxSortAVX512_u16, VxSortAVX512_1) { vxsort_test(V); } -TEST_P(VxSortAVX512_u16, VxSortAVX512_2) { vxsort_test(V); } -TEST_P(VxSortAVX512_u16, VxSortAVX512_4) { vxsort_test(V); } -TEST_P(VxSortAVX512_u16, VxSortAVX512_8) { vxsort_test(V); } -TEST_P(VxSortAVX512_u16, VxSortAVX512_12) { vxsort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX512_U32 -TEST_P(VxSortAVX512_u32, VxSortAVX512_1) { vxsort_test(V); } -TEST_P(VxSortAVX512_u32, VxSortAVX512_2) { vxsort_test(V); } -TEST_P(VxSortAVX512_u32, VxSortAVX512_4) { vxsort_test(V); } -TEST_P(VxSortAVX512_u32, VxSortAVX512_8) { vxsort_test(V); } -TEST_P(VxSortAVX512_u32, VxSortAVX512_12) { vxsort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX512_U64 -TEST_P(VxSortAVX512_u64, VxSortAVX512_1) { vxsort_test(V); } -TEST_P(VxSortAVX512_u64, VxSortAVX512_2) { vxsort_test(V); } -TEST_P(VxSortAVX512_u64, VxSortAVX512_4) { vxsort_test(V); } -TEST_P(VxSortAVX512_u64, VxSortAVX512_8) { vxsort_test(V); } -TEST_P(VxSortAVX512_u64, VxSortAVX512_12) { vxsort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX512_F32 -TEST_P(VxSortAVX512_f32, VxSortAVX512_1) { vxsort_test(V); } -TEST_P(VxSortAVX512_f32, VxSortAVX512_2) { vxsort_test(V); } -TEST_P(VxSortAVX512_f32, VxSortAVX512_4) { vxsort_test(V); } -TEST_P(VxSortAVX512_f32, VxSortAVX512_8) { vxsort_test(V); } -TEST_P(VxSortAVX512_f32, VxSortAVX512_12) { vxsort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX512_F64 -TEST_P(VxSortAVX512_f64, VxSortAVX512_1) { vxsort_test(V); } -TEST_P(VxSortAVX512_f64, VxSortAVX512_2) { vxsort_test(V); } -TEST_P(VxSortAVX512_f64, VxSortAVX512_4) { vxsort_test(V); } -TEST_P(VxSortAVX512_f64, VxSortAVX512_8) { vxsort_test(V); } -TEST_P(VxSortAVX512_f64, VxSortAVX512_12) { vxsort_test(V); } -#endif - -/*struct VxSortWithStridesAndHintsAVX512_i64 : public SortWithStrideFixture {}; -auto vxsort_i64_stride_params_avx512 = ValuesIn(SizeAndStride::generate(1000000, 
0x8L, 0x1000000L, 0x80000000L)); -INSTANTIATE_TEST_SUITE_P(FullPackingSort, VxSortWithStridesAndHintsAVX512_i64, vxsort_i64_stride_params_avx512, PrintSizeAndStride()); - -TEST_P(VxSortWithStridesAndHintsAVX512_i64, VxSortStridesAndHintsAVX512_1) { vxsort_hinted_test(V, MinValue, MaxValue); } -TEST_P(VxSortWithStridesAndHintsAVX512_i64, VxSortStridesAndHintsAVX512_2) { vxsort_hinted_test(V, MinValue, MaxValue); } -TEST_P(VxSortWithStridesAndHintsAVX512_i64, VxSortStridesAndHintsAVX512_4) { vxsort_hinted_test(V, MinValue, MaxValue); } -TEST_P(VxSortWithStridesAndHintsAVX512_i64, VxSortStridesAndHintsAVX512_8) { vxsort_hinted_test(V, MinValue, MaxValue); } -TEST_P(VxSortWithStridesAndHintsAVX512_i64, VxSortStridesAndHintsAVX512_12) { vxsort_hinted_test(V, MinValue, MaxValue); } -*/ -} - -#include "vxsort_targets_disable.h" diff --git a/tests/fullsort/fullsort.avx512.f.cpp b/tests/fullsort/fullsort.avx512.f.cpp new file mode 100644 index 0000000..28619d7 --- /dev/null +++ b/tests/fullsort/fullsort.avx512.f.cpp @@ -0,0 +1,23 @@ +#include "vxsort_targets_enable_avx512.h" + +#include "gtest/gtest.h" + +#include +#include "fullsort_test.h" +#include "../sort_fixtures.h" + +namespace vxsort_tests { +using namespace vxsort::types; +using testing::Types; + +using VM = vxsort::vector_machine; +using namespace vxsort; + +void register_fullsort_avx512_f_tests() { + register_fullsort_benchmarks(10, 1000000, 10, 1234.5, 0.1); + register_fullsort_benchmarks(10, 1000000, 10, 1234.5, 0.1); +} + +} + +#include "vxsort_targets_disable.h" diff --git a/tests/fullsort/fullsort.avx512.i.cpp b/tests/fullsort/fullsort.avx512.i.cpp new file mode 100644 index 0000000..68da451 --- /dev/null +++ b/tests/fullsort/fullsort.avx512.i.cpp @@ -0,0 +1,24 @@ +#include "vxsort_targets_enable_avx512.h" + +#include "gtest/gtest.h" + +#include +#include "fullsort_test.h" +#include "../sort_fixtures.h" + +namespace vxsort_tests { +using namespace vxsort::types; +using testing::Types; + +using VM = 
vxsort::vector_machine; +using namespace vxsort; + +void register_fullsort_avx512_i_tests() { + register_fullsort_benchmarks(10, 10000, 10, 0x1000, 0x1); + register_fullsort_benchmarks(10, 1000000, 10, 0x1000, 0x1); + register_fullsort_benchmarks(10, 1000000, 10, 0x1000, 0x1); +} + +} + +#include "vxsort_targets_disable.h" diff --git a/tests/fullsort/fullsort.avx512.u.cpp b/tests/fullsort/fullsort.avx512.u.cpp new file mode 100644 index 0000000..667c510 --- /dev/null +++ b/tests/fullsort/fullsort.avx512.u.cpp @@ -0,0 +1,24 @@ +#include "vxsort_targets_enable_avx512.h" + +#include "gtest/gtest.h" + +#include +#include "fullsort_test.h" +#include "../sort_fixtures.h" + +namespace vxsort_tests { +using namespace vxsort::types; +using testing::Types; + +using VM = vxsort::vector_machine; +using namespace vxsort; + +void register_fullsort_avx512_u_tests() { + register_fullsort_benchmarks(10, 10000, 10, 0x1000, 0x1); + register_fullsort_benchmarks(10, 1000000, 10, 0x1000, 0x1); + register_fullsort_benchmarks(10, 1000000, 10, 0x1000, 0x1); +} + +} + +#include "vxsort_targets_disable.h" diff --git a/tests/fullsort/fullsort_test.h b/tests/fullsort/fullsort_test.h index bab742b..51f883b 100644 --- a/tests/fullsort/fullsort_test.h +++ b/tests/fullsort/fullsort_test.h @@ -5,7 +5,10 @@ #include #include #include +#include +#include "../util.h" +#include "../sort_fixtures.h" #include "../test_isa.h" #include "vxsort.h" @@ -14,9 +17,11 @@ using namespace vxsort::types; using ::vxsort::vector_machine; template -void vxsort_test(std::vector& V) { +void vxsort_pattern_test(SortPattern, usize size, T first_value, T stride) { VXSORT_TEST_ISA(); + auto V = unique_values(size, first_value, stride); + auto v_copy = std::vector(V); auto begin = V.data(); auto end = V.data() + V.size() - 1; @@ -25,7 +30,6 @@ void vxsort_test(std::vector& V) { sorter.sort(begin, end); std::sort(v_copy.begin(), v_copy.end()); - usize size = v_copy.size(); for (usize i = 0; i < size; ++i) { if (v_copy[i] != 
V[i]) { GTEST_FAIL() << fmt::format("value at idx #{} {} != {}", i, v_copy[i], V[i]); @@ -51,7 +55,101 @@ void vxsort_hinted_test(std::vector& V, T min_value, T max_value) { GTEST_FAIL() << fmt::format("value at idx #{} {} != {}", i, v_copy[i], V[i]); } } +} + +static inline std::vector test_patterns() { + return { + SortPattern::unique_values, + SortPattern::shuffled_16_values, + SortPattern::all_equal, + }; +} + +template +struct SortTestParams2 { +public: + SortTestParams2(SortPattern pattern, usize size, i32 slack, T first_value, T value_stride) + : Pattern(pattern), Size(size), Slack(slack), FirstValue(first_value), ValueStride(value_stride) {} + SortPattern Pattern; + usize Size; + i32 Slack; + T FirstValue; + T ValueStride; +}; + +class VxSortFixture : public testing::Test { +public: + using FunctionType = std::function; + explicit VxSortFixture(FunctionType fn) : _fn(std::move(fn)) {} + + VxSortFixture(VxSortFixture const&) = delete; + + void TestBody() override { + _fn(); + } + +private: + FunctionType _fn; +}; + +template +void RegisterSingleTest(const char* test_suite_name, const char* test_name, + const char* type_param, const char* value_param, + const char* file, int line, + Lambda&& fn, Args&&... 
args) { + + testing::RegisterTest( + test_suite_name, test_name, type_param, value_param, + file, line, + [=]() mutable -> testing::Test* { return new VxSortFixture( + [=]() mutable { fn(args...); }); + }); +} + +template +void register_fullsort_benchmarks(usize start, usize stop, usize step, T first_value, T value_stride) { + if (step == 0) { + throw std::invalid_argument("step for range must be non-zero"); + } + + if constexpr (U >= 2) { + register_fullsort_benchmarks(start, stop, step, first_value, value_stride); + } + + using VM = vxsort::vxsort_machine_traits; + + // Test "slacks" are defined in terms of number of elements in the primitive size (T) + // up to the number of such elements contained in one vector type (VM::TV) + constexpr i32 slack = sizeof(typename VM::TV) / sizeof(T); + static_assert(slack > 1); + + std::vector> tests; + size_t i = start; + for (auto p : test_patterns()) { + while ((step > 0) ? (i <= stop) : (i > stop)) { + for (auto j : range(-slack, slack, 1)) { + if ((i64)i + j <= 0) + continue; + tests.push_back(SortTestParams2(p, i, j, first_value, value_stride)); + } + i *= step; + } + } + + for (auto p : tests) { + auto *test_type = get_canonical_typename(); + + auto test_size = p.Size + p.Slack; + auto test_name = fmt::format("vxsort_pattern_test<{}, {}, {}>/{}/{}", test_type, U, + magic_enum::enum_name(M), magic_enum::enum_name(p.Pattern), test_size); + + RegisterSingleTest( + "fullsort", test_name.c_str(), nullptr, + std::to_string(p.Size).c_str(), + __FILE__, __LINE__, + vxsort_pattern_test, p.Pattern, test_size, p.FirstValue, p.ValueStride); + } } } diff --git a/tests/gtest_main.cpp b/tests/gtest_main.cpp index 1be0dc2..fbf4430 100644 --- a/tests/gtest_main.cpp +++ b/tests/gtest_main.cpp @@ -3,36 +3,45 @@ #include "gtest/gtest.h" -#if defined(GTEST_OS_ESP8266) || defined(GTEST_OS_ESP32) -// Arduino-like platforms: program entry points are setup/loop instead of main. 
+namespace vxsort_tests { -#ifdef GTEST_OS_ESP8266 -extern "C" { -#endif -void setup() { testing::InitGoogleTest(); } + void register_fullsort_avx2_i_tests(); + void register_fullsort_avx512_i_tests(); + void register_fullsort_avx2_u_tests(); + void register_fullsort_avx2_f_tests(); + void register_fullsort_avx512_u_tests(); + void register_fullsort_avx512_f_tests(); -void loop() { RUN_ALL_TESTS(); } + void register_fullsort_test_matrix() { -#ifdef GTEST_OS_ESP8266 -} +#ifdef VXSORT_TEST_AVX2_I + register_fullsort_avx2_i_tests(); #endif - -#elif defined(GTEST_OS_QURT) -// QuRT: program entry point is main, but argc/argv are unusable. - -GTEST_API_ int main() { - printf("Running main() from %s\n", __FILE__); - testing::InitGoogleTest(); - return RUN_ALL_TESTS(); -} -#else -// Normal platforms: program entry point is main, argc/argv are initialized. +#ifdef VXSORT_TEST_AVX2_U + register_fullsort_avx2_u_tests(); +#endif +#ifdef VXSORT_TEST_AVX2_F + register_fullsort_avx2_f_tests(); +#endif +#ifdef VXSORT_TEST_AVX512_I + register_fullsort_avx512_i_tests(); +#endif +#ifdef VXSORT_TEST_AVX512_U + register_fullsort_avx512_u_tests(); +#endif +#ifdef VXSORT_TEST_AVX512_F + register_fullsort_avx512_f_tests(); +#endif + } +} // namespace vxsort_tests GTEST_API_ int main(int argc, char **argv) { backward::SignalHandling sh; testing::InitGoogleTest(&argc, argv); + + vxsort_tests::register_fullsort_test_matrix(); + return RUN_ALL_TESTS(); } -#endif \ No newline at end of file diff --git a/tests/mini_tests/masked_load_store.avx2.cpp b/tests/mini_tests/masked_load_store.avx2.cpp index 79720bc..f70d5d7 100644 --- a/tests/mini_tests/masked_load_store.avx2.cpp +++ b/tests/mini_tests/masked_load_store.avx2.cpp @@ -11,13 +11,13 @@ template using AVX2MaskedLoadStoreTest = PageWithLavaBoundariesFixture; using TestTypes = ::testing::Types< -#ifdef VXSORT_TEST_AVX2_I16 +#ifdef VXSORT_TEST_AVX2_I i16, i32, i64 #endif -#ifdef VXSORT_TEST_AVX2_U16 +#ifdef VXSORT_TEST_AVX2_U u16, u32, u64 
#endif -#ifdef VXSORT_TEST_AVX2_F32 +#ifdef VXSORT_TEST_AVX2_F f32, f64 #endif >; diff --git a/tests/mini_tests/masked_load_store.avx512.cpp b/tests/mini_tests/masked_load_store.avx512.cpp index ef1f6b8..6e925ee 100644 --- a/tests/mini_tests/masked_load_store.avx512.cpp +++ b/tests/mini_tests/masked_load_store.avx512.cpp @@ -11,13 +11,13 @@ template using AVX512MaskedLoadStoreTest = PageWithLavaBoundariesFixture; using TestTypes = ::testing::Types< -#ifdef VXSORT_TEST_AVX512_I16 +#ifdef VXSORT_TEST_AVX512_I i16, i32, i64 #endif -#ifdef VXSORT_TEST_AVX512_U16 +#ifdef VXSORT_TEST_AVX512_U u16, u32, u64 #endif -#ifdef VXSORT_TEST_AVX512_F32 +#ifdef VXSORT_TEST_AVX512_F f32, f64 #endif >; diff --git a/tests/mini_tests/pack_machine.avx2.cpp b/tests/mini_tests/pack_machine.avx2.cpp index 4f30946..fbdd1ad 100644 --- a/tests/mini_tests/pack_machine.avx2.cpp +++ b/tests/mini_tests/pack_machine.avx2.cpp @@ -14,13 +14,13 @@ template using PackMachineAVX2Test = PackMachineTest; using TestTypes = ::testing::Types< -#ifdef VXSORT_TEST_AVX2_I16 +#ifdef VXSORT_TEST_AVX2_I i16, i32, i64 #endif -#ifdef VXSORT_TEST_AVX2_U16 +#ifdef VXSORT_TEST_AVX2_U u16, u32, u64 #endif -#ifdef VXSORT_TEST_AVX2_F32 +#ifdef VXSORT_TEST_AVX2_F f32, f64 #endif >; diff --git a/tests/mini_tests/pack_machine.avx512.cpp b/tests/mini_tests/pack_machine.avx512.cpp index 75807ad..932408e 100644 --- a/tests/mini_tests/pack_machine.avx512.cpp +++ b/tests/mini_tests/pack_machine.avx512.cpp @@ -15,13 +15,13 @@ template using PackMachineAVX512Test = PackMachineTest; using TestTypes = ::testing::Types< -#ifdef VXSORT_TEST_AVX512_I16 +#ifdef VXSORT_TEST_AVX512_I i16, i32, i64 #endif -#ifdef VXSORT_TEST_AVX512_U16 +#ifdef VXSORT_TEST_AVX512_U u16, u32, u64 #endif -#ifdef VXSORT_TEST_AVX512_F32 +#ifdef VXSORT_TEST_AVX512_F f32, f64 #endif >; diff --git a/tests/mini_tests/partition_machine.avx2.cpp b/tests/mini_tests/partition_machine.avx2.cpp index 53a8581..e2e1ea8 100644 --- 
a/tests/mini_tests/partition_machine.avx2.cpp +++ b/tests/mini_tests/partition_machine.avx2.cpp @@ -13,13 +13,13 @@ template using PartitionMachineAVX2Test = PageWithLavaBoundariesFixture; using TestTypes = ::testing::Types< -#ifdef VXSORT_TEST_AVX2_I16 +#ifdef VXSORT_TEST_AVX2_I i16, i32, i64 #endif -#ifdef VXSORT_TEST_AVX2_U16 +#ifdef VXSORT_TEST_AVX2_U u16, u32, u64 #endif -#ifdef VXSORT_TEST_AVX2_F32 +#ifdef VXSORT_TEST_AVX2_F f32, f64 #endif >; diff --git a/tests/mini_tests/partition_machine.avx512.cpp b/tests/mini_tests/partition_machine.avx512.cpp index 138f4a2..cfeea44 100644 --- a/tests/mini_tests/partition_machine.avx512.cpp +++ b/tests/mini_tests/partition_machine.avx512.cpp @@ -13,13 +13,13 @@ template using PartitionMachineAVX512Test = PageWithLavaBoundariesFixture; using TestTypes = ::testing::Types< -#ifdef VXSORT_TEST_AVX512_I16 +#ifdef VXSORT_TEST_AVX512_I i16, i32, i64 #endif -#ifdef VXSORT_TEST_AVX512_U16 +#ifdef VXSORT_TEST_AVX512_U u16, u32, u64 #endif -#ifdef VXSORT_TEST_AVX512_F32 +#ifdef VXSORT_TEST_AVX512_F f32, f64 #endif >; diff --git a/tests/sort_fixtures.h b/tests/sort_fixtures.h index 5595e9c..c674a13 100644 --- a/tests/sort_fixtures.h +++ b/tests/sort_fixtures.h @@ -16,18 +16,6 @@ using namespace vxsort::types; using testing::ValuesIn; using testing::Types; - -enum class SortPattern { - unique_values, - shuffled_16_values, - all_equal, - ascending_int, - descending_int, - pipe_organ, - push_front, - push_middle -}; - /// @brief This sort fixture /// @tparam T /// @tparam AlignTo diff --git a/tests/util.h b/tests/util.h index a14e5d5..2edd183 100644 --- a/tests/util.h +++ b/tests/util.h @@ -5,12 +5,26 @@ #include #include #include - +#ifndef VXSORT_COMPILER_MSVC +#include +#endif +#include #include namespace vxsort_tests { using namespace vxsort::types; +enum class SortPattern { + unique_values, + shuffled_16_values, + all_equal, + ascending_int, + descending_int, + pipe_organ, + push_front, + push_middle +}; + const 
std::random_device::result_type global_bench_random_seed = 666; template @@ -120,6 +134,41 @@ std::vector push_middle(usize size, T start, T stride) { return v; } +template +const char *get_canonical_typename() { +#ifdef VXSORT_COMPILER_MSVC + auto realname = typeid(T).name(); +#else + auto realname = abi::__cxa_demangle(typeid(T).name(), nullptr, nullptr, nullptr); +#endif + + if (realname == nullptr) { + return "unknown"; + } else if (std::strcmp(realname, "long") == 0) + return "i64"; + else if (std::strcmp(realname, "unsigned long") == 0) + return "u64"; + else if (std::strcmp(realname, "int") == 0) + return "i32"; + else if (std::strcmp(realname, "unsigned int") == 0) + return "u32"; + else if (std::strcmp(realname, "short") == 0) + return "i16"; + else if (std::strcmp(realname, "unsigned short") == 0) + return "u16"; + else if (std::strcmp(realname, "char") == 0) + return "i8"; + else if (std::strcmp(realname, "unsigned char") == 0) + return "u8"; + else if (std::strcmp(realname, "float") == 0) + return "f32"; + else if (std::strcmp(realname, "double") == 0) + return "f64"; + + else + return realname; +} + } #endif From 4b5166ef0292c3d29923356a2f9cbb027299b49a Mon Sep 17 00:00:00 2001 From: damageboy <125730+damageboy@users.noreply.github.com> Date: Mon, 2 Oct 2023 15:05:08 +0300 Subject: [PATCH 15/21] tests: rewrite smallsort tests, to reduce code-bloat and chances for manual errors - Same manual test registration mechanism taken from fullsort tests - For now, still, only unique values data-sets are generated - Test "sizes" are coded as KB and adjusted down to actual type/element count --- tests/CMakeLists.txt | 22 +--- tests/fullsort/fullsort.avx2.f.cpp | 9 +- tests/fullsort/fullsort.avx2.i.cpp | 11 +- tests/fullsort/fullsort.avx2.u.cpp | 11 +- tests/fullsort/fullsort.avx512.f.cpp | 9 +- tests/fullsort/fullsort.avx512.i.cpp | 11 +- tests/fullsort/fullsort.avx512.u.cpp | 11 +- tests/fullsort/fullsort_test.h | 97 ++++++---------- tests/gtest_main.cpp | 41 
++++--- tests/smallsort/smallsort.avx2.cpp | 133 --------------------- tests/smallsort/smallsort.avx2.f.cpp | 21 ++++ tests/smallsort/smallsort.avx2.i.cpp | 23 ++++ tests/smallsort/smallsort.avx2.u.cpp | 23 ++++ tests/smallsort/smallsort.avx512.cpp | 137 ---------------------- tests/smallsort/smallsort.avx512.f.cpp | 21 ++++ tests/smallsort/smallsort.avx512.i.cpp | 23 ++++ tests/smallsort/smallsort.avx512.u.cpp | 23 ++++ tests/smallsort/smallsort_test.h | 109 ++++++++++++++++-- tests/sort_fixtures.h | 153 ++++--------------------- tests/{util.h => test_vectors.h} | 2 +- 20 files changed, 339 insertions(+), 551 deletions(-) delete mode 100644 tests/smallsort/smallsort.avx2.cpp create mode 100644 tests/smallsort/smallsort.avx2.f.cpp create mode 100644 tests/smallsort/smallsort.avx2.i.cpp create mode 100644 tests/smallsort/smallsort.avx2.u.cpp delete mode 100644 tests/smallsort/smallsort.avx512.cpp create mode 100644 tests/smallsort/smallsort.avx512.f.cpp create mode 100644 tests/smallsort/smallsort.avx512.i.cpp create mode 100644 tests/smallsort/smallsort.avx512.u.cpp rename tests/{util.h => test_vectors.h} (99%) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 224872f..bf9a661 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -44,32 +44,12 @@ list(APPEND test_SOURCES ) if (${PROCESSOR_IS_X86}) - set(test_avx2_SOURCES ${test_SOURCES}) - list(APPEND test_avx2_SOURCES - smallsort/smallsort.avx2.cpp - fullsort/fullsort.avx2.i.cpp - mini_tests/masked_load_store.avx2.cpp - mini_tests/partition_machine.avx2.cpp - mini_tests/pack_machine.avx2.cpp - ) - - set(test_avx512_SOURCES ${test_SOURCES}) - list(APPEND test_avx512_SOURCES - smallsort/smallsort.avx512.cpp - fullsort/fullsort.avx512.i.cpp - mini_tests/masked_load_store.avx512.cpp - mini_tests/partition_machine.avx512.cpp - mini_tests/pack_machine.avx512.cpp - ) - - - foreach(v ${x86_isas}) foreach(tf ${sort_types}) string(TOUPPER ${v} vu) add_executable(${TARGET_NAME}_${v}_${tf} 
${test_SOURCES} ${test_HEADERS} - smallsort/smallsort.${v}.cpp + smallsort/smallsort.${v}.${tf}.cpp fullsort/fullsort.${v}.${tf}.cpp mini_tests/masked_load_store.${v}.cpp mini_tests/partition_machine.${v}.cpp diff --git a/tests/fullsort/fullsort.avx2.f.cpp b/tests/fullsort/fullsort.avx2.f.cpp index efb0fab..09fdd40 100644 --- a/tests/fullsort/fullsort.avx2.f.cpp +++ b/tests/fullsort/fullsort.avx2.f.cpp @@ -7,17 +7,12 @@ namespace vxsort_tests { using namespace vxsort::types; -using testing::Types; - using VM = vxsort::vector_machine; -using namespace vxsort; void register_fullsort_avx2_f_tests() { - register_fullsort_benchmarks(10, 1000000, 10, 1234.5, 0.1); - register_fullsort_benchmarks(10, 1000000, 10, 1234.5, 0.1); + register_fullsort_tests(10, 1000000, 10, 1234.5, 0.1); + register_fullsort_tests(10, 1000000, 10, 1234.5, 0.1); } - } - #include "vxsort_targets_disable.h" diff --git a/tests/fullsort/fullsort.avx2.i.cpp b/tests/fullsort/fullsort.avx2.i.cpp index 6c4efd1..eabb14e 100644 --- a/tests/fullsort/fullsort.avx2.i.cpp +++ b/tests/fullsort/fullsort.avx2.i.cpp @@ -7,18 +7,13 @@ namespace vxsort_tests { using namespace vxsort::types; -using testing::Types; - using VM = vxsort::vector_machine; -using namespace vxsort; void register_fullsort_avx2_i_tests() { - register_fullsort_benchmarks(10, 10000, 10, 0x1000, 0x1); - register_fullsort_benchmarks(10, 1000000, 10, 0x1000, 0x1); - register_fullsort_benchmarks(10, 1000000, 10, 0x1000, 0x1); + register_fullsort_tests(10, 10000, 10, 0x1000, 0x1); + register_fullsort_tests(10, 1000000, 10, 0x1000, 0x1); + register_fullsort_tests(10, 1000000, 10, 0x1000, 0x1); } - } - #include "vxsort_targets_disable.h" diff --git a/tests/fullsort/fullsort.avx2.u.cpp b/tests/fullsort/fullsort.avx2.u.cpp index 2d57965..7481724 100644 --- a/tests/fullsort/fullsort.avx2.u.cpp +++ b/tests/fullsort/fullsort.avx2.u.cpp @@ -7,18 +7,13 @@ namespace vxsort_tests { using namespace vxsort::types; -using testing::Types; - using VM = 
vxsort::vector_machine; -using namespace vxsort; void register_fullsort_avx2_u_tests() { - register_fullsort_benchmarks(10, 10000, 10, 0x1000, 0x1); - register_fullsort_benchmarks(10, 1000000, 10, 0x1000, 0x1); - register_fullsort_benchmarks(10, 1000000, 10, 0x1000, 0x1); + register_fullsort_tests(10, 10000, 10, 0x1000, 0x1); + register_fullsort_tests(10, 1000000, 10, 0x1000, 0x1); + register_fullsort_tests(10, 1000000, 10, 0x1000, 0x1); } - } - #include "vxsort_targets_disable.h" diff --git a/tests/fullsort/fullsort.avx512.f.cpp b/tests/fullsort/fullsort.avx512.f.cpp index 28619d7..fab937e 100644 --- a/tests/fullsort/fullsort.avx512.f.cpp +++ b/tests/fullsort/fullsort.avx512.f.cpp @@ -4,20 +4,15 @@ #include #include "fullsort_test.h" -#include "../sort_fixtures.h" namespace vxsort_tests { using namespace vxsort::types; -using testing::Types; - using VM = vxsort::vector_machine; -using namespace vxsort; void register_fullsort_avx512_f_tests() { - register_fullsort_benchmarks(10, 1000000, 10, 1234.5, 0.1); - register_fullsort_benchmarks(10, 1000000, 10, 1234.5, 0.1); + register_fullsort_tests(10, 1000000, 10, 1234.5, 0.1); + register_fullsort_tests(10, 1000000, 10, 1234.5, 0.1); } - } #include "vxsort_targets_disable.h" diff --git a/tests/fullsort/fullsort.avx512.i.cpp b/tests/fullsort/fullsort.avx512.i.cpp index 68da451..b4725ac 100644 --- a/tests/fullsort/fullsort.avx512.i.cpp +++ b/tests/fullsort/fullsort.avx512.i.cpp @@ -4,21 +4,16 @@ #include #include "fullsort_test.h" -#include "../sort_fixtures.h" namespace vxsort_tests { using namespace vxsort::types; -using testing::Types; - using VM = vxsort::vector_machine; -using namespace vxsort; void register_fullsort_avx512_i_tests() { - register_fullsort_benchmarks(10, 10000, 10, 0x1000, 0x1); - register_fullsort_benchmarks(10, 1000000, 10, 0x1000, 0x1); - register_fullsort_benchmarks(10, 1000000, 10, 0x1000, 0x1); + register_fullsort_tests(10, 10000, 10, 0x1000, 0x1); + register_fullsort_tests(10, 1000000, 10, 
0x1000, 0x1); + register_fullsort_tests(10, 1000000, 10, 0x1000, 0x1); } - } #include "vxsort_targets_disable.h" diff --git a/tests/fullsort/fullsort.avx512.u.cpp b/tests/fullsort/fullsort.avx512.u.cpp index 667c510..5d400e9 100644 --- a/tests/fullsort/fullsort.avx512.u.cpp +++ b/tests/fullsort/fullsort.avx512.u.cpp @@ -4,21 +4,16 @@ #include #include "fullsort_test.h" -#include "../sort_fixtures.h" namespace vxsort_tests { using namespace vxsort::types; -using testing::Types; - using VM = vxsort::vector_machine; -using namespace vxsort; void register_fullsort_avx512_u_tests() { - register_fullsort_benchmarks(10, 10000, 10, 0x1000, 0x1); - register_fullsort_benchmarks(10, 1000000, 10, 0x1000, 0x1); - register_fullsort_benchmarks(10, 1000000, 10, 0x1000, 0x1); + register_fullsort_tests(10, 10000, 10, 0x1000, 0x1); + register_fullsort_tests(10, 1000000, 10, 0x1000, 0x1); + register_fullsort_tests(10, 1000000, 10, 0x1000, 0x1); } - } #include "vxsort_targets_disable.h" diff --git a/tests/fullsort/fullsort_test.h b/tests/fullsort/fullsort_test.h index 51f883b..939754c 100644 --- a/tests/fullsort/fullsort_test.h +++ b/tests/fullsort/fullsort_test.h @@ -7,7 +7,7 @@ #include #include -#include "../util.h" +#include "../test_vectors.h" #include "../sort_fixtures.h" #include "../test_isa.h" #include "vxsort.h" @@ -17,7 +17,7 @@ using namespace vxsort::types; using ::vxsort::vector_machine; template -void vxsort_pattern_test(SortPattern, usize size, T first_value, T stride) { +void vxsort_pattern_test(sort_pattern, usize size, T first_value, T stride) { VXSORT_TEST_ISA(); auto V = unique_values(size, first_value, stride); @@ -57,64 +57,55 @@ void vxsort_hinted_test(std::vector& V, T min_value, T max_value) { } } -static inline std::vector test_patterns() { +static inline std::vector fullsort_test_patterns() { return { - SortPattern::unique_values, - SortPattern::shuffled_16_values, - SortPattern::all_equal, + sort_pattern::unique_values, + //sort_pattern::shuffled_16_values, 
+ //sort_pattern::all_equal, }; } template -struct SortTestParams2 { +struct fullsort_test_params { public: - SortTestParams2(SortPattern pattern, usize size, i32 slack, T first_value, T value_stride) - : Pattern(pattern), Size(size), Slack(slack), FirstValue(first_value), ValueStride(value_stride) {} - SortPattern Pattern; - usize Size; - i32 Slack; - T FirstValue; - T ValueStride; + fullsort_test_params(sort_pattern pattern, usize size, i32 slack, T first_value, T value_stride) + : pattern(pattern), size(size), slack(slack), first_value(first_value), stride(value_stride) {} + sort_pattern pattern; + usize size; + i32 slack; + T first_value; + T stride; }; -class VxSortFixture : public testing::Test { -public: - using FunctionType = std::function; - explicit VxSortFixture(FunctionType fn) : _fn(std::move(fn)) {} +template +std::vector> +gen_params(usize start, usize stop, usize step, i32 slack, T first_value, T value_stride) +{ + auto patterns = fullsort_test_patterns(); - VxSortFixture(VxSortFixture const&) = delete; + using TestParams = fullsort_test_params; + std::vector tests; - void TestBody() override { - _fn(); + for (auto p : fullsort_test_patterns()) { + for (auto i : multiply_range(start, stop, step)) { + for (auto j : range(-slack, slack, 1)) { + if ((i64)i + j <= 0) + continue; + tests.push_back(fullsort_test_params(p, i, j, first_value, value_stride)); + } + } } - -private: - FunctionType _fn; -}; - -template -void RegisterSingleTest(const char* test_suite_name, const char* test_name, - const char* type_param, const char* value_param, - const char* file, int line, - Lambda&& fn, Args&&... 
args) { - - testing::RegisterTest( - test_suite_name, test_name, type_param, value_param, - file, line, - [=]() mutable -> testing::Test* { return new VxSortFixture( - [=]() mutable { fn(args...); }); - }); + return tests; } - template -void register_fullsort_benchmarks(usize start, usize stop, usize step, T first_value, T value_stride) { +void register_fullsort_tests(usize start, usize stop, usize step, T first_value, T value_stride) { if (step == 0) { throw std::invalid_argument("step for range must be non-zero"); } if constexpr (U >= 2) { - register_fullsort_benchmarks(start, stop, step, first_value, value_stride); + register_fullsort_tests(start, stop, step, first_value, value_stride); } using VM = vxsort::vxsort_machine_traits; @@ -124,34 +115,22 @@ void register_fullsort_benchmarks(usize start, usize stop, usize step, T first_v constexpr i32 slack = sizeof(typename VM::TV) / sizeof(T); static_assert(slack > 1); - std::vector> tests; - size_t i = start; - for (auto p : test_patterns()) { - while ((step > 0) ? 
(i <= stop) : (i > stop)) { - for (auto j : range(-slack, slack, 1)) { - if ((i64)i + j <= 0) - continue; - tests.push_back(SortTestParams2(p, i, j, first_value, value_stride)); - } - i *= step; - } - } + auto tests = gen_params(start, stop, step, slack, first_value, value_stride); for (auto p : tests) { auto *test_type = get_canonical_typename(); - auto test_size = p.Size + p.Slack; + auto test_size = p.size + p.slack; auto test_name = fmt::format("vxsort_pattern_test<{}, {}, {}>/{}/{}", test_type, U, - magic_enum::enum_name(M), magic_enum::enum_name(p.Pattern), test_size); + magic_enum::enum_name(M), magic_enum::enum_name(p.pattern), test_size); - RegisterSingleTest( + RegisterSingleLambdaTest( "fullsort", test_name.c_str(), nullptr, - std::to_string(p.Size).c_str(), + std::to_string(test_size).c_str(), __FILE__, __LINE__, - vxsort_pattern_test, p.Pattern, test_size, p.FirstValue, p.ValueStride); + vxsort_pattern_test, p.pattern, test_size, p.first_value, p.stride); } } - } #endif // VXSORT_FULLSORT_TEST_H diff --git a/tests/gtest_main.cpp b/tests/gtest_main.cpp index fbf4430..414acb5 100644 --- a/tests/gtest_main.cpp +++ b/tests/gtest_main.cpp @@ -6,32 +6,45 @@ namespace vxsort_tests { - void register_fullsort_avx2_i_tests(); - void register_fullsort_avx512_i_tests(); - void register_fullsort_avx2_u_tests(); - void register_fullsort_avx2_f_tests(); - void register_fullsort_avx512_u_tests(); - void register_fullsort_avx512_f_tests(); - - void register_fullsort_test_matrix() { +void register_fullsort_avx2_i_tests(); +void register_fullsort_avx512_i_tests(); +void register_fullsort_avx2_u_tests(); +void register_fullsort_avx2_f_tests(); +void register_fullsort_avx512_u_tests(); +void register_fullsort_avx512_f_tests(); + +void register_smallsort_avx2_i_tests(); +void register_smallsort_avx512_i_tests(); +void register_smallsort_avx2_u_tests(); +void register_smallsort_avx2_f_tests(); +void register_smallsort_avx512_u_tests(); +void 
register_smallsort_avx512_f_tests(); + +void register_fullsort_test_matrix() { #ifdef VXSORT_TEST_AVX2_I - register_fullsort_avx2_i_tests(); + register_fullsort_avx2_i_tests(); + register_smallsort_avx2_i_tests(); #endif #ifdef VXSORT_TEST_AVX2_U - register_fullsort_avx2_u_tests(); + register_fullsort_avx2_u_tests(); + register_smallsort_avx2_u_tests(); #endif #ifdef VXSORT_TEST_AVX2_F - register_fullsort_avx2_f_tests(); + register_fullsort_avx2_f_tests(); + register_smallsort_avx2_f_tests(); #endif #ifdef VXSORT_TEST_AVX512_I - register_fullsort_avx512_i_tests(); + register_fullsort_avx512_i_tests(); + register_smallsort_avx512_i_tests(); #endif #ifdef VXSORT_TEST_AVX512_U - register_fullsort_avx512_u_tests(); + register_fullsort_avx512_u_tests(); + register_smallsort_avx512_u_tests(); #endif #ifdef VXSORT_TEST_AVX512_F - register_fullsort_avx512_f_tests(); + register_fullsort_avx512_f_tests(); + register_smallsort_avx512_f_tests(); #endif } } // namespace vxsort_tests diff --git a/tests/smallsort/smallsort.avx2.cpp b/tests/smallsort/smallsort.avx2.cpp deleted file mode 100644 index 34b7870..0000000 --- a/tests/smallsort/smallsort.avx2.cpp +++ /dev/null @@ -1,133 +0,0 @@ -#include "vxsort_targets_enable_avx2.h" - -#include "gtest/gtest.h" - -#include - -#include "smallsort_test.h" -#include "../sort_fixtures.h" - -namespace vxsort_tests { -using namespace vxsort::types; -using VM = vxsort::vector_machine; - -#ifdef VXSORT_TEST_AVX2_I16 -auto bitonic_machine_allvalues_avx2_i16 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 16, 64, 16, 0, 0x1000, 0x1)); -auto bitonic_allvalues_avx2_i16 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 8192, 1, 0, 0x1000, 0x1)); -struct BitonicMachineAVX2_i16 : public ParametrizedSortFixture {}; -struct BitonicAVX2_i16 : public ParametrizedSortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_i16, bitonic_machine_allvalues_avx2_i16, PrintSortTestParams()); 
-INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_i16, bitonic_allvalues_avx2_i16, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX2_I32 -auto bitonic_machine_allvalues_avx2_i32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 8, 32, 8, 0, 0x1000, 0x1)); -auto bitonic_allvalues_avx2_i32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 4096, 1, 0, 0x1000, 0x1)); -struct BitonicMachineAVX2_i32: public ParametrizedSortFixture {}; -struct BitonicAVX2_i32 : public ParametrizedSortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_i32, bitonic_machine_allvalues_avx2_i32, PrintSortTestParams()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_i32, bitonic_allvalues_avx2_i32, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX2_I64 -auto bitonic_machine_allvalues_avx2_i64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 4, 16, 4, 0, 0x1000, 0x1)); -auto bitonic_allvalues_avx2_i64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 2048, 1, 0, 0x1000, 0x1)); -struct BitonicMachineAVX2_i64 : public ParametrizedSortFixture {}; -struct BitonicAVX2_i64 : public ParametrizedSortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_i64, bitonic_machine_allvalues_avx2_i64, PrintSortTestParams()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_i64, bitonic_allvalues_avx2_i64, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX2_U16 -auto bitonic_machine_allvalues_avx2_u16 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 16, 64, 16, 0, 0x1000, 0x1)); -auto bitonic_allvalues_avx2_u16 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 8192, 1, 0, 0x1000, 0x1)); -struct BitonicMachineAVX2_u16 : public ParametrizedSortFixture {}; -struct BitonicAVX2_u16 : public ParametrizedSortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_u16, bitonic_machine_allvalues_avx2_u16, PrintSortTestParams()); 
-INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_u16, bitonic_allvalues_avx2_u16, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX2_U32 -auto bitonic_machine_allvalues_avx2_u32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 8, 32, 8, 0, 0x1000, 0x1)); -auto bitonic_allvalues_avx2_u32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 4096, 1, 0, 0x1000, 0x1)); -struct BitonicMachineAVX2_u32 : public ParametrizedSortFixture {}; -struct BitonicAVX2_u32 : public ParametrizedSortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_u32, bitonic_machine_allvalues_avx2_u32, PrintSortTestParams()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_u32, bitonic_allvalues_avx2_u32, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX2_U64 -auto bitonic_machine_allvalues_avx2_u64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 4, 16, 4, 0, 0x1000, 0x1)); -auto bitonic_allvalues_avx2_u64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 2048, 1, 0, 0x1000, 0x1)); -struct BitonicMachineAVX2_u64 : public ParametrizedSortFixture {}; -struct BitonicAVX2_u64 : public ParametrizedSortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_u64, bitonic_machine_allvalues_avx2_u64, PrintSortTestParams()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_u64, bitonic_allvalues_avx2_u64, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX2_F32 -auto bitonic_machine_allvalues_avx2_f32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 8, 32, 8, 0, 1234.5f, 0.1f)); -auto bitonic_allvalues_avx2_f32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 4096, 1, 0, 1234.5f, 0.1f)); -struct BitonicMachineAVX2_f32 : public ParametrizedSortFixture {}; -struct BitonicAVX2_f32 : public ParametrizedSortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_f32, bitonic_machine_allvalues_avx2_f32, 
PrintSortTestParams()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_f32, bitonic_allvalues_avx2_f32, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX2_F64 -auto bitonic_machine_allvalues_avx2_f64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 4, 16, 4, 0, 1234.5, 0.1)); -auto bitonic_allvalues_avx2_f64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 2048, 1, 0, 1234.5, 0.1)); -struct BitonicMachineAVX2_f64 : public ParametrizedSortFixture {}; -struct BitonicAVX2_f64 : public ParametrizedSortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX2_f64, bitonic_machine_allvalues_avx2_f64, PrintSortTestParams()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX2, BitonicAVX2_f64, bitonic_allvalues_avx2_f64, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX2_I16 -TEST_P(BitonicMachineAVX2_i16, BitonicSortAVX2Asc) { bitonic_machine_sort_test(V); } -TEST_P(BitonicAVX2_i16, BitonicSortAVX2) { bitonic_sort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX2_I32 -TEST_P(BitonicMachineAVX2_i32, BitonicSortAVX2Asc) { bitonic_machine_sort_test(V); } -TEST_P(BitonicAVX2_i32, BitonicSortAVX2) { bitonic_sort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX2_I64 -TEST_P(BitonicMachineAVX2_i64, BitonicSortAVX2Asc) { bitonic_machine_sort_test(V); } -TEST_P(BitonicAVX2_i64, BitonicSortAVX2) { bitonic_sort_test(V); } -#endif -#ifdef VXSORT_TEST_AVX2_U16 -TEST_P(BitonicMachineAVX2_u16, BitonicSortAVX2Asc) { bitonic_machine_sort_test(V); } -TEST_P(BitonicAVX2_u16, BitonicSortAVX2) { bitonic_sort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX2_U32 -TEST_P(BitonicMachineAVX2_u32, BitonicSortAVX2Asc) { bitonic_machine_sort_test(V); } -TEST_P(BitonicAVX2_u32, BitonicSortAVX2) { bitonic_sort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX2_U64 -TEST_P(BitonicMachineAVX2_u64, BitonicSortAVX2Asc) { bitonic_machine_sort_test(V); } -TEST_P(BitonicAVX2_u64, BitonicSortAVX2) { bitonic_sort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX2_F32 
-TEST_P(BitonicMachineAVX2_f32, BitonicSortAVX2Asc) { bitonic_machine_sort_test(V); } -TEST_P(BitonicAVX2_f32, BitonicSortAVX2) { bitonic_sort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX2_F64 -TEST_P(BitonicMachineAVX2_f64, BitonicSortAVX2Asc) { bitonic_machine_sort_test(V); } -TEST_P(BitonicAVX2_f64, BitonicSortAVX2) { bitonic_sort_test(V); } -#endif - -//TEST_P(BitonicMachineAVX2_i32, BitonicSortAVX2Desc) { bitonic_machine_sort_test(V); } -//TEST_P(BitonicMachineAVX2_u32, BitonicSortAVX2Desc) { bitonic_machine_sort_test(V); } -//TEST_P(BitonicMachineAVX2_i64, BitonicSortAVX2Desc) { bitonic_machine_sort_test(V); } -//TEST_P(BitonicMachineAVX2_u64, BitonicSortAVX2Desc) { bitonic_machine_sort_test(V); } -//TEST_P(BitonicMachineAVX2_f32, BitonicSortAVX2Desc) { bitonic_machine_sort_test(V); } -//TEST_P(BitonicMachineAVX2_f64, BitonicSortAVX2Desc) { bitonic_machine_sort_test(V); } - -} -#include "vxsort_targets_disable.h" diff --git a/tests/smallsort/smallsort.avx2.f.cpp b/tests/smallsort/smallsort.avx2.f.cpp new file mode 100644 index 0000000..fae5af5 --- /dev/null +++ b/tests/smallsort/smallsort.avx2.f.cpp @@ -0,0 +1,21 @@ +#include "vxsort_targets_enable_avx2.h" + +#include "gtest/gtest.h" + +#include +#include "smallsort_test.h" + +namespace vxsort_tests { +using namespace vxsort::types; +using VM = vxsort::vector_machine; + +void register_smallsort_avx2_f_tests() { + register_bitonic_tests(16*1024, 1234.5, 0.1); + register_bitonic_tests(16*1024, 1234.5, 0.1); + + register_bitonic_machine_tests(1234.5, 0.1); + register_bitonic_machine_tests(1234.5, 0.1); +} +} + +#include "vxsort_targets_disable.h" diff --git a/tests/smallsort/smallsort.avx2.i.cpp b/tests/smallsort/smallsort.avx2.i.cpp new file mode 100644 index 0000000..0cfd817 --- /dev/null +++ b/tests/smallsort/smallsort.avx2.i.cpp @@ -0,0 +1,23 @@ +#include "vxsort_targets_enable_avx2.h" + +#include "gtest/gtest.h" + +#include +#include "smallsort_test.h" + +namespace vxsort_tests { +using namespace 
vxsort::types; +using VM = vxsort::vector_machine; + +void register_smallsort_avx2_i_tests() { + register_bitonic_tests(16*1024, 0x1000, 0x1); + register_bitonic_tests(16*1024, 0x1000, 0x1); + register_bitonic_tests(16*1024, 0x1000, 0x1); + + register_bitonic_machine_tests(0x1000, 0x1); + register_bitonic_machine_tests(0x1000, 0x1); + register_bitonic_machine_tests(0x1000, 0x1); +} +} + +#include "vxsort_targets_disable.h" diff --git a/tests/smallsort/smallsort.avx2.u.cpp b/tests/smallsort/smallsort.avx2.u.cpp new file mode 100644 index 0000000..7dd651e --- /dev/null +++ b/tests/smallsort/smallsort.avx2.u.cpp @@ -0,0 +1,23 @@ +#include "vxsort_targets_enable_avx2.h" + +#include "gtest/gtest.h" + +#include +#include "smallsort_test.h" + +namespace vxsort_tests { +using namespace vxsort::types; +using VM = vxsort::vector_machine; + +void register_smallsort_avx2_u_tests() { + register_bitonic_tests(16*1024, 0x1000, 0x1); + register_bitonic_tests(16*1024, 0x1000, 0x1); + register_bitonic_tests(16*1024, 0x1000, 0x1); + + register_bitonic_machine_tests(0x1000, 0x1); + register_bitonic_machine_tests(0x1000, 0x1); + register_bitonic_machine_tests(0x1000, 0x1); +} +} + +#include "vxsort_targets_disable.h" diff --git a/tests/smallsort/smallsort.avx512.cpp b/tests/smallsort/smallsort.avx512.cpp deleted file mode 100644 index 9aa0648..0000000 --- a/tests/smallsort/smallsort.avx512.cpp +++ /dev/null @@ -1,137 +0,0 @@ -#include "vxsort_targets_enable_avx512.h" - -#include "gtest/gtest.h" - -#include - -#include "smallsort_test.h" -#include "../sort_fixtures.h" - -namespace vxsort_tests { -using namespace vxsort::types; -using testing::Types; - -using VM = vxsort::vector_machine; - -#ifdef VXSORT_TEST_AVX512_I16 -auto bitonic_machine_allvalues_avx512_i16 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 32, 128, 32, 0, 0x1000, 0x1)); -auto bitonic_allvalues_avx512_i16 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 8192, 1, 0, 0x1000, 0x1)); 
-struct BitonicMachineAVX512_i16 : public ParametrizedSortFixture {}; -struct BitonicAVX512_i16 : public ParametrizedSortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_i16, bitonic_machine_allvalues_avx512_i16, PrintSortTestParams()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_i16, bitonic_allvalues_avx512_i16, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX512_I32 -auto bitonic_machine_allvalues_avx512_i32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 16, 64, 16, 0, 0x1000, 0x1)); -auto bitonic_allvalues_avx512_i32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 4096, 1, 0, 0x1000, 0x1)); -struct BitonicMachineAVX512_i32 : public ParametrizedSortFixture {}; -struct BitonicAVX512_i32 : public ParametrizedSortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_i32, bitonic_machine_allvalues_avx512_i32, PrintSortTestParams()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_i32, bitonic_allvalues_avx512_i32, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX512_I64 -auto bitonic_machine_allvalues_avx512_i64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 8, 32, 8, 0, 0x1000, 0x1)); -auto bitonic_allvalues_avx512_i64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 2048, 1, 0, 0x1000, 0x1)); -struct BitonicMachineAVX512_i64 : public ParametrizedSortFixture {}; -struct BitonicAVX512_i64 : public ParametrizedSortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_i64, bitonic_machine_allvalues_avx512_i64, PrintSortTestParams()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_i64, bitonic_allvalues_avx512_i64, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX512_U16 -auto bitonic_machine_allvalues_avx512_u16 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 32, 128, 32, 0, 0x1000, 0x1)); -auto bitonic_allvalues_avx512_u16 = 
ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 8192, 1, 0, 0x1000, 0x1)); -struct BitonicMachineAVX512_u16 : public ParametrizedSortFixture {}; -struct BitonicAVX512_u16 : public ParametrizedSortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_u16, bitonic_machine_allvalues_avx512_u16, PrintSortTestParams()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_u16, bitonic_allvalues_avx512_u16, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX512_U32 -auto bitonic_machine_allvalues_avx512_u32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 16, 64, 16, 0, 0x1000, 0x1)); -auto bitonic_allvalues_avx512_u32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 4096, 1, 0, 0x1000, 0x1)); -struct BitonicMachineAVX512_u32 : public ParametrizedSortFixture {}; -struct BitonicAVX512_u32 : public ParametrizedSortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_u32, bitonic_machine_allvalues_avx512_u32, PrintSortTestParams()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_u32, bitonic_allvalues_avx512_u32, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX512_U64 -auto bitonic_machine_allvalues_avx512_u64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 8, 32, 8, 0, 0x1000, 0x1)); -auto bitonic_allvalues_avx512_u64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 2048, 1, 0, 0x1000, 0x1)); -struct BitonicMachineAVX512_u64 : public ParametrizedSortFixture {}; -struct BitonicAVX512_u64 : public ParametrizedSortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_u64, bitonic_machine_allvalues_avx512_u64, PrintSortTestParams()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_u64, bitonic_allvalues_avx512_u64, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX512_F32 -auto bitonic_machine_allvalues_avx512_f32 = 
ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 16, 64, 16, 0, 1234.5f, 0.1f)); -auto bitonic_allvalues_avx512_f32 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 4096, 1, 0, 1234.5f, 0.1f)); -struct BitonicMachineAVX512_f32 : public ParametrizedSortFixture {}; -struct BitonicAVX512_f32 : public ParametrizedSortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX512, BitonicMachineAVX512_f32, bitonic_machine_allvalues_avx512_f32, PrintSortTestParams()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_f32, bitonic_allvalues_avx512_f32, PrintSortTestParams()); -#endif - -#ifdef VXSORT_TEST_AVX512_F64 -auto bitonic_machine_allvalues_avx512_f64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 8, 32, 8, 0, 1234.5, 0.1)); -auto bitonic_allvalues_avx512_f64 = ValuesIn(SortTestParams::gen_step(SortPattern::unique_values, 1, 2048, 1, 0, 1234.5, 0.1)); -struct BitonicMachineAVX512_f64 : public ParametrizedSortFixture {}; -struct BitonicAVX512_f64 : public ParametrizedSortFixture {}; -INSTANTIATE_TEST_SUITE_P(BitonicMachineAVX2, BitonicMachineAVX512_f64, bitonic_machine_allvalues_avx512_f64, PrintSortTestParams()); -INSTANTIATE_TEST_SUITE_P(BitonicAVX512, BitonicAVX512_f64, bitonic_allvalues_avx512_f64, PrintSortTestParams()); -#endif - - -#ifdef VXSORT_TEST_AVX512_I16 -TEST_P(BitonicMachineAVX512_i16, BitonicSortAVX512Asc) { bitonic_machine_sort_test(V); } -TEST_P(BitonicAVX512_i16, BitonicSortAVX512) { bitonic_sort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX512_I32 -TEST_P(BitonicMachineAVX512_i32, BitonicSortAVX512Asc) { bitonic_machine_sort_test(V); } -TEST_P(BitonicAVX512_i32, BitonicSortAVX512) { bitonic_sort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX512_I64 -TEST_P(BitonicMachineAVX512_i64, BitonicSortAVX512Asc) { bitonic_machine_sort_test(V); } -TEST_P(BitonicAVX512_i64, BitonicSortAVX512) { bitonic_sort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX512_U16 -TEST_P(BitonicMachineAVX512_u16, 
BitonicSortAVX512Asc) { bitonic_machine_sort_test(V); } -TEST_P(BitonicAVX512_u16, BitonicSortAVX512) { bitonic_sort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX512_U32 -TEST_P(BitonicMachineAVX512_u32, BitonicSortAVX512Asc) { bitonic_machine_sort_test(V); } -TEST_P(BitonicAVX512_u32, BitonicSortAVX512) { bitonic_sort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX512_U64 -TEST_P(BitonicMachineAVX512_u64, BitonicSortAVX512Asc) { bitonic_machine_sort_test(V); } -TEST_P(BitonicAVX512_u64, BitonicSortAVX512) { bitonic_sort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX512_F32 -TEST_P(BitonicMachineAVX512_f32, BitonicSortAVX512Asc) { bitonic_machine_sort_test(V); } -TEST_P(BitonicAVX512_f32, BitonicSortAVX512) { bitonic_sort_test(V); } -#endif - -#ifdef VXSORT_TEST_AVX512_F64 -TEST_P(BitonicMachineAVX512_f64, BitonicSortAVX512Asc) { bitonic_machine_sort_test(V); } -TEST_P(BitonicAVX512_f64, BitonicSortAVX512) { bitonic_sort_test(V); } -#endif - -//TEST_P(BitonicMachineAVX512_i32, BitonicSortAVX2Desc) { bitonic_machine_sort_test(V); } -//TEST_P(BitonicMachineAVX512_u32, BitonicSortAVX2Desc) { bitonic_machine_sort_test(V); } -//TEST_P(BitonicMachineAVX512_f32, BitonicSortAVX2Desc) { bitonic_machine_sort_test(V); } -//TEST_P(BitonicMachineAVX512_i64, BitonicSortAVX2Desc) { bitonic_machine_sort_test(V); } -//TEST_P(BitonicMachineAVX512_u64, BitonicSortAVX2Desc) { bitonic_machine_sort_test(V); } -//TEST_P(BitonicMachineAVX512_f64, BitonicSortAVX2Desc) { bitonic_machine_sort_test(V); } -} - -#include "vxsort_targets_disable.h" diff --git a/tests/smallsort/smallsort.avx512.f.cpp b/tests/smallsort/smallsort.avx512.f.cpp new file mode 100644 index 0000000..a920928 --- /dev/null +++ b/tests/smallsort/smallsort.avx512.f.cpp @@ -0,0 +1,21 @@ +#include "vxsort_targets_enable_avx512.h" + +#include "gtest/gtest.h" + +#include +#include "smallsort_test.h" + +namespace vxsort_tests { +using namespace vxsort::types; +using VM = vxsort::vector_machine; + +void 
register_smallsort_avx512_f_tests() { + register_bitonic_tests(16*1024, 1234.5, 0.1); + register_bitonic_tests(16*1024, 1234.5, 0.1); + + register_bitonic_machine_tests(1234.5, 0.1); + register_bitonic_machine_tests(1234.5, 0.1); +} +} + +#include "vxsort_targets_disable.h" diff --git a/tests/smallsort/smallsort.avx512.i.cpp b/tests/smallsort/smallsort.avx512.i.cpp new file mode 100644 index 0000000..ee08ae8 --- /dev/null +++ b/tests/smallsort/smallsort.avx512.i.cpp @@ -0,0 +1,23 @@ +#include "vxsort_targets_enable_avx512.h" + +#include "gtest/gtest.h" + +#include +#include "smallsort_test.h" + +namespace vxsort_tests { +using namespace vxsort::types; +using VM = vxsort::vector_machine; + +void register_smallsort_avx512_i_tests() { + register_bitonic_tests(16*1024, 0x1000, 0x1); + register_bitonic_tests(16*1024, 0x1000, 0x1); + register_bitonic_tests(16*1024, 0x1000, 0x1); + + register_bitonic_machine_tests(0x1000, 0x1); + register_bitonic_machine_tests(0x1000, 0x1); + register_bitonic_machine_tests(0x1000, 0x1); +} +} + +#include "vxsort_targets_disable.h" diff --git a/tests/smallsort/smallsort.avx512.u.cpp b/tests/smallsort/smallsort.avx512.u.cpp new file mode 100644 index 0000000..e94e369 --- /dev/null +++ b/tests/smallsort/smallsort.avx512.u.cpp @@ -0,0 +1,23 @@ +#include "vxsort_targets_enable_avx512.h" + +#include "gtest/gtest.h" + +#include +#include "smallsort_test.h" + +namespace vxsort_tests { +using namespace vxsort::types; +using VM = vxsort::vector_machine; + +void register_smallsort_avx512_u_tests() { + register_bitonic_tests(16*1024, 0x1000, 0x1); + register_bitonic_tests(16*1024, 0x1000, 0x1); + register_bitonic_tests(16*1024, 0x1000, 0x1); + + register_bitonic_machine_tests(0x1000, 0x1); + register_bitonic_machine_tests(0x1000, 0x1); + register_bitonic_machine_tests(0x1000, 0x1); +} +} + +#include "vxsort_targets_disable.h" diff --git a/tests/smallsort/smallsort_test.h b/tests/smallsort/smallsort_test.h index 1225250..e1f20bf 100644 --- 
a/tests/smallsort/smallsort_test.h +++ b/tests/smallsort/smallsort_test.h @@ -2,6 +2,7 @@ #define VXSORT_SMALLSORT_TEST_H #include +#include #include "gtest/gtest.h" #include "../sort_fixtures.h" @@ -14,20 +15,18 @@ namespace vxsort_tests { using vxsort::vector_machine; -template -void bitonic_machine_sort_test(std::vector& V) { +template +void bitonic_machine_sort_pattern_test(sort_pattern pattern, usize size, T first_value, T stride) { VXSORT_TEST_ISA(); using BM = vxsort::smallsort::bitonic_machine; + auto V = unique_values(size, first_value, stride); + auto v_copy = std::vector(V); auto begin = V.data(); - auto size = V.size(); - if (ascending) - BM::sort_full_vectors_ascending(begin, size); - else - BM::sort_full_vectors_descending(begin, size); + BM::sort_full_vectors_ascending(begin, size); std::sort(v_copy.begin(), v_copy.end()); for (usize i = 0; i < size; ++i) { @@ -38,12 +37,13 @@ void bitonic_machine_sort_test(std::vector& V) { } template -void bitonic_sort_test(std::vector& V) { +void bitonic_sort_pattern_test(sort_pattern pattern, usize size, T first_value, T stride) { VXSORT_TEST_ISA(); + auto V = unique_values(size, first_value, stride); + auto v_copy = std::vector(V); auto begin = V.data(); - auto size = V.size(); vxsort::smallsort::bitonic::sort(begin, size); std::sort(v_copy.begin(), v_copy.end()); @@ -53,6 +53,97 @@ void bitonic_sort_test(std::vector& V) { } } } + +static inline std::vector smallsort_test_patterns() { + return { + sort_pattern::unique_values, + //sort_pattern::shuffled_16_values, + //sort_pattern::all_equal, + }; +} + +template +struct smallsort_test_params { +public: + smallsort_test_params(sort_pattern pattern, usize size, T first_value, T value_stride) + : pattern(pattern), size(size), first_value(first_value), stride(value_stride) {} + sort_pattern pattern; + usize size; + T first_value; + T stride; +}; + +template +std::vector> +param_range(usize start, usize stop, usize step, T first_value, T value_stride) { + + 
assert(step > 0); + + auto patterns = smallsort_test_patterns(); + + using TestParams = smallsort_test_params; + std::vector tests; + + for(const auto& p: smallsort_test_patterns()) { + for(usize i = start; i <= stop; i += step) { + if(static_cast(i) <= 0) + continue; + + tests.push_back(TestParams(p, i, first_value, value_stride)); + } + } + return tests; +} + +template +void register_bitonic_tests(usize test_size_bytes, T first_value, T value_stride) +{ + + auto stop = test_size_bytes / sizeof(T); + usize step = 1; + auto tests = param_range(1, stop, step, first_value, value_stride); + + for (auto p : tests) { + auto *test_type = get_canonical_typename(); + + auto test_size = p.size; + auto test_name = fmt::format("bitonic_sort_pattern_test<{}, {}>/{}/{}", test_type, + magic_enum::enum_name(M), magic_enum::enum_name(p.pattern), test_size); + + RegisterSingleLambdaTest( + "smallsort", test_name.c_str(), nullptr, + std::to_string(test_size).c_str(), + __FILE__, __LINE__, + bitonic_sort_pattern_test, p.pattern, test_size, p.first_value, p.stride); + } +} + +template +void register_bitonic_machine_tests(T first_value, T value_stride) +{ + using VM = vxsort::vxsort_machine_traits; + + // We test bitonic_machine from 1 up to 4 vectors in single vector increments + auto stop = (sizeof(typename VM::TV) * 4) / sizeof(T); + usize step = sizeof(typename VM::TV) / sizeof(T); + assert(step > 0); + + auto tests = param_range(step, stop, step, first_value, value_stride); + + for (auto p : tests) { + auto *test_type = get_canonical_typename(); + + auto test_size = p.size; + auto test_name = fmt::format("bitonic_machine_sort_pattern_test<{}, {}>/{}/{}", test_type, + magic_enum::enum_name(M), magic_enum::enum_name(p.pattern), test_size); + + RegisterSingleLambdaTest( + "smallsort", test_name.c_str(), nullptr, + std::to_string(test_size).c_str(), + __FILE__, __LINE__, + bitonic_machine_sort_pattern_test, p.pattern, test_size, p.first_value, p.stride); + } +} } #endif // 
VXSORT_SMALLSORT_TEST_H diff --git a/tests/sort_fixtures.h b/tests/sort_fixtures.h index c674a13..62bf869 100644 --- a/tests/sort_fixtures.h +++ b/tests/sort_fixtures.h @@ -3,7 +3,7 @@ #include "gtest/gtest.h" #include "stats/vxsort_stats.h" -#include "util.h" +#include "test_vectors.h" #include #include @@ -16,143 +16,34 @@ using namespace vxsort::types; using testing::ValuesIn; using testing::Types; -/// @brief This sort fixture -/// @tparam T -/// @tparam AlignTo -template -struct SortTestParams { +class VxSortLambdaFixture : public testing::Test { public: - SortPattern Pattern; - usize Size; - i32 Slack; - T FirstValue; - T ValueStride; + using FunctionType = std::function; + explicit VxSortLambdaFixture(FunctionType fn) : _fn(std::move(fn)) {} + VxSortLambdaFixture(VxSortLambdaFixture const&) = delete; - SortTestParams(SortPattern pattern, size_t size, int slack, T first_value, T value_stride) - : Pattern(pattern), Size(size), Slack(slack), FirstValue(first_value), ValueStride(value_stride) {} - - /** - * Generate sorting problems "descriptions" - * @param patterns - the sort patterns to test with - * @param start - start value for the size parameter - * @param stop - stop value for the size paraameter - * @param step - the step/multiplier for the size parameter - * @param slack - the slack parameter used to generate ranges of problem sized around a base value - * @param first_value - the smallest value in each test array - * @param value_stride - the minimal jump between array elements - * @return - */ - static std::vector gen_mult(std::vector patterns, usize start, usize stop, usize step, i32 slack, T first_value, T value_stride) { - if (step == 0) { - throw std::invalid_argument("step for range must be non-zero"); - } - - std::vector result; - size_t i = start; - for (auto p : patterns) { - while ((step > 0) ? 
(i <= stop) : (i > stop)) { - for (auto j : range(-slack, slack, 1)) { - if ((i64)i + j <= 0) - continue; - result.push_back(SortTestParams(p, i, j, first_value, value_stride)); - } - i *= step; - } - } - return result; - } - - /** - * Generate sorting problems "descriptions" - * @param pattern - the sort pattern to test with - * @param start - start value for the size parameter - * @param stop - stop value for the size paraameter - * @param step - the step/multiplier for the size parameter - * @param slack - the slack parameter used to generate ranges of problem sized around a base value - * @param first_value - the smallest value in each test array - * @param value_stride - the minimal jump between array elements - * @return - */ - static auto gen_mult(SortPattern pattern, usize start, usize stop, usize step, i32 slack, T first_value, T value_stride) { - return gen_mult(std::vector{pattern}, start, stop, step, slack, - first_value, value_stride); - } - - /** - * Generate sorting problems "descriptions" - * @param patterns - the sort patterns to test with - * @param start - start value for the size parameter - * @param stop - stop value for the size paraameter - * @param step - the step/multiplier for the size parameter - * @param slack - the slack parameter used to generate ranges of problem sized around a base value - * @param first_value - the smallest value in each test array - * @param value_stride - the minimal jump between array elements - * @return - */ - static std::vector gen_step(std::vector patterns, usize start, usize stop, usize step, i32 slack, T first_value, T value_stride) { - if (step == 0) { - throw std::invalid_argument("step for range must be non-zero"); - } - - std::vector result; - size_t i = start; - for (auto p : patterns) { - while ((step > 0) ? 
(i <= stop) : (i > stop)) { - for (auto j : range(-slack, slack, 1)) { - if ((i64)i + j <= 0) - continue; - result.push_back(SortTestParams(p, i, j, first_value, value_stride)); - } - i += step; - } - } - return result; - } - - /** - * Generate sorting problems "descriptions" - * @param pattern - the sort pattern to test with - * @param start - start value for the size parameter - * @param stop - stop value for the size paraameter - * @param step - the step for the size parameter - * @param slack - the slack parameter used to generate ranges of problem sized around a base value - * @param first_value - the smallest value in each test array - * @param value_stride - the minimal jump between array elements - * @return - */ - static auto gen_step(SortPattern pattern, usize start, usize stop, usize step, i32 slack, T first_value, T value_stride) { - return gen_step(std::vector{pattern}, start, stop, step, slack, - first_value, value_stride); + void TestBody() override { + _fn(); } -}; - -template -struct ParametrizedSortFixture : public testing::TestWithParam> { -protected: - std::vector V; -public: - virtual void SetUp() { - testing::TestWithParam>::SetUp(); - auto p = this->GetParam(); - auto v = unique_values(p.Size + p.Slack, p.FirstValue, p.ValueStride); - } - virtual void TearDown() { -#ifdef VXSORT_STATS - vxsort::print_all_stats(); - vxsort::reset_all_stats(); -#endif - } -}; - -template -struct PrintSortTestParams { - std::string operator()(const testing::TestParamInfo>& info) const { - return std::to_string(info.param.Size + info.param.Slack); - } +private: + FunctionType _fn; }; +template +void RegisterSingleLambdaTest(const char* test_suite_name, const char* test_name, + const char* type_param, const char* value_param, + const char* file, int line, + Lambda&& fn, Args&&... 
args) { + + testing::RegisterTest( + test_suite_name, test_name, type_param, value_param, + file, line, + [=]() mutable -> testing::Test* { return new VxSortLambdaFixture( + [=]() mutable { fn(args...); }); + }); +} } #endif // VXSORT_SORT_FIXTURES_H diff --git a/tests/util.h b/tests/test_vectors.h similarity index 99% rename from tests/util.h rename to tests/test_vectors.h index 2edd183..d9cfd1d 100644 --- a/tests/util.h +++ b/tests/test_vectors.h @@ -14,7 +14,7 @@ namespace vxsort_tests { using namespace vxsort::types; -enum class SortPattern { +enum class sort_pattern { unique_values, shuffled_16_values, all_equal, From 5226e06b8b7786d55721bf0d8c7466c0c19ceea5 Mon Sep 17 00:00:00 2001 From: Dan Shechter Date: Wed, 4 Oct 2023 18:37:18 +0300 Subject: [PATCH 16/21] tests: add support to actually generate the different patterns --- tests/smallsort/smallsort_test.h | 8 ++++---- tests/test_vectors.h | 31 +++++++++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/tests/smallsort/smallsort_test.h b/tests/smallsort/smallsort_test.h index e1f20bf..afcff25 100644 --- a/tests/smallsort/smallsort_test.h +++ b/tests/smallsort/smallsort_test.h @@ -21,7 +21,7 @@ void bitonic_machine_sort_pattern_test(sort_pattern pattern, usize size, T first using BM = vxsort::smallsort::bitonic_machine; - auto V = unique_values(size, first_value, stride); + auto V = generate_values_by_pattern(pattern, size, first_value, stride); auto v_copy = std::vector(V); auto begin = V.data(); @@ -40,7 +40,7 @@ template void bitonic_sort_pattern_test(sort_pattern pattern, usize size, T first_value, T stride) { VXSORT_TEST_ISA(); - auto V = unique_values(size, first_value, stride); + auto V = generate_values_by_pattern(pattern, size, first_value, stride); auto v_copy = std::vector(V); auto begin = V.data(); @@ -57,8 +57,8 @@ void bitonic_sort_pattern_test(sort_pattern pattern, usize size, T first_value, static inline std::vector smallsort_test_patterns() { return { 
sort_pattern::unique_values, - //sort_pattern::shuffled_16_values, - //sort_pattern::all_equal, + sort_pattern::shuffled_16_values, + sort_pattern::all_equal, }; } diff --git a/tests/test_vectors.h b/tests/test_vectors.h index d9cfd1d..765ed89 100644 --- a/tests/test_vectors.h +++ b/tests/test_vectors.h @@ -81,9 +81,9 @@ std::vector shuffled_16_values(usize size, T start, T stride) { } template -std::vector all_equal(usize size, T start , T stride) { +std::vector all_equal(usize size, T start , T) { std::vector v(size); - for (i32 i = 0; i < size; ++i) + for (usize i = 0; i < size; ++i) v.push_back(start); return v; } @@ -169,6 +169,33 @@ const char *get_canonical_typename() { return realname; } +template +std::vector +generate_values_by_pattern(sort_pattern pattern, usize size, T first_value, T stride) +{ + switch (pattern) { + case sort_pattern::unique_values: + return unique_values(size, first_value, stride); + case sort_pattern::shuffled_16_values: + return shuffled_16_values(size, first_value, stride); + case sort_pattern::all_equal: + return all_equal(size, first_value, stride); + case sort_pattern::ascending_int: + return ascending_int(size, first_value, stride); + case sort_pattern::descending_int: + return descending_int(size, first_value, stride); + case sort_pattern::pipe_organ: + return pipe_organ(size, first_value, stride); + case sort_pattern::push_front: + return push_front(size, first_value, stride); + case sort_pattern::push_middle: + return push_middle(size, first_value, stride); + default: + throw std::invalid_argument("unknown sort pattern"); + } + +} + } #endif From 21749129b4e41539bc68a3889ca195e75e9efce9 Mon Sep 17 00:00:00 2001 From: Dan Shechter Date: Mon, 9 Oct 2023 17:37:57 +0300 Subject: [PATCH 17/21] Fix test vector generation where some generation function were using the std::vector c-tor with size and using push_back() instead of indexing into the new vector. 
--- bench/util.h | 23 +++++----- tests/fullsort/fullsort_test.h | 52 ++++++++++++--------- tests/smallsort/smallsort_test.h | 78 ++++++++++++++++---------------- tests/sort_fixtures.h | 39 ++++++++-------- tests/test_vectors.h | 21 +++++---- 5 files changed, 111 insertions(+), 102 deletions(-) diff --git a/bench/util.h b/bench/util.h index 75a9ee2..6f3dd69 100644 --- a/bench/util.h +++ b/bench/util.h @@ -75,17 +75,18 @@ template std::vector shuffled_16_values(usize size, T start, T stride) { std::vector v(size); for (usize i = 0; i < size; ++i) - v.push_back(start + stride * (i % 16)); + v[i] = start + stride * (i % 16); + std::mt19937_64 rng(global_bench_random_seed); std::shuffle(v.begin(), v.end(), rng); return v; } template -std::vector all_equal(usize size, T start , T stride) { +std::vector all_equal(usize size, T start , T) { std::vector v(size); for (usize i = 0; i < size; ++i) - v.push_back(start); + v[i] = start; return v; } @@ -93,7 +94,7 @@ template std::vector ascending_int(usize size, T start, T stride) { std::vector v(size); for (usize i = 0; i < size; ++i) - v.push_back(start + stride * i); + v[i] = start + stride * i; return v; } @@ -101,7 +102,7 @@ template std::vector descending_int(usize size, T start, T stride) { std::vector v(size); for (isize i = size - 1; i >= 0; --i) - v.push_back(start + stride * i); + v[i] = start + stride * i; return v; } @@ -109,9 +110,9 @@ template std::vector pipe_organ(usize size, T start, T stride) { std::vector v(size); for (usize i = 0; i < size/2; ++i) - v.push_back(start + stride * i); + v[i] = start + stride * i; for (usize i = size/2; i < size; ++i) - v.push_back(start + (size - i) * stride); + v[i] = start + (size - i) * stride; return v; } @@ -119,8 +120,8 @@ template std::vector push_front(usize size, T start, T stride) { std::vector v(size); for (usize i = 1; i < size; ++i) - v.push_back(start + stride * i); - v.push_back(start); + v[i-1] = start + stride * i; + v[size-1] = start; return v; } @@ -129,9 
+130,9 @@ std::vector push_middle(usize size, T start, T stride) { std::vector v(size); for (usize i = 0; i < size; ++i) { if (i != size/2) - v.push_back(start + stride * i); + v[i] = start + stride * i; } - v.push_back(start + stride * (size/2)); + v[size/2] = start + stride * (size/2); return v; } diff --git a/tests/fullsort/fullsort_test.h b/tests/fullsort/fullsort_test.h index 939754c..6878b98 100644 --- a/tests/fullsort/fullsort_test.h +++ b/tests/fullsort/fullsort_test.h @@ -1,15 +1,15 @@ #ifndef VXSORT_FULLSORT_TEST_H #define VXSORT_FULLSORT_TEST_H +#include #include #include -#include -#include #include +#include -#include "../test_vectors.h" #include "../sort_fixtures.h" #include "../test_isa.h" +#include "../test_vectors.h" #include "vxsort.h" namespace vxsort_tests { @@ -59,9 +59,9 @@ void vxsort_hinted_test(std::vector& V, T min_value, T max_value) { static inline std::vector fullsort_test_patterns() { return { - sort_pattern::unique_values, - //sort_pattern::shuffled_16_values, - //sort_pattern::all_equal, + sort_pattern::unique_values, + // sort_pattern::shuffled_16_values, + // sort_pattern::all_equal, }; } @@ -69,7 +69,11 @@ template struct fullsort_test_params { public: fullsort_test_params(sort_pattern pattern, usize size, i32 slack, T first_value, T value_stride) - : pattern(pattern), size(size), slack(slack), first_value(first_value), stride(value_stride) {} + : pattern(pattern), + size(size), + slack(slack), + first_value(first_value), + stride(value_stride) {} sort_pattern pattern; usize size; i32 slack; @@ -77,10 +81,13 @@ struct fullsort_test_params { T stride; }; -template -std::vector> -gen_params(usize start, usize stop, usize step, i32 slack, T first_value, T value_stride) -{ +template +std::vector> gen_params(usize start, + usize stop, + usize step, + i32 slack, + T first_value, + T value_stride) { auto patterns = fullsort_test_patterns(); using TestParams = fullsort_test_params; @@ -112,25 +119,26 @@ void register_fullsort_tests(usize 
start, usize stop, usize step, T first_value, // Test "slacks" are defined in terms of number of elements in the primitive size (T) // up to the number of such elements contained in one vector type (VM::TV) - constexpr i32 slack = sizeof(typename VM::TV) / sizeof(T); + constexpr i32 slack = sizeof(typename VM::TV) / sizeof(T); static_assert(slack > 1); auto tests = gen_params(start, stop, step, slack, first_value, value_stride); for (auto p : tests) { - auto *test_type = get_canonical_typename(); + auto* test_type = get_canonical_typename(); auto test_size = p.size + p.slack; - auto test_name = fmt::format("vxsort_pattern_test<{}, {}, {}>/{}/{}", test_type, U, - magic_enum::enum_name(M), magic_enum::enum_name(p.pattern), test_size); - - RegisterSingleLambdaTest( - "fullsort", test_name.c_str(), nullptr, - std::to_string(test_size).c_str(), - __FILE__, __LINE__, - vxsort_pattern_test, p.pattern, test_size, p.first_value, p.stride); + auto test_name = + fmt::format("vxsort_pattern_test<{}, {}, {}>/{}/{}", test_type, U, + magic_enum::enum_name(M), magic_enum::enum_name(p.pattern), test_size); + + register_single_test_lambda( + "fullsort", test_name.c_str(), nullptr, + std::to_string(test_size).c_str(), + __FILE__, __LINE__, + vxsort_pattern_test, p.pattern, test_size, p.first_value, p.stride); } } -} +} // namespace vxsort_tests #endif // VXSORT_FULLSORT_TEST_H diff --git a/tests/smallsort/smallsort_test.h b/tests/smallsort/smallsort_test.h index afcff25..d95f7f6 100644 --- a/tests/smallsort/smallsort_test.h +++ b/tests/smallsort/smallsort_test.h @@ -4,12 +4,12 @@ #include #include -#include "gtest/gtest.h" #include "../sort_fixtures.h" +#include "gtest/gtest.h" #include "../test_isa.h" -#include "smallsort/bitonic_sort.h" #include "fmt/format.h" +#include "smallsort/bitonic_sort.h" namespace vxsort_tests { @@ -56,9 +56,9 @@ void bitonic_sort_pattern_test(sort_pattern pattern, usize size, T first_value, static inline std::vector smallsort_test_patterns() { return { - 
sort_pattern::unique_values, - sort_pattern::shuffled_16_values, - sort_pattern::all_equal, + sort_pattern::unique_values, + sort_pattern::shuffled_16_values, + sort_pattern::all_equal, }; } @@ -66,17 +66,19 @@ template struct smallsort_test_params { public: smallsort_test_params(sort_pattern pattern, usize size, T first_value, T value_stride) - : pattern(pattern), size(size), first_value(first_value), stride(value_stride) {} + : pattern(pattern), size(size), first_value(first_value), stride(value_stride) {} sort_pattern pattern; usize size; T first_value; T stride; }; -template -std::vector> -param_range(usize start, usize stop, usize step, T first_value, T value_stride) { - +template +std::vector> param_range(usize start, + usize stop, + usize step, + T first_value, + T value_stride) { assert(step > 0); auto patterns = smallsort_test_patterns(); @@ -84,9 +86,9 @@ param_range(usize start, usize stop, usize step, T first_value, T value_stride) using TestParams = smallsort_test_params; std::vector tests; - for(const auto& p: smallsort_test_patterns()) { - for(usize i = start; i <= stop; i += step) { - if(static_cast(i) <= 0) + for (const auto& p : smallsort_test_patterns()) { + for (usize i = start; i <= stop; i += step) { + if (static_cast(i) <= 0) continue; tests.push_back(TestParams(p, i, first_value, value_stride)); @@ -96,54 +98,54 @@ param_range(usize start, usize stop, usize step, T first_value, T value_stride) } template -void register_bitonic_tests(usize test_size_bytes, T first_value, T value_stride) -{ - +void register_bitonic_tests(usize test_size_bytes, T first_value, T value_stride) { auto stop = test_size_bytes / sizeof(T); usize step = 1; auto tests = param_range(1, stop, step, first_value, value_stride); for (auto p : tests) { - auto *test_type = get_canonical_typename(); + auto* test_type = get_canonical_typename(); auto test_size = p.size; - auto test_name = fmt::format("bitonic_sort_pattern_test<{}, {}>/{}/{}", test_type, - 
magic_enum::enum_name(M), magic_enum::enum_name(p.pattern), test_size); - - RegisterSingleLambdaTest( - "smallsort", test_name.c_str(), nullptr, - std::to_string(test_size).c_str(), - __FILE__, __LINE__, - bitonic_sort_pattern_test, p.pattern, test_size, p.first_value, p.stride); + auto test_name = + fmt::format("bitonic_sort_pattern_test<{}, {}>/{}/{}", test_type, + magic_enum::enum_name(M), magic_enum::enum_name(p.pattern), test_size); + + register_single_test_lambda("smallsort", test_name.c_str(), nullptr, + std::to_string(test_size).c_str(), + __FILE__, __LINE__, + bitonic_sort_pattern_test, p.pattern, test_size, + p.first_value, p.stride); } } template -void register_bitonic_machine_tests(T first_value, T value_stride) -{ +void register_bitonic_machine_tests(T first_value, T value_stride) { using VM = vxsort::vxsort_machine_traits; // We test bitonic_machine from 1 up to 4 vectors in single vector increments - auto stop = (sizeof(typename VM::TV) * 4) / sizeof(T); + //auto stop = (sizeof(typename VM::TV) * 4) / sizeof(T); + auto stop = (sizeof(typename VM::TV) * 1) / sizeof(T); usize step = sizeof(typename VM::TV) / sizeof(T); assert(step > 0); auto tests = param_range(step, stop, step, first_value, value_stride); for (auto p : tests) { - auto *test_type = get_canonical_typename(); + auto* test_type = get_canonical_typename(); auto test_size = p.size; - auto test_name = fmt::format("bitonic_machine_sort_pattern_test<{}, {}>/{}/{}", test_type, - magic_enum::enum_name(M), magic_enum::enum_name(p.pattern), test_size); - - RegisterSingleLambdaTest( - "smallsort", test_name.c_str(), nullptr, - std::to_string(test_size).c_str(), - __FILE__, __LINE__, - bitonic_machine_sort_pattern_test, p.pattern, test_size, p.first_value, p.stride); + auto test_name = + fmt::format("bitonic_machine_sort_pattern_test<{}, {}>/{}/{}", test_type, + magic_enum::enum_name(M), magic_enum::enum_name(p.pattern), test_size); + + register_single_test_lambda("smallsort", test_name.c_str(), 
nullptr, + std::to_string(test_size).c_str(), + __FILE__, __LINE__, + bitonic_machine_sort_pattern_test, p.pattern, test_size, + p.first_value, p.stride); } } -} +} // namespace vxsort_tests #endif // VXSORT_SMALLSORT_TEST_H diff --git a/tests/sort_fixtures.h b/tests/sort_fixtures.h index 62bf869..26cdbd7 100644 --- a/tests/sort_fixtures.h +++ b/tests/sort_fixtures.h @@ -5,45 +5,42 @@ #include "stats/vxsort_stats.h" #include "test_vectors.h" -#include +#include #include +#include #include #include -#include namespace vxsort_tests { using namespace vxsort::types; -using testing::ValuesIn; -using testing::Types; class VxSortLambdaFixture : public testing::Test { -public: + public: using FunctionType = std::function; explicit VxSortLambdaFixture(FunctionType fn) : _fn(std::move(fn)) {} VxSortLambdaFixture(VxSortLambdaFixture const&) = delete; - void TestBody() override { - _fn(); - } + void TestBody() override { _fn(); } -private: + private: FunctionType _fn; }; template -void RegisterSingleLambdaTest(const char* test_suite_name, const char* test_name, - const char* type_param, const char* value_param, - const char* file, int line, - Lambda&& fn, Args&&... args) { - - testing::RegisterTest( - test_suite_name, test_name, type_param, value_param, - file, line, - [=]() mutable -> testing::Test* { return new VxSortLambdaFixture( - [=]() mutable { fn(args...); }); - }); -} +void register_single_test_lambda(const char* test_suite_name, + const char* test_name, + const char* type_param, + const char* value_param, + const char* file, + int line, + Lambda&& fn, + Args&&... 
args) { + testing::RegisterTest(test_suite_name, test_name, type_param, value_param, file, line, + [=]() mutable -> testing::Test* { + return new VxSortLambdaFixture([=]() mutable { fn(args...); }); + }); } +} // namespace vxsort_tests #endif // VXSORT_SORT_FIXTURES_H diff --git a/tests/test_vectors.h b/tests/test_vectors.h index 765ed89..95e2ec0 100644 --- a/tests/test_vectors.h +++ b/tests/test_vectors.h @@ -74,7 +74,8 @@ template std::vector shuffled_16_values(usize size, T start, T stride) { std::vector v(size); for (usize i = 0; i < size; ++i) - v.push_back(start + stride * (i % 16)); + v[i] = start + stride * (i % 16); + std::mt19937_64 rng(global_bench_random_seed); std::shuffle(v.begin(), v.end(), rng); return v; @@ -84,7 +85,7 @@ template std::vector all_equal(usize size, T start , T) { std::vector v(size); for (usize i = 0; i < size; ++i) - v.push_back(start); + v[i] = start; return v; } @@ -92,7 +93,7 @@ template std::vector ascending_int(usize size, T start, T stride) { std::vector v(size); for (usize i = 0; i < size; ++i) - v.push_back(start + stride * i); + v[i] = start + stride * i; return v; } @@ -100,7 +101,7 @@ template std::vector descending_int(usize size, T start, T stride) { std::vector v(size); for (isize i = size - 1; i >= 0; --i) - v.push_back(start + stride * i); + v[i] = start + stride * i; return v; } @@ -108,9 +109,9 @@ template std::vector pipe_organ(usize size, T start, T stride) { std::vector v(size); for (usize i = 0; i < size/2; ++i) - v.push_back(start + stride * i); + v[i] = start + stride * i; for (usize i = size/2; i < size; ++i) - v.push_back(start + (size - i) * stride); + v[i] = start + (size - i) * stride; return v; } @@ -118,8 +119,8 @@ template std::vector push_front(usize size, T start, T stride) { std::vector v(size); for (usize i = 1; i < size; ++i) - v.push_back(start + stride * i); - v.push_back(start); + v[i-1] = start + stride * i; + v[size-1] = start; return v; } @@ -128,9 +129,9 @@ std::vector push_middle(usize 
size, T start, T stride) { std::vector v(size); for (usize i = 0; i < size; ++i) { if (i != size/2) - v.push_back(start + stride * i); + v[i] = start + stride * i; } - v.push_back(start + stride * (size/2)); + v[size/2] = start + stride * (size/2); return v; } From e640523283367ee4d7392e3e61ee67d76edd47c1 Mon Sep 17 00:00:00 2001 From: Dan Shechter Date: Tue, 31 Oct 2023 12:37:03 +0200 Subject: [PATCH 18/21] tests: change default stack size for test-reporter and hope for the best --- .github/workflows/build-and-test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 78d6dcd..bd2e5ba 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -187,6 +187,8 @@ jobs: - name: Test Report uses: dorny/test-reporter@v1 if: steps.check_cpu.outputs.has_avx2 == 1 || steps.check_cpu.outputs.has_avx512 == 1 + env: + NODE_OPTIONS: --max-old-space-size=4096 with: name: tests/${{ matrix.config.name}} path: build/tests/junit/*.xml From 941f58795f24b60af10721a0620febc7a7583c85 Mon Sep 17 00:00:00 2001 From: Dan Shechter Date: Tue, 31 Oct 2023 12:46:33 +0200 Subject: [PATCH 19/21] update fmt and googletest versions --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 87b3304..5237186 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -203,8 +203,8 @@ CPMAddPackage( CPMAddPackage( NAME googletest GITHUB_REPOSITORY google/googletest - GIT_TAG v1.13.0 - VERSION 1.13.0 + GIT_TAG v1.14.0 + VERSION 1.14.0 OPTIONS "BUILD_GMOCK OFF" "INSTALL_GTEST OFF" "gtest_force_shared_crt" OVERRIDE_FIND_PACKAGE ) @@ -214,7 +214,7 @@ CPMAddPackage( GIT_TAG main OPTIONS "BUILD_TESTING OFF" ) -CPMAddPackage("gh:fmtlib/fmt#10.0.0") +CPMAddPackage("gh:fmtlib/fmt#10.1.1") CPMAddPackage("gh:Neargye/magic_enum#v0.9.2") CPMAddPackage("gh:okdshin/PicoSHA2#master") From 26ad3608c750e0fb0c3586d3581ee1b910bff1fb Mon Sep 
17 00:00:00 2001 From: Dan Shechter Date: Sun, 5 Nov 2023 10:28:02 +0200 Subject: [PATCH 20/21] workaround for https://github.com/actions/runner-images/issues/8659 --- .github/workflows/build-and-test.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index bd2e5ba..cc551af 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -50,6 +50,15 @@ jobs: with: arch: x64 + # Work around https://github.com/actions/runner-images/issues/8659 + - name: "Remove GCC 13 from runner image (workaround)" + shell: bash + if: startsWith(runner.os, 'Linux') + run: | + sudo rm -f /etc/apt/sources.list.d/ubuntu-toolchain-r-ubuntu-test-jammy.list + sudo apt-get update + sudo apt-get install -y --allow-downgrades libc6=2.35-0ubuntu3.4 libc6-dev=2.35-0ubuntu3.4 libstdc++6=12.3.0-1ubuntu1~22.04 libgcc-s1=12.3.0-1ubuntu1~22.04 + - name: Setup Ninja uses: ashutoshvarma/setup-ninja@master with: From c2786955973592e24aa9bfacdec56a095f0709a8 Mon Sep 17 00:00:00 2001 From: Dan Shechter Date: Sun, 5 Nov 2023 15:51:00 +0200 Subject: [PATCH 21/21] Another attempt to increase node stack size --- .github/workflows/build-and-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index cc551af..8935373 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -197,7 +197,7 @@ jobs: uses: dorny/test-reporter@v1 if: steps.check_cpu.outputs.has_avx2 == 1 || steps.check_cpu.outputs.has_avx512 == 1 env: - NODE_OPTIONS: --max-old-space-size=4096 + NODE_OPTIONS: --max-old-space-size=4096 --stack-size=2048 with: name: tests/${{ matrix.config.name}} path: build/tests/junit/*.xml