From 7d9b55ee25c339f56c10aad2eb24aa3b6aec387d Mon Sep 17 00:00:00 2001 From: Hyeseong Kim Date: Thu, 2 Oct 2025 03:10:38 +0900 Subject: [PATCH 1/7] add quickwit benchmark --- quickwit/.gitignore | 1 + quickwit/config/index-config.yaml | 38 ++++++++++ quickwit/config/quickwit.yaml | 3 + quickwit/count.sh | 4 ++ quickwit/data_size.sh | 4 ++ quickwit/drop_tables.sh | 7 ++ quickwit/install.sh | 18 +++++ quickwit/load_data.sh | 67 +++++++++++++++++ quickwit/main.sh | 78 ++++++++++++++++++++ quickwit/queries.json5 | 5 ++ quickwit/queries_formatted.json5 | 116 ++++++++++++++++++++++++++++++ quickwit/query_results.sh | 20 ++++++ quickwit/run_queries.sh | 39 ++++++++++ quickwit/start.sh | 17 +++++ quickwit/total_size.sh | 3 + 15 files changed, 420 insertions(+) create mode 100644 quickwit/.gitignore create mode 100644 quickwit/config/index-config.yaml create mode 100644 quickwit/config/quickwit.yaml create mode 100755 quickwit/count.sh create mode 100755 quickwit/data_size.sh create mode 100755 quickwit/drop_tables.sh create mode 100755 quickwit/install.sh create mode 100755 quickwit/load_data.sh create mode 100755 quickwit/main.sh create mode 100644 quickwit/queries.json5 create mode 100644 quickwit/queries_formatted.json5 create mode 100755 quickwit/query_results.sh create mode 100755 quickwit/run_queries.sh create mode 100755 quickwit/start.sh create mode 100755 quickwit/total_size.sh diff --git a/quickwit/.gitignore b/quickwit/.gitignore new file mode 100644 index 0000000..412b4f7 --- /dev/null +++ b/quickwit/.gitignore @@ -0,0 +1 @@ +qwdata/ diff --git a/quickwit/config/index-config.yaml b/quickwit/config/index-config.yaml new file mode 100644 index 0000000..18937f6 --- /dev/null +++ b/quickwit/config/index-config.yaml @@ -0,0 +1,38 @@ +version: 0.8 + +index_id: jsonbench + +doc_mapping: + mode: dynamic + dynamic_mapping: + indexed: true + stored: true + tokenizer: raw + expand_dots: true + field_mappings: + - name: did + type: text + tokenizer: raw + fast: true + - 
name: time_us + type: datetime + input_formats: [unix_timestamp] + output_format: unix_timestamp_micros + fast: true + fast_precision: microseconds + - name: kind + type: text + tokenizer: raw + fast: true + - name: commit + type: object + field_mappings: + - name: operation + type: text + tokenizer: raw + fast: true + - name: collection + type: text + tokenizer: raw + fast: true + timestamp_field: time_us diff --git a/quickwit/config/quickwit.yaml b/quickwit/config/quickwit.yaml new file mode 100644 index 0000000..93ac23d --- /dev/null +++ b/quickwit/config/quickwit.yaml @@ -0,0 +1,3 @@ +version: 0.8 + +listen_address: 0.0.0.0 diff --git a/quickwit/count.sh b/quickwit/count.sh new file mode 100755 index 0000000..d0baa19 --- /dev/null +++ b/quickwit/count.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +curl -s --fail http://localhost:7280/api/v1/indexes/jsonbench/describe \ + | jq ".num_published_docs" diff --git a/quickwit/data_size.sh b/quickwit/data_size.sh new file mode 100755 index 0000000..21ac0e9 --- /dev/null +++ b/quickwit/data_size.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +curl -s --fail http://localhost:7280/api/v1/indexes/jsonbench/describe \ + | jq ".size_published_splits" diff --git a/quickwit/drop_tables.sh b/quickwit/drop_tables.sh new file mode 100755 index 0000000..24be4cf --- /dev/null +++ b/quickwit/drop_tables.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +echo "Stopping Quickwit" +pidof quickwit && kill $(pidof quickwit) + +echo "Dropping all data" +rm -rf ./qwdata diff --git a/quickwit/install.sh b/quickwit/install.sh new file mode 100755 index 0000000..8832518 --- /dev/null +++ b/quickwit/install.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# The latest official release of Quickwit is too old; it lacks support for many Tantivy queries. +# They publish edge builds as Docker images on Docker Hub. We can extract the binary from those images. +# +# It will be replaced by the official release when it is updated. 
+# +# RELEASE_VERSION=v0.9.0 +# wget -N "https://github.com/quickwit-oss/quickwit/releases/download/${RELEASE_VERSION}/quickwit-${RELEASE_VERSION}-x86_64-unknown-linux-gnu.tar.gz" +# tar xzf quickwit-${RELEASE_VERSION}-x86_64-unknown-linux-gnu.tar.gz +# mv quickwit-${RELEASE_VERSION}/quickwit ./ +# rm -rf quickwit-${RELEASE_VERSION} +# +# Using prebuilt binary here for testing +PREBUILT_NAME=quickwit-f6cb417-x86_64-unknown-linux-gnu +wget -N "https://github-actions-assets.cometkim.dev/prebuilt/$PREBUILT_NAME" +mv "$PREBUILT_NAME" ./quickwit +chmod +x ./quickwit diff --git a/quickwit/load_data.sh b/quickwit/load_data.sh new file mode 100755 index 0000000..da74c37 --- /dev/null +++ b/quickwit/load_data.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 4 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Arguments +DATA_DIRECTORY="$1" +MAX_FILES="$2" +SUCCESS_LOG="$3" +ERROR_LOG="$4" + +# Validate arguments +[[ ! -d "$DATA_DIRECTORY" ]] && { echo "Error: Data directory '$DATA_DIRECTORY' does not exist."; exit 1; } +[[ ! "$MAX_FILES" =~ ^[0-9]+$ ]] && { echo "Error: MAX_FILES must be a positive integer."; exit 1; } + +# Absolute path of Quickwit executable +QW_CMD="$PWD/quickwit" + +echo "Prepare clean index: jsonbench" +$QW_CMD index create --index-config ./config/index-config.yaml --overwrite --yes + +# Create a temporary directory for uncompressed files +TEMP_DIR=$(mktemp -d /var/tmp/json_files.XXXXXX) +trap "rm -rf $TEMP_DIR" EXIT # Cleanup temp directory on script exit + +pushd $DATA_DIRECTORY +counter=0 +for file in $(ls *.json.gz | head -n $MAX_FILES); do + echo "Processing file: $file" + + # Uncompress the file into the TEMP_DIR + uncompressed_file="$TEMP_DIR/$(basename "${file%.gz}")" + gunzip -c "$file" > "$uncompressed_file" + + if [[ $? 
-ne 0 ]]; then + echo "Error: Failed to uncompress $file" >> "$ERROR_LOG" + continue + fi + + $QW_CMD tool local-ingest \ + --index jsonbench \ + --input-path "$uncompressed_file" + + first_attempt=$? + if [[ $first_attempt -eq 0 ]]; then + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Successfully imported $file." >> "$SUCCESS_LOG" + else + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Failed for $file. Giving up." >> "$ERROR_LOG" + fi + + counter=$((counter + 1)) + if [[ $counter -ge $MAX_FILES ]]; then + break + fi +done +popd + +# See https://github.com/quickwit-oss/quickwit/issues/4869 +echo "Wating 60 secs for Quickwit to commit the data" +sleep 60 + +$QW_CMD tool gc --index jsonbench + +echo -e "\nLoaded $MAX_FILES data files from $DATA_DIRECTORY to Quickwit." diff --git a/quickwit/main.sh b/quickwit/main.sh new file mode 100755 index 0000000..673105b --- /dev/null +++ b/quickwit/main.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +DEFAULT_CHOICE=ask +DEFAULT_DATA_DIRECTORY=~/data/bluesky + +# Allow the user to optionally provide the scale factor ("choice") as an argument +CHOICE="${1:-$DEFAULT_CHOICE}" + +# Allow the user to optionally provide the data directory as an argument +DATA_DIRECTORY="${2:-$DEFAULT_DATA_DIRECTORY}" + +# Define success and error log files +SUCCESS_LOG="${3:-success.log}" +ERROR_LOG="${4:-error.log}" + +# Define prefix for output files +OUTPUT_PREFIX="${5:-_m6i.8xlarge}" + +# Check if the directory exists +if [[ ! -d "$DATA_DIRECTORY" ]]; then + echo "Error: Data directory '$DATA_DIRECTORY' does not exist." 
+ exit 1 +fi + +if [ "$CHOICE" = "ask" ]; then + echo "Select the dataset size to benchmark:" + echo "1) 1m (default)" + echo "2) 10m" + echo "3) 100m" + echo "4) 1000m" + echo "5) all" + read -p "Enter the number corresponding to your choice: " CHOICE +fi + +export QW_CONFIG="$PWD/config/quickwit.yaml" +export QW_DATA_DIR="$PWD/qwdata" + +# ./install.sh + +benchmark() { + local size=$1 + # Check DATA_DIRECTORY contains the required number of files to run the benchmark + file_count=$(find "$DATA_DIRECTORY" -type f | wc -l) + if (( file_count < size )); then + echo "Error: Not enough files in '$DATA_DIRECTORY'. Required: $size, Found: $file_count." + exit 1 + fi + + ./start.sh + ./load_data.sh "$DATA_DIRECTORY" "$size" "$SUCCESS_LOG" "$ERROR_LOG" + ./total_size.sh | tee "${OUTPUT_PREFIX}_bluesky_${size}m.total_size" + ./data_size.sh | tee "${OUTPUT_PREFIX}_bluesky_${size}m.data_size" + ./count.sh | tee "${OUTPUT_PREFIX}_bluesky_${size}m.count" + #./query_results.sh | tee "${OUTPUT_PREFIX}_bluesky_${size}m.query_results" + ./run_queries.sh | tee "${OUTPUT_PREFIX}_bluesky_${size}m.results_runtime" + ./drop_tables.sh +} + +case $CHOICE in + 2) + benchmark 10 + ;; + 3) + benchmark 100 + ;; + 4) + benchmark 1000 + ;; + 5) + benchmark 1 + benchmark 10 + benchmark 100 + benchmark 1000 + ;; + *) + benchmark 1 + ;; +esac diff --git a/quickwit/queries.json5 b/quickwit/queries.json5 new file mode 100644 index 0000000..4623252 --- /dev/null +++ b/quickwit/queries.json5 @@ -0,0 +1,5 @@ +{"query":"*","max_hits":0,"aggs":{"events":{"terms":{"field":"commit.collection","order":{"_count":"desc"},"size": 1000}}}} +{"query":"kind:commit AND commit.operation:create","max_hits":0,"aggs":{"events":{"terms":{"field":"commit.collection","order":{"_count":"desc"},"size":1000},"aggs":{"users":{"cardinality":{"field":"did"}}}}}} +{"query":"kind:commit AND commit.operation:create AND commit.collection:IN [app.bsky.feed.post app.bsky.feed.repost 
app.bsky.feed.like]","max_hits":0,"aggs":{"events":{"terms":{"field":"commit.collection","order":{"_key":"asc"},"size":1000},"aggs":{"hour_of_day":{"date_histogram":{"field":"time_us","fixed_interval":"1h"}}}}}} +{"query":"kind:commit AND commit.operation:create AND commit.collection:app.bsky.feed.post","max_hits":0,"aggs":{"users":{"terms":{"field":"did","order":{"first_post":"asc"},"size":3},"aggs":{"first_post":{"min":{"field":"time_us"}}}}}} +{"query":"kind:commit AND commit.operation:create AND commit.collection:app.bsky.feed.post","max_hits":0,"aggs":{"users":{"terms":{"field":"did","order":{"activity_span.max":"desc"},"size":3},"aggs":{"activity_span":{"stats":{"field":"time_us"}}}}}} diff --git a/quickwit/queries_formatted.json5 b/quickwit/queries_formatted.json5 new file mode 100644 index 0000000..dc7a0a5 --- /dev/null +++ b/quickwit/queries_formatted.json5 @@ -0,0 +1,116 @@ +/** + * Q1 - Top event types + */ +{ + "query": "*", + "max_hits": 0, + "aggs": { + "events": { + "terms": { + "field": "commit.collection", + "order": { "_count": "desc" }, + "size": 1000 + } + } + } +} + +/** + * Q2 - Top event types together with unique users per event type + */ +{ + "query": "kind:commit AND commit.operation:create", + "max_hits": 0, + "aggs": { + "events": { + "terms": { + "field": "commit.collection", + "order": { "_count": "desc" }, + "size": 1000 + }, + "aggs": { + "users": { + "cardinality": { + "field": "did" + } + } + } + } + } +} + +/** + * Q3 - When do people use BlueSky + */ +{ + "query": "kind:commit AND commit.operation:create AND commit.collection:IN [app.bsky.feed.post app.bsky.feed.repost app.bsky.feed.like]", + "max_hits": 0, + "aggs": { + "events": { + "terms": { + "field": "commit.collection", + "order": { "_key": "asc" }, + "size": 1000 + }, + "aggs": { + "hour_of_day": { + "date_histogram": { + "field": "time_us", + "fixed_interval": "1h" + } + } + } + } + } +} + +/** + * Q4 - top 3 post veterans + */ +{ + "query": "kind:commit AND 
commit.operation:create AND commit.collection:app.bsky.feed.post", + "max_hits": 0, + "aggs": { + "users": { + "terms": { + "field": "did", + "order": { "first_post": "asc" }, + "size": 3 + }, + "aggs": { + "first_post_ts": { + "min": { + "field": "time_us" + } + } + } + } + } +} + +/** + * Q5 - top 3 users with longest activity + * + * Not completely supported. + * Tantivy need to support `bucket_script` aggregation to allow this query. + */ +{ + "query": "kind:commit AND commit.operation:create AND commit.collection:app.bsky.feed.post", + "max_hits": 0, + "aggs": { + "users": { + "terms": { + "field": "did", + "order": { "activity_span.max": "desc" }, + "size": 3 + }, + "aggs": { + "activity_span": { + "stats": { + "field": "time_us" + } + } + } + } + } +} diff --git a/quickwit/query_results.sh b/quickwit/query_results.sh new file mode 100755 index 0000000..c34bb72 --- /dev/null +++ b/quickwit/query_results.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +QUERY_NUM=1 + +cat queries.json5 | while read -r query; do + + # Print the query + echo "------------------------------------------------------------------------------------------------------------------------" + echo "Result for query Q$QUERY_NUM:" + echo + + curl -s --fail -X "POST" \ + "http://localhost:7280/api/v1/jsonbench/search" \ + -H "Accept: application/json" \ + -H "Content-Type: application/json" \ + -d "$query" | jq ".aggregations" + + # Increment the query number + QUERY_NUM=$((QUERY_NUM + 1)) +done; diff --git a/quickwit/run_queries.sh b/quickwit/run_queries.sh new file mode 100755 index 0000000..7e67426 --- /dev/null +++ b/quickwit/run_queries.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +TRIES=3 + +cat queries.json5 | while read -r query; do + + # Clear the Linux file system cache + echo "Clearing file system cache..." + sync + echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + echo "File system cache cleared." 
+ + # Print the query + echo "Running query: $query" + + echo -n "[" + # Execute the query multiple times + for i in $(seq 1 $TRIES); do + response=$(curl -s --fail -X "POST" \ + "http://localhost:7280/api/v1/jsonbench/search" \ + -H "Accept: application/json" \ + -H "Content-Type: application/json" \ + -d "$query") + exit_code=$? + if [[ "$exit_code" == "0" ]]; then + elapsed_micros=$(echo "$response" | jq -r '.elapsed_time_micros // empty') + if [[ -n "$elapsed_micros" ]]; then + RES=$(awk "BEGIN {print $elapsed_micros / 1000000}" | tr ',' '.') + echo -n "${RES}" + else + echo -n "null" + fi + else + echo -n "null" + fi + [[ "$i" != $TRIES ]] && echo -n ", " + done; + echo "]" +done; diff --git a/quickwit/start.sh b/quickwit/start.sh new file mode 100755 index 0000000..15a1ae6 --- /dev/null +++ b/quickwit/start.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# Do we run already? +pidof quickwit >/dev/null && exit 1 + +# Ensure data directory exists +mkdir -p ./qwdata + +echo "Starting Quickwit" +./quickwit run & + +while true +do + ./quickwit index list >/dev/null 2>&1 && break + sleep 1 +done +echo "Started Quickwit." 
diff --git a/quickwit/total_size.sh b/quickwit/total_size.sh new file mode 100755 index 0000000..6d87bfb --- /dev/null +++ b/quickwit/total_size.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +du -b ${QW_DATA_DIR:-qwdata}/indexes/jsonbench | cut -f1 From fa68e93e617973e3919503900da32956e91d05a6 Mon Sep 17 00:00:00 2001 From: Hyeseong Kim Date: Thu, 13 Nov 2025 23:47:02 +0900 Subject: [PATCH 2/7] fix load_data script for increasing ingestion speed --- quickwit/load_data.sh | 34 +++++----------------------------- 1 file changed, 5 insertions(+), 29 deletions(-) diff --git a/quickwit/load_data.sh b/quickwit/load_data.sh index da74c37..9e72b4e 100755 --- a/quickwit/load_data.sh +++ b/quickwit/load_data.sh @@ -20,48 +20,24 @@ ERROR_LOG="$4" QW_CMD="$PWD/quickwit" echo "Prepare clean index: jsonbench" -$QW_CMD index create --index-config ./config/index-config.yaml --overwrite --yes - -# Create a temporary directory for uncompressed files -TEMP_DIR=$(mktemp -d /var/tmp/json_files.XXXXXX) -trap "rm -rf $TEMP_DIR" EXIT # Cleanup temp directory on script exit +./quickwit index create --index-config ./config/index-config.yaml --overwrite --yes pushd $DATA_DIRECTORY counter=0 for file in $(ls *.json.gz | head -n $MAX_FILES); do - echo "Processing file: $file" - - # Uncompress the file into the TEMP_DIR - uncompressed_file="$TEMP_DIR/$(basename "${file%.gz}")" - gunzip -c "$file" > "$uncompressed_file" - - if [[ $? -ne 0 ]]; then - echo "Error: Failed to uncompress $file" >> "$ERROR_LOG" - continue - fi - - $QW_CMD tool local-ingest \ - --index jsonbench \ - --input-path "$uncompressed_file" - - first_attempt=$? - if [[ $first_attempt -eq 0 ]]; then - echo "[$(date '+%Y-%m-%d %H:%M:%S')] Successfully imported $file." >> "$SUCCESS_LOG" - else - echo "[$(date '+%Y-%m-%d %H:%M:%S')] Failed for $file. Giving up." 
>> "$ERROR_LOG" - fi + gunzip -c "$file" counter=$((counter + 1)) if [[ $counter -ge $MAX_FILES ]]; then break fi -done +done | $QW_CMD tool local-ingest --index jsonbench popd # See https://github.com/quickwit-oss/quickwit/issues/4869 -echo "Wating 60 secs for Quickwit to commit the data" +echo "Wait 1 min for Quickwit search become available" sleep 60 -$QW_CMD tool gc --index jsonbench +./quickwit tool gc --index jsonbench echo -e "\nLoaded $MAX_FILES data files from $DATA_DIRECTORY to Quickwit." From 7db2ce9a9562751bc469e2691f621b500fa807e1 Mon Sep 17 00:00:00 2001 From: Hyeseong Kim Date: Fri, 14 Nov 2025 03:44:24 +0900 Subject: [PATCH 3/7] increase memory limit for aggregations --- quickwit/config/quickwit.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/quickwit/config/quickwit.yaml b/quickwit/config/quickwit.yaml index 93ac23d..e3417a6 100644 --- a/quickwit/config/quickwit.yaml +++ b/quickwit/config/quickwit.yaml @@ -1,3 +1,6 @@ version: 0.8 listen_address: 0.0.0.0 + +searcher: + aggregation_memory_limit: 64G From 08b76c332cad915646bcc062d324d7611105353c Mon Sep 17 00:00:00 2001 From: Hyeseong Kim Date: Fri, 14 Nov 2025 03:55:01 +0900 Subject: [PATCH 4/7] increase request timeout for long-running queries --- quickwit/config/quickwit.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/quickwit/config/quickwit.yaml b/quickwit/config/quickwit.yaml index e3417a6..609cbb4 100644 --- a/quickwit/config/quickwit.yaml +++ b/quickwit/config/quickwit.yaml @@ -4,3 +4,4 @@ listen_address: 0.0.0.0 searcher: aggregation_memory_limit: 64G + request_timeout_secs: 300 From 9a54de5ae770f5a27e680f151b28049413fa5f52 Mon Sep 17 00:00:00 2001 From: Hyeseong Kim Date: Fri, 14 Nov 2025 04:26:39 +0900 Subject: [PATCH 5/7] add initial results --- .../_m6i.8xlarge_bluesky_1m.query_results | 292 ++++++++++++++++++ .../results/m6i.8xlarge_bluesky_1000m.json | 21 ++ .../results/m6i.8xlarge_bluesky_100m.json | 21 ++ quickwit/results/m6i.8xlarge_bluesky_10m.json | 21 ++ 
quickwit/results/m6i.8xlarge_bluesky_1m.json | 21 ++ 5 files changed, 376 insertions(+) create mode 100644 quickwit/results/_query_results/_m6i.8xlarge_bluesky_1m.query_results create mode 100644 quickwit/results/m6i.8xlarge_bluesky_1000m.json create mode 100644 quickwit/results/m6i.8xlarge_bluesky_100m.json create mode 100644 quickwit/results/m6i.8xlarge_bluesky_10m.json create mode 100644 quickwit/results/m6i.8xlarge_bluesky_1m.json diff --git a/quickwit/results/_query_results/_m6i.8xlarge_bluesky_1m.query_results b/quickwit/results/_query_results/_m6i.8xlarge_bluesky_1m.query_results new file mode 100644 index 0000000..e1d509d --- /dev/null +++ b/quickwit/results/_query_results/_m6i.8xlarge_bluesky_1m.query_results @@ -0,0 +1,292 @@ +------------------------------------------------------------------------------------------------------------------------ +Result for query Q1: + +{ + "events": { + "buckets": [ + { + "key": "app.bsky.feed.like", + "doc_count": 448944 + }, + { + "key": "app.bsky.graph.follow", + "doc_count": 360374 + }, + { + "key": "app.bsky.feed.post", + "doc_count": 90816 + }, + { + "key": "app.bsky.feed.repost", + "doc_count": 58540 + }, + { + "key": "app.bsky.graph.block", + "doc_count": 14040 + }, + { + "key": "app.bsky.actor.profile", + "doc_count": 11762 + }, + { + "key": "app.bsky.graph.listitem", + "doc_count": 8103 + }, + { + "key": "app.bsky.graph.listblock", + "doc_count": 895 + }, + { + "key": "app.bsky.graph.starterpack", + "doc_count": 405 + }, + { + "key": "app.bsky.graph.list", + "doc_count": 356 + }, + { + "key": "app.bsky.feed.threadgate", + "doc_count": 255 + }, + { + "key": "app.bsky.feed.postgate", + "doc_count": 104 + }, + { + "key": "app.bsky.feed.generator", + "doc_count": 74 + }, + { + "key": "app.bsky.labeler.service", + "doc_count": 4 + } + ], + "sum_other_doc_count": 0, + "doc_count_error_upper_bound": 0 + } +} 
+------------------------------------------------------------------------------------------------------------------------ +Result for query Q2: + +{ + "events": { + "buckets": [ + { + "key": "app.bsky.feed.like", + "doc_count": 444523, + "users": { + "value": 117671.0 + } + }, + { + "key": "app.bsky.graph.follow", + "doc_count": 337978, + "users": { + "value": 64023.0 + } + }, + { + "key": "app.bsky.feed.post", + "doc_count": 86812, + "users": { + "value": 50489.0 + } + }, + { + "key": "app.bsky.feed.repost", + "doc_count": 56993, + "users": { + "value": 26584.0 + } + }, + { + "key": "app.bsky.graph.block", + "doc_count": 13838, + "users": { + "value": 5785.0 + } + }, + { + "key": "app.bsky.graph.listitem", + "doc_count": 7568, + "users": { + "value": 1078.0 + } + }, + { + "key": "app.bsky.actor.profile", + "doc_count": 5337, + "users": { + "value": 5336.0 + } + }, + { + "key": "app.bsky.graph.listblock", + "doc_count": 860, + "users": { + "value": 449.0 + } + }, + { + "key": "app.bsky.graph.list", + "doc_count": 259, + "users": { + "value": 218.0 + } + }, + { + "key": "app.bsky.feed.threadgate", + "doc_count": 228, + "users": { + "value": 196.0 + } + }, + { + "key": "app.bsky.graph.starterpack", + "doc_count": 104, + "users": { + "value": 101.0 + } + }, + { + "key": "app.bsky.feed.postgate", + "doc_count": 101, + "users": { + "value": 82.0 + } + }, + { + "key": "app.bsky.feed.generator", + "doc_count": 10, + "users": { + "value": 9.0 + } + } + ], + "sum_other_doc_count": 0, + "doc_count_error_upper_bound": 0 + } +} +------------------------------------------------------------------------------------------------------------------------ +Result for query Q3: + +{ + "events": { + "buckets": [ + { + "key": "app.bsky.feed.like", + "doc_count": 444523, + "hour_of_day": { + "buckets": [ + { + "key_as_string": "2024-11-21T16:00:00Z", + "key": 1732204800000.0, + "doc_count": 444523 + } + ] + } + }, + { + "key": "app.bsky.feed.post", + "doc_count": 86812, + "hour_of_day": { 
+ "buckets": [ + { + "key_as_string": "2024-11-21T16:00:00Z", + "key": 1732204800000.0, + "doc_count": 86812 + } + ] + } + }, + { + "key": "app.bsky.feed.repost", + "doc_count": 56993, + "hour_of_day": { + "buckets": [ + { + "key_as_string": "2024-11-21T16:00:00Z", + "key": 1732204800000.0, + "doc_count": 56993 + } + ] + } + } + ], + "sum_other_doc_count": 0 + } +} +------------------------------------------------------------------------------------------------------------------------ +Result for query Q4: + +{ + "users": { + "buckets": [ + { + "key": "did:plc:yj3sjq3blzpynh27cumnp5ks", + "doc_count": 1, + "first_post": { + "value": 1.732206349000167E+18 + } + }, + { + "key": "did:plc:l5o3qjrmfztir54cpwlv2eme", + "doc_count": 1, + "first_post": { + "value": 1.732206349001905E+18 + } + }, + { + "key": "did:plc:s4bwqchfzm6gjqfeb6mexgbu", + "doc_count": 1, + "first_post": { + "value": 1.732206349003907E+18 + } + } + ], + "sum_other_doc_count": 86809 + } +} +------------------------------------------------------------------------------------------------------------------------ +Result for query Q5: + +{ + "users": { + "buckets": [ + { + "key": "did:plc:ps6ytbha3ljh7eeyyaest42j", + "doc_count": 1, + "activity_span": { + "count": 1, + "sum": 1.732207162788702E+18, + "min": 1.732207162788702E+18, + "max": 1.732207162788702E+18, + "avg": 1.732207162788702E+18 + } + }, + { + "key": "did:plc:pkugst3ilk3eunckdfswseih", + "doc_count": 1, + "activity_span": { + "count": 1, + "sum": 1.732207162784808E+18, + "min": 1.732207162784808E+18, + "max": 1.732207162784808E+18, + "avg": 1.732207162784808E+18 + } + }, + { + "key": "did:plc:3fjp7624vv7jsyjsiaw35mrj", + "doc_count": 1, + "activity_span": { + "count": 1, + "sum": 1.732207162783277E+18, + "min": 1.732207162783277E+18, + "max": 1.732207162783277E+18, + "avg": 1.732207162783277E+18 + } + } + ], + "sum_other_doc_count": 86809 + } +} diff --git a/quickwit/results/m6i.8xlarge_bluesky_1000m.json 
b/quickwit/results/m6i.8xlarge_bluesky_1000m.json new file mode 100644 index 0000000..55faa2c --- /dev/null +++ b/quickwit/results/m6i.8xlarge_bluesky_1000m.json @@ -0,0 +1,21 @@ +{ + "system": "Quickwit", + "version": "quickwit-f6cb417-x86_64-unknown-linux-gnu", + "os": "Ubuntu 24.04", + "date": "2025-11-13", + "machine": "m6i.8xlarge, 10000gib gp3", + "retains_strucutre": "no", + "tags": [ + ], + "dataset_size": 1000000000, + "num_loaded_documents": 947421756, + "total_size": 203301029796, + "data_size": 203300967417, + "result": [ + [4.16384, 0.002826, 0.001797], + [35.3774, 0.144555, 0.143494], + [22.5511, 0.004456, 0.003906], + [120.308, 80.7852, 81.28], + [116.023, 81.02, 81.3811] + ] +} diff --git a/quickwit/results/m6i.8xlarge_bluesky_100m.json b/quickwit/results/m6i.8xlarge_bluesky_100m.json new file mode 100644 index 0000000..b1af6cf --- /dev/null +++ b/quickwit/results/m6i.8xlarge_bluesky_100m.json @@ -0,0 +1,21 @@ +{ + "system": "Quickwit", + "version": "quickwit-f6cb417-x86_64-unknown-linux-gnu", + "os": "Ubuntu 24.04", + "date": "2025-11-13", + "machine": "m6i.8xlarge, 10000gib gp3", + "retains_strucutre": "no", + "tags": [ + ], + "dataset_size": 100000000, + "num_loaded_documents": 99999968, + "total_size": 21291387950, + "data_size": 21291377109, + "result": [ + [0.198066, 0.001542, 0.001267], + [4.97863, 0.030403, 0.030781], + [1.77687, 0.001639, 0.001406], + [11.4127, 11.9122, 11.909], + [12.0937, 11.9134, 11.9609] + ] +} diff --git a/quickwit/results/m6i.8xlarge_bluesky_10m.json b/quickwit/results/m6i.8xlarge_bluesky_10m.json new file mode 100644 index 0000000..d295e92 --- /dev/null +++ b/quickwit/results/m6i.8xlarge_bluesky_10m.json @@ -0,0 +1,21 @@ +{ + "system": "Quickwit", + "version": "quickwit-f6cb417-x86_64-unknown-linux-gnu", + "os": "Ubuntu 24.04", + "date": "2025-11-13", + "machine": "m6i.8xlarge, 10000gib gp3", + "retains_strucutre": "no", + "tags": [ + ], + "dataset_size": 10000000, + "num_loaded_documents": 9999994, + "total_size": 
2241846927, + "data_size": 2241840152, + "result": [ + [0.097113, 0.001325, 0.00121], + [0.865711, 0.016137, 0.01209], + [0.235189, 0.001455, 0.001249], + [2.23065, 1.64808, 1.69323], + [2.16964, 2.13744, 2.28789] + ] +} diff --git a/quickwit/results/m6i.8xlarge_bluesky_1m.json b/quickwit/results/m6i.8xlarge_bluesky_1m.json new file mode 100644 index 0000000..a395b26 --- /dev/null +++ b/quickwit/results/m6i.8xlarge_bluesky_1m.json @@ -0,0 +1,21 @@ +{ + "system": "Quickwit", + "version": "quickwit-f6cb417-x86_64-unknown-linux-gnu", + "os": "Ubuntu 24.04", + "date": "2025-11-13", + "machine": "m6i.8xlarge, 10000gib gp3", + "retains_strucutre": "no", + "tags": [ + ], + "dataset_size": 1000000, + "num_loaded_documents": 1000000, + "total_size": 228201734, + "data_size": 228196627, + "result": [ + [0.064869, 0.001315, 0.001054], + [0.233581, 0.003866, 0.003543], + [0.054347, 0.001295, 0.000992], + [0.312483, 0.192769, 0.192032], + [0.289629, 0.18825, 0.191885] + ] +} From 7805e473444e57ae6cc5a07806e465af38816727 Mon Sep 17 00:00:00 2001 From: Hyeseong Kim Date: Fri, 14 Nov 2025 04:32:06 +0900 Subject: [PATCH 6/7] fix typo --- quickwit/results/m6i.8xlarge_bluesky_1000m.json | 2 +- quickwit/results/m6i.8xlarge_bluesky_100m.json | 2 +- quickwit/results/m6i.8xlarge_bluesky_10m.json | 2 +- quickwit/results/m6i.8xlarge_bluesky_1m.json | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/quickwit/results/m6i.8xlarge_bluesky_1000m.json b/quickwit/results/m6i.8xlarge_bluesky_1000m.json index 55faa2c..d8118d8 100644 --- a/quickwit/results/m6i.8xlarge_bluesky_1000m.json +++ b/quickwit/results/m6i.8xlarge_bluesky_1000m.json @@ -4,7 +4,7 @@ "os": "Ubuntu 24.04", "date": "2025-11-13", "machine": "m6i.8xlarge, 10000gib gp3", - "retains_strucutre": "no", + "retains_structure": "no", "tags": [ ], "dataset_size": 1000000000, diff --git a/quickwit/results/m6i.8xlarge_bluesky_100m.json b/quickwit/results/m6i.8xlarge_bluesky_100m.json index b1af6cf..1e51706 100644 --- 
a/quickwit/results/m6i.8xlarge_bluesky_100m.json +++ b/quickwit/results/m6i.8xlarge_bluesky_100m.json @@ -4,7 +4,7 @@ "os": "Ubuntu 24.04", "date": "2025-11-13", "machine": "m6i.8xlarge, 10000gib gp3", - "retains_strucutre": "no", + "retains_structure": "no", "tags": [ ], "dataset_size": 100000000, diff --git a/quickwit/results/m6i.8xlarge_bluesky_10m.json b/quickwit/results/m6i.8xlarge_bluesky_10m.json index d295e92..949adad 100644 --- a/quickwit/results/m6i.8xlarge_bluesky_10m.json +++ b/quickwit/results/m6i.8xlarge_bluesky_10m.json @@ -4,7 +4,7 @@ "os": "Ubuntu 24.04", "date": "2025-11-13", "machine": "m6i.8xlarge, 10000gib gp3", - "retains_strucutre": "no", + "retains_structure": "no", "tags": [ ], "dataset_size": 10000000, diff --git a/quickwit/results/m6i.8xlarge_bluesky_1m.json b/quickwit/results/m6i.8xlarge_bluesky_1m.json index a395b26..5b5c5be 100644 --- a/quickwit/results/m6i.8xlarge_bluesky_1m.json +++ b/quickwit/results/m6i.8xlarge_bluesky_1m.json @@ -4,7 +4,7 @@ "os": "Ubuntu 24.04", "date": "2025-11-13", "machine": "m6i.8xlarge, 10000gib gp3", - "retains_strucutre": "no", + "retains_structure": "no", "tags": [ ], "dataset_size": 1000000, From 05ce373b84a31226ae92279e570473b9f42be631 Mon Sep 17 00:00:00 2001 From: Hyeseong Kim Date: Fri, 14 Nov 2025 04:34:08 +0900 Subject: [PATCH 7/7] uncomment install phase --- quickwit/main.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quickwit/main.sh b/quickwit/main.sh index 673105b..d9bbc33 100755 --- a/quickwit/main.sh +++ b/quickwit/main.sh @@ -35,7 +35,7 @@ fi export QW_CONFIG="$PWD/config/quickwit.yaml" export QW_DATA_DIR="$PWD/qwdata" -# ./install.sh +./install.sh benchmark() { local size=$1