Skip to content

Commit b8e99e8

Browse files
authored
Update download URLs, fall back to wget when torrent fails (jwngr#309)
Replaced the deprecated mirror site https://dumps.wikimedia.your.org/ with https://dumps.wikimedia.org/ and improved the fallback to wget, since the torrent link is also deprecated and now redirects to another URL.
1 parent bb7de14 commit b8e99e8

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

scripts/buildDatabase.sh

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ export LC_ALL=C
99
# By default, the latest Wikipedia dump will be downloaded. If a download date in the format
1010
# YYYYMMDD is provided as the first argument, it will be used instead.
1111
if [[ $# -eq 0 ]]; then
12-
DOWNLOAD_DATE=$(wget -q -O- https://dumps.wikimedia.your.org/enwiki/ | grep -Po '\d{8}' | sort | tail -n1)
12+
DOWNLOAD_DATE=$(wget -q -O- https://dumps.wikimedia.org/enwiki/ | grep -Po '\d{8}' | sort | tail -n1)
1313
else
1414
if [ ${#1} -ne 8 ]; then
1515
echo "[ERROR] Invalid download date provided: $1"
@@ -22,8 +22,8 @@ fi
2222
ROOT_DIR=`pwd`
2323
OUT_DIR="dump"
2424

25-
DOWNLOAD_URL="https://dumps.wikimedia.your.org/enwiki/$DOWNLOAD_DATE"
26-
TORRENT_URL="https://tools.wmflabs.org/dump-torrents/enwiki/$DOWNLOAD_DATE"
25+
DOWNLOAD_URL="https://dumps.wikimedia.org/enwiki/$DOWNLOAD_DATE"
26+
TORRENT_URL="https://dump-torrents.toolforge.org/enwiki/$DOWNLOAD_DATE"
2727

2828
SHA1SUM_FILENAME="enwiki-$DOWNLOAD_DATE-sha1sums.txt"
2929
REDIRECTS_FILENAME="enwiki-$DOWNLOAD_DATE-redirect.sql.gz"
@@ -51,8 +51,10 @@ function download_file() {
5151
if [ $1 != sha1sums ] && command -v aria2c > /dev/null; then
5252
echo "[INFO] Downloading $1 file via torrent"
5353
time aria2c --summary-interval=0 --console-log-level=warn --seed-time=0 \
54-
"$TORRENT_URL/$2.torrent"
55-
else
54+
"$TORRENT_URL/$2.torrent" 2>&1 | grep -v "ERROR\|Exception" || true
55+
fi
56+
57+
if [ ! -f $2 ]; then
5658
echo "[INFO] Downloading $1 file via wget"
5759
time wget --progress=dot:giga "$DOWNLOAD_URL/$2"
5860
fi

0 commit comments

Comments
 (0)