diff --git a/examples/html-py-ever/Cargo.lock b/examples/html-py-ever/Cargo.lock
index b54ea051..51a77522 100644
--- a/examples/html-py-ever/Cargo.lock
+++ b/examples/html-py-ever/Cargo.lock
@@ -8,12 +8,6 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
-[[package]]
-name = "bitflags"
-version = "1.3.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
-
[[package]]
name = "bitflags"
version = "2.5.0"
@@ -32,27 +26,17 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
-[[package]]
-name = "convert_case"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"
-
[[package]]
name = "cssparser"
-version = "0.27.2"
+version = "0.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a"
+checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3"
dependencies = [
"cssparser-macros",
"dtoa-short",
"itoa",
- "matches",
"phf",
- "proc-macro2",
- "quote",
"smallvec",
- "syn 1.0.109",
]
[[package]]
@@ -71,10 +55,8 @@ version = "0.99.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
dependencies = [
- "convert_case",
"proc-macro2",
"quote",
- "rustc_version",
"syn 1.0.109",
]
@@ -93,6 +75,12 @@ dependencies = [
"dtoa",
]
+[[package]]
+name = "ego-tree"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8"
+
[[package]]
name = "futf"
version = "0.1.5"
@@ -113,14 +101,12 @@ dependencies = [
]
[[package]]
-name = "getrandom"
-version = "0.1.16"
+name = "getopts"
+version = "0.2.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce"
+checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df"
dependencies = [
- "cfg-if",
- "libc",
- "wasi 0.9.0+wasi-snapshot-preview1",
+ "unicode-width",
]
[[package]]
@@ -131,7 +117,7 @@ checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
- "wasi 0.11.0+wasi-snapshot-preview1",
+ "wasi",
]
[[package]]
@@ -144,23 +130,20 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
name = "html-py-ever"
version = "0.1.0"
dependencies = [
- "kuchiki",
"pyo3",
- "tendril",
+ "scraper",
]
[[package]]
name = "html5ever"
-version = "0.25.2"
+version = "0.29.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5c13fb08e5d4dfc151ee5e88bae63f7773d61852f3bdc73c9f4b9e1bde03148"
+checksum = "3b7410cae13cbc75623c98ac4cbfd1f0bedddf3227afc24f370cf0f50a44a11c"
dependencies = [
"log",
"mac",
"markup5ever",
- "proc-macro2",
- "quote",
- "syn 1.0.109",
+ "match_token",
]
[[package]]
@@ -171,21 +154,9 @@ checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5"
[[package]]
name = "itoa"
-version = "0.4.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
-
-[[package]]
-name = "kuchiki"
-version = "0.8.1"
+version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1ea8e9c6e031377cff82ee3001dc8026cdf431ed4e2e6b51f98ab8c73484a358"
-dependencies = [
- "cssparser",
- "html5ever",
- "matches",
- "selectors",
-]
+checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
[[package]]
name = "libc"
@@ -217,9 +188,9 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "markup5ever"
-version = "0.10.1"
+version = "0.14.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a24f40fb03852d1cdd84330cddcaf98e9ec08a7b7768e952fad3b4cf048ec8fd"
+checksum = "c7a7213d12e1864c0f002f52c2923d4556935a43dec5e71355c2760e0f6e7a18"
dependencies = [
"log",
"phf",
@@ -230,10 +201,15 @@ dependencies = [
]
[[package]]
-name = "matches"
-version = "0.1.10"
+name = "match_token"
+version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5"
+checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.61",
+]
[[package]]
name = "memoffset"
@@ -250,12 +226,6 @@ version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
-[[package]]
-name = "nodrop"
-version = "0.1.14"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
-
[[package]]
name = "once_cell"
version = "1.21.3"
@@ -287,75 +257,73 @@ dependencies = [
[[package]]
name = "phf"
-version = "0.8.0"
+version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12"
+checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
dependencies = [
"phf_macros",
- "phf_shared 0.8.0",
- "proc-macro-hack",
+ "phf_shared 0.11.3",
]
[[package]]
name = "phf_codegen"
-version = "0.8.0"
+version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815"
+checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
dependencies = [
- "phf_generator 0.8.0",
- "phf_shared 0.8.0",
+ "phf_generator 0.11.3",
+ "phf_shared 0.11.3",
]
[[package]]
name = "phf_generator"
-version = "0.8.0"
+version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526"
+checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
dependencies = [
- "phf_shared 0.8.0",
- "rand 0.7.3",
+ "phf_shared 0.10.0",
+ "rand",
]
[[package]]
name = "phf_generator"
-version = "0.10.0"
+version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
+checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
dependencies = [
- "phf_shared 0.10.0",
- "rand 0.8.5",
+ "phf_shared 0.11.3",
+ "rand",
]
[[package]]
name = "phf_macros"
-version = "0.8.0"
+version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c"
+checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
dependencies = [
- "phf_generator 0.8.0",
- "phf_shared 0.8.0",
- "proc-macro-hack",
+ "phf_generator 0.11.3",
+ "phf_shared 0.11.3",
"proc-macro2",
"quote",
- "syn 1.0.109",
+ "syn 2.0.61",
]
[[package]]
name = "phf_shared"
-version = "0.8.0"
+version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7"
+checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
dependencies = [
- "siphasher",
+ "siphasher 0.3.11",
]
[[package]]
name = "phf_shared"
-version = "0.10.0"
+version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
+checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
dependencies = [
- "siphasher",
+ "siphasher 1.0.2",
]
[[package]]
@@ -376,12 +344,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
-[[package]]
-name = "proc-macro-hack"
-version = "0.5.20+deprecated"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068"
-
[[package]]
name = "proc-macro2"
version = "1.0.82"
@@ -461,20 +423,6 @@ dependencies = [
"proc-macro2",
]
-[[package]]
-name = "rand"
-version = "0.7.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
-dependencies = [
- "getrandom 0.1.16",
- "libc",
- "rand_chacha 0.2.2",
- "rand_core 0.5.1",
- "rand_hc",
- "rand_pcg",
-]
-
[[package]]
name = "rand"
version = "0.8.5"
@@ -482,18 +430,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
- "rand_chacha 0.3.1",
- "rand_core 0.6.4",
-]
-
-[[package]]
-name = "rand_chacha"
-version = "0.2.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
-dependencies = [
- "ppv-lite86",
- "rand_core 0.5.1",
+ "rand_chacha",
+ "rand_core",
]
[[package]]
@@ -503,16 +441,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
- "rand_core 0.6.4",
-]
-
-[[package]]
-name = "rand_core"
-version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
-dependencies = [
- "getrandom 0.1.16",
+ "rand_core",
]
[[package]]
@@ -521,25 +450,7 @@ version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
- "getrandom 0.2.15",
-]
-
-[[package]]
-name = "rand_hc"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
-dependencies = [
- "rand_core 0.5.1",
-]
-
-[[package]]
-name = "rand_pcg"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429"
-dependencies = [
- "rand_core 0.5.1",
+ "getrandom",
]
[[package]]
@@ -548,16 +459,7 @@ version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e"
dependencies = [
- "bitflags 2.5.0",
-]
-
-[[package]]
-name = "rustc_version"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
-dependencies = [
- "semver",
+ "bitflags",
]
[[package]]
@@ -567,31 +469,39 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
-name = "selectors"
+name = "scraper"
version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe"
+checksum = "cc3d051b884f40e309de6c149734eab57aa8cc1347992710dc80bcc1c2194c15"
dependencies = [
- "bitflags 1.3.2",
+ "cssparser",
+ "ego-tree",
+ "getopts",
+ "html5ever",
+ "precomputed-hash",
+ "selectors",
+ "tendril",
+]
+
+[[package]]
+name = "selectors"
+version = "0.26.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8"
+dependencies = [
+ "bitflags",
"cssparser",
"derive_more",
"fxhash",
"log",
- "matches",
+ "new_debug_unreachable",
"phf",
"phf_codegen",
"precomputed-hash",
"servo_arc",
"smallvec",
- "thin-slice",
]
-[[package]]
-name = "semver"
-version = "1.0.23"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
-
[[package]]
name = "serde"
version = "1.0.200"
@@ -614,11 +524,10 @@ dependencies = [
[[package]]
name = "servo_arc"
-version = "0.1.1"
+version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432"
+checksum = "170fb83ab34de17dc69aa7c67482b22218ddb85da56546f9bd6b929e32a05930"
dependencies = [
- "nodrop",
"stable_deref_trait",
]
@@ -628,6 +537,12 @@ version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
+[[package]]
+name = "siphasher"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e"
+
[[package]]
name = "smallvec"
version = "1.13.2"
@@ -705,18 +620,18 @@ dependencies = [
"utf-8",
]
-[[package]]
-name = "thin-slice"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c"
-
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
+[[package]]
+name = "unicode-width"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
+
[[package]]
name = "unindent"
version = "0.2.3"
@@ -729,12 +644,6 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
-[[package]]
-name = "wasi"
-version = "0.9.0+wasi-snapshot-preview1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
-
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
diff --git a/examples/html-py-ever/Cargo.toml b/examples/html-py-ever/Cargo.toml
index f943a8e1..d8fa4e12 100644
--- a/examples/html-py-ever/Cargo.toml
+++ b/examples/html-py-ever/Cargo.toml
@@ -5,9 +5,8 @@ authors = ["konstin "]
edition = "2021"
[dependencies]
-kuchiki = "0.8.0"
+scraper = "0.22"
pyo3 = "0.27"
-tendril = "0.4.3"
[lib]
name = "html_py_ever"
diff --git a/examples/html-py-ever/README.md b/examples/html-py-ever/README.md
index 4e3c3b28..e6fac0e3 100644
--- a/examples/html-py-ever/README.md
+++ b/examples/html-py-ever/README.md
@@ -1,6 +1,6 @@
# html-py-ever
-Demoing how to use [html5ever](https://github.com/servo/html5ever) through [kuchiki](https://github.com/kuchiki-rs/kuchiki) to speed up html parsing and css-selecting.
+Demonstrates how to use [html5ever](https://github.com/servo/html5ever) through [scraper](https://github.com/rust-scraper/scraper) to speed up HTML parsing and CSS selection.
## Usage
diff --git a/examples/html-py-ever/noxfile.py b/examples/html-py-ever/noxfile.py
index ed5da892..8976f4eb 100644
--- a/examples/html-py-ever/noxfile.py
+++ b/examples/html-py-ever/noxfile.py
@@ -12,3 +12,12 @@ def test(session: nox.Session):
session.install("--no-build-isolation", ".")
# Test Python package
session.run("pytest", *session.posargs)
+
+
+@nox.session()
+def bench(session: nox.Session):
+ session.install(SETUPTOOLS_RUST, "pytest", "pytest-benchmark", "beautifulsoup4")
+ # Ensure build uses version of setuptools-rust under development
+ session.install("--no-build-isolation", ".")
+ # Run the test suite with benchmarking enabled
+ session.run("pytest", "--benchmark-enable", *session.posargs)
diff --git a/examples/html-py-ever/pyproject.toml b/examples/html-py-ever/pyproject.toml
index e368fbdb..23e6e098 100644
--- a/examples/html-py-ever/pyproject.toml
+++ b/examples/html-py-ever/pyproject.toml
@@ -36,3 +36,6 @@ target = "html_py_ever.html_py_ever"
# ^-- The last part of the name (e.g. "html_py_ever") has to match lib.name in Cargo.toml,
# but you can add a prefix to nest it inside of a Python package.
# See reference for RustExtension in https://setuptools-rust.readthedocs.io/en/latest/reference.html
+# Benchmark timing is off by default; the nox "bench" session turns it on with --benchmark-enable.
+[tool.pytest.ini_options]
+addopts = "--benchmark-disable"
diff --git a/examples/html-py-ever/pytest.ini b/examples/html-py-ever/pytest.ini
deleted file mode 100644
index e69de29b..00000000
diff --git a/examples/html-py-ever/rust/lib.rs b/examples/html-py-ever/rust/lib.rs
index 6558418a..b731a702 100644
--- a/examples/html-py-ever/rust/lib.rs
+++ b/examples/html-py-ever/rust/lib.rs
@@ -3,37 +3,43 @@ use pyo3::prelude::*;
#[pymodule]
mod html_py_ever {
use pyo3::prelude::*;
+ use scraper::{Html, Selector};
+ use std::fs;
use std::io::Read;
use std::path::Path;
- use tendril::stream::TendrilSink;
/// A parsed html document
#[pyclass(unsendable)]
struct Document {
- node: kuchiki::NodeRef,
+ html: Html,
}
#[pymethods]
impl Document {
/// Returns the selected elements as strings
- fn select(&self, selector: &str) -> Vec {
- self.node
- .select(selector)
- .unwrap()
- .map(|css_match| css_match.text_contents())
- .collect()
+ fn select(&self, selector: &str) -> PyResult> {
+ let selector = Selector::parse(selector)
+ .map_err(|e| PyErr::new::(format!("{e:?}")))?;
+ Ok(self
+ .html
+ .select(&selector)
+ .map(|element| element.html())
+ .collect())
}
}
impl Document {
fn from_reader(reader: &mut impl Read) -> PyResult {
- let node = kuchiki::parse_html().from_utf8().read_from(reader)?;
- Ok(Document { node })
+ let mut html_string = String::new();
+ reader.read_to_string(&mut html_string)?;
+ let html = Html::parse_document(&html_string);
+ Ok(Document { html })
}
fn from_file(path: &Path) -> PyResult {
- let node = kuchiki::parse_html().from_utf8().from_file(path)?;
- Ok(Document { node })
+ let html_string = fs::read_to_string(path)?;
+ let html = Html::parse_document(&html_string);
+ Ok(Document { html })
}
}
diff --git a/examples/html-py-ever/rust/main.rs b/examples/html-py-ever/rust/main.rs
index e1748e49..340b4dad 100644
--- a/examples/html-py-ever/rust/main.rs
+++ b/examples/html-py-ever/rust/main.rs
@@ -1,11 +1,11 @@
//! Pure rust version for comparing with python based calls
-use kuchiki;
+use scraper::{Html, Selector};
use std::env;
+use std::fs;
use std::path::PathBuf;
use std::time::Instant;
-use tendril::stream::TendrilSink;
fn main() {
let path = PathBuf::from(
@@ -15,13 +15,15 @@ fn main() {
);
let now = Instant::now();
- let document = kuchiki::parse_html().from_utf8().from_file(&path).unwrap();
+ let html_string = fs::read_to_string(&path).unwrap();
+ let document = Html::parse_document(&html_string);
println!("{:?}", now.elapsed());
+
let now2 = Instant::now();
+ let selector = Selector::parse("a[href]").unwrap();
let links: Vec = document
- .select("a[href]")
- .unwrap()
- .map(|css_match| css_match.text_contents())
+ .select(&selector)
+ .map(|element| element.text().collect())
.collect();
println!("{} {:?}", links.len(), now2.elapsed());
}
diff --git a/examples/html-py-ever/tests/test_selector.py b/examples/html-py-ever/tests/test_selector.py
index 862c9b12..8ac3e187 100755
--- a/examples/html-py-ever/tests/test_selector.py
+++ b/examples/html-py-ever/tests/test_selector.py
@@ -1,12 +1,11 @@
#!/usr/bin/env python
-from glob import glob
import os
+from glob import glob
import html_py_ever
import pytest
from bs4 import BeautifulSoup
-
HTML_FILES = glob(os.path.join(os.path.dirname(__file__), "*.html"))