diff --git a/.gitignore b/.gitignore index 43bd95c2b..b24029433 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ poetry.lock noxenv.txt noxsettings.toml +hyperparamtuning/ +*.prof ### Python ### *.pyc @@ -16,6 +18,8 @@ push_to_pypi.sh .nfs* *.log *.json +!kernel_tuner/schema/T1/1.0.0/input-schema.json +!test/test_T1_input.json *.csv .cache *.ipynb_checkpoints diff --git a/CHANGELOG.md b/CHANGELOG.md index 57aaf27b3..c57986b50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,13 +3,17 @@ All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/). ## Unreleased + +- Additional improvements to search space construction - changed HIP python bindings from pyhip-interface to the official hip-python +- Added Python 3.13 and experimental 3.14 support +- Dropped Python 3.8 and 3.9 support (due to incompatibility with newer scipy versions) ## [1.0.0] - 2024-04-04 - HIP backend to support tuning HIP kernels on AMD GPUs - Experimental features for mixed-precision and accuracy tuning - Experimental features for OpenACC tuning -- Major speedup due to new parser and using revamped python-constraint for searchspace building +- Major speedup due to new parser and using revamped python-constraint for search space construction - Implemented ability to use `PySMT` and `ATF` for searchspace building - Added Poetry for dependency and build management - Switched from `setup.py` and `setup.cfg` to `pyproject.toml` for centralized metadata, added relevant tests diff --git a/INSTALL.rst b/INSTALL.rst index 13df5f95c..8e938676f 100644 --- a/INSTALL.rst +++ b/INSTALL.rst @@ -20,7 +20,7 @@ Linux users could type the following to download and install Python 3 using Mini wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh bash Miniconda3-latest-Linux-x86_64.sh -You are of course also free to use your own Python installation, and the Kernel Tuner is developed to be fully compatible with 
Python 3.9 and newer. +You are of course also free to use your own Python installation, and the Kernel Tuner is developed to be fully compatible with Python 3.10 and newer. Installing Python Packages -------------------------- diff --git a/doc/requirements_test.txt b/doc/requirements_test.txt index f4f62912c..8a5ac0b63 100644 --- a/doc/requirements_test.txt +++ b/doc/requirements_test.txt @@ -1,116 +1,195 @@ -argcomplete==3.6.0 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:2e4e42ec0ba2fff54b0d244d0b1623e86057673e57bafe72dda59c64bd5dee8b \ - --hash=sha256:4e3e4e10beb20e06444dbac0ac8dda650cb6349caeefe980208d3c548708bedd -attrs==25.1.0 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:1c97078a80c814273a76b2a298a932eb681c87415c11dee0a6921de7f1b02c3e \ - --hash=sha256:c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a -build==1.2.2.post1 ; python_version >= "3.9" and python_version < "3.15" \ +argcomplete==3.6.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:927531c2fbaa004979f18c2316f6ffadcfc5cc2de15ae2624dfe65deaf60e14f \ + --hash=sha256:cef54d7f752560570291214f0f1c48c3b8ef09aca63d65de7747612666725dbc +asttokens==3.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7 \ + --hash=sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2 +attrs==25.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3 \ + --hash=sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b +build==1.2.2.post1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ 
--hash=sha256:1d61c0887fa860c01971625baae8bdd338e517b836a2f70dd1f7aa3a6b2fc5b5 \ --hash=sha256:b36993e92ca9375a219c99e606a122ff365a760a2d4bba0caa09bd5278b608b7 -colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.15" and (sys_platform == "win32" or os_name == "nt") \ +colorama==0.4.6 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" or python_version >= "3.10" and python_version <= "3.11" and os_name == "nt" or python_version >= "3.12" and python_version < "4" and sys_platform == "win32" or python_version >= "3.12" and python_version < "4" and os_name == "nt" \ --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \ --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6 -colorlog==6.9.0 ; python_version >= "3.9" and python_version < "3.15" \ +colorlog==6.9.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2 -coverage[toml]==7.6.12 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:00b2086892cf06c7c2d74983c9595dc511acca00665480b3ddff749ec4fb2a95 \ - --hash=sha256:0533adc29adf6a69c1baa88c3d7dbcaadcffa21afbed3ca7a225a440e4744bf9 \ - --hash=sha256:06097c7abfa611c91edb9e6920264e5be1d6ceb374efb4986f38b09eed4cb2fe \ - --hash=sha256:07e92ae5a289a4bc4c0aae710c0948d3c7892e20fd3588224ebe242039573bf0 \ - --hash=sha256:0a9d8be07fb0832636a0f72b80d2a652fe665e80e720301fb22b191c3434d924 \ - --hash=sha256:0e549f54ac5f301e8e04c569dfdb907f7be71b06b88b5063ce9d6953d2d58574 \ - --hash=sha256:0ef01d70198431719af0b1f5dcbefc557d44a190e749004042927b2a3fed0702 \ - --hash=sha256:0f16f44025c06792e0fb09571ae454bcc7a3ec75eeb3c36b025eccf501b1a4c3 \ - --hash=sha256:14d47376a4f445e9743f6c83291e60adb1b127607a3618e3185bbc8091f0467b \ - 
--hash=sha256:1a936309a65cc5ca80fa9f20a442ff9e2d06927ec9a4f54bcba9c14c066323f2 \ - --hash=sha256:1ceeb90c3eda1f2d8c4c578c14167dbd8c674ecd7d38e45647543f19839dd6ea \ - --hash=sha256:1f7ffa05da41754e20512202c866d0ebfc440bba3b0ed15133070e20bf5aeb5f \ - --hash=sha256:200e10beb6ddd7c3ded322a4186313d5ca9e63e33d8fab4faa67ef46d3460af3 \ - --hash=sha256:220fa6c0ad7d9caef57f2c8771918324563ef0d8272c94974717c3909664e674 \ - --hash=sha256:2251fabcfee0a55a8578a9d29cecfee5f2de02f11530e7d5c5a05859aa85aee9 \ - --hash=sha256:2458f275944db8129f95d91aee32c828a408481ecde3b30af31d552c2ce284a0 \ - --hash=sha256:299cf973a7abff87a30609879c10df0b3bfc33d021e1adabc29138a48888841e \ - --hash=sha256:2b996819ced9f7dbb812c701485d58f261bef08f9b85304d41219b1496b591ef \ - --hash=sha256:3688b99604a24492bcfe1c106278c45586eb819bf66a654d8a9a1433022fb2eb \ - --hash=sha256:3a1e465f398c713f1b212400b4e79a09829cd42aebd360362cd89c5bdc44eb87 \ - --hash=sha256:488c27b3db0ebee97a830e6b5a3ea930c4a6e2c07f27a5e67e1b3532e76b9ef1 \ - --hash=sha256:48cfc4641d95d34766ad41d9573cc0f22a48aa88d22657a1fe01dca0dbae4de2 \ - --hash=sha256:4b467a8c56974bf06e543e69ad803c6865249d7a5ccf6980457ed2bc50312703 \ - --hash=sha256:53c56358d470fa507a2b6e67a68fd002364d23c83741dbc4c2e0680d80ca227e \ - --hash=sha256:5d1095bbee1851269f79fd8e0c9b5544e4c00c0c24965e66d8cba2eb5bb535fd \ - --hash=sha256:641dfe0ab73deb7069fb972d4d9725bf11c239c309ce694dd50b1473c0f641c3 \ - --hash=sha256:64cbb1a3027c79ca6310bf101014614f6e6e18c226474606cf725238cf5bc2d4 \ - --hash=sha256:66fe626fd7aa5982cdebad23e49e78ef7dbb3e3c2a5960a2b53632f1f703ea45 \ - --hash=sha256:676f92141e3c5492d2a1596d52287d0d963df21bf5e55c8b03075a60e1ddf8aa \ - --hash=sha256:69e62c5034291c845fc4df7f8155e8544178b6c774f97a99e2734b05eb5bed31 \ - --hash=sha256:704c8c8c6ce6569286ae9622e534b4f5b9759b6f2cd643f1c1a61f666d534fe8 \ - --hash=sha256:78f5243bb6b1060aed6213d5107744c19f9571ec76d54c99cc15938eb69e0e86 \ - --hash=sha256:79cac3390bfa9836bb795be377395f28410811c9066bc4eefd8015258a7578c6 \ - 
--hash=sha256:7ae6eabf519bc7871ce117fb18bf14e0e343eeb96c377667e3e5dd12095e0288 \ - --hash=sha256:7e39e845c4d764208e7b8f6a21c541ade741e2c41afabdfa1caa28687a3c98cf \ - --hash=sha256:8161d9fbc7e9fe2326de89cd0abb9f3599bccc1287db0aba285cb68d204ce929 \ - --hash=sha256:8bec2ac5da793c2685ce5319ca9bcf4eee683b8a1679051f8e6ec04c4f2fd7dc \ - --hash=sha256:959244a17184515f8c52dcb65fb662808767c0bd233c1d8a166e7cf74c9ea985 \ - --hash=sha256:9b148068e881faa26d878ff63e79650e208e95cf1c22bd3f77c3ca7b1d9821a3 \ - --hash=sha256:aa6f302a3a0b5f240ee201297fff0bbfe2fa0d415a94aeb257d8b461032389bd \ - --hash=sha256:ace9048de91293e467b44bce0f0381345078389814ff6e18dbac8fdbf896360e \ - --hash=sha256:ad7525bf0241e5502168ae9c643a2f6c219fa0a283001cee4cf23a9b7da75879 \ - --hash=sha256:b01a840ecc25dce235ae4c1b6a0daefb2a203dba0e6e980637ee9c2f6ee0df57 \ - --hash=sha256:b076e625396e787448d27a411aefff867db2bffac8ed04e8f7056b07024eed5a \ - --hash=sha256:b172f8e030e8ef247b3104902cc671e20df80163b60a203653150d2fc204d1ad \ - --hash=sha256:b1f097878d74fe51e1ddd1be62d8e3682748875b461232cf4b52ddc6e6db0bba \ - --hash=sha256:b95574d06aa9d2bd6e5cc35a5bbe35696342c96760b69dc4287dbd5abd4ad51d \ - --hash=sha256:bda1c5f347550c359f841d6614fb8ca42ae5cb0b74d39f8a1e204815ebe25750 \ - --hash=sha256:cec6b9ce3bd2b7853d4a4563801292bfee40b030c05a3d29555fd2a8ee9bd68c \ - --hash=sha256:d1a987778b9c71da2fc8948e6f2656da6ef68f59298b7e9786849634c35d2c3c \ - --hash=sha256:d74c08e9aaef995f8c4ef6d202dbd219c318450fe2a76da624f2ebb9c8ec5d9f \ - --hash=sha256:e18aafdfb3e9ec0d261c942d35bd7c28d031c5855dadb491d2723ba54f4c3015 \ - --hash=sha256:e216c5c45f89ef8971373fd1c5d8d1164b81f7f5f06bbf23c37e7908d19e8558 \ - --hash=sha256:e695df2c58ce526eeab11a2e915448d3eb76f75dffe338ea613c1201b33bab2f \ - --hash=sha256:e7575ab65ca8399c8c4f9a7d61bbd2d204c8b8e447aab9d355682205c9dd948d \ - --hash=sha256:e995b3b76ccedc27fe4f477b349b7d64597e53a43fc2961db9d3fbace085d69d \ - --hash=sha256:ea31689f05043d520113e0552f039603c4dd71fa4c287b64cb3606140c66f425 \ - 
--hash=sha256:eb5507795caabd9b2ae3f1adc95f67b1104971c22c624bb354232d65c4fc90b3 \ - --hash=sha256:eb8668cfbc279a536c633137deeb9435d2962caec279c3f8cf8b91fff6ff8953 \ - --hash=sha256:ecea0c38c9079570163d663c0433a9af4094a60aafdca491c6a3d248c7432827 \ - --hash=sha256:f25d8b92a4e31ff1bd873654ec367ae811b3a943583e05432ea29264782dc32c \ - --hash=sha256:f60a297c3987c6c02ffb29effc70eadcbb412fe76947d394a1091a3615948e2f \ - --hash=sha256:f973643ef532d4f9be71dd88cf7588936685fdb576d93a79fe9f65bc337d9d73 -distlib==0.3.9 ; python_version >= "3.9" and python_version < "3.15" \ +coverage==7.7.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:02fad4f8faa4153db76f9246bc95c1d99f054f4e0a884175bff9155cf4f856cb \ + --hash=sha256:092b134129a8bb940c08b2d9ceb4459af5fb3faea77888af63182e17d89e1cf1 \ + --hash=sha256:0ce92c5a9d7007d838456f4b77ea159cb628187a137e1895331e530973dcf862 \ + --hash=sha256:0dab4ef76d7b14f432057fdb7a0477e8bffca0ad39ace308be6e74864e632271 \ + --hash=sha256:1165490be0069e34e4f99d08e9c5209c463de11b471709dfae31e2a98cbd49fd \ + --hash=sha256:11dd6f52c2a7ce8bf0a5f3b6e4a8eb60e157ffedc3c4b4314a41c1dfbd26ce58 \ + --hash=sha256:15d54ecef1582b1d3ec6049b20d3c1a07d5e7f85335d8a3b617c9960b4f807e0 \ + --hash=sha256:171e9977c6a5d2b2be9efc7df1126fd525ce7cad0eb9904fe692da007ba90d81 \ + --hash=sha256:177d837339883c541f8524683e227adcaea581eca6bb33823a2a1fdae4c988e1 \ + --hash=sha256:18f544356bceef17cc55fcf859e5664f06946c1b68efcea6acdc50f8f6a6e776 \ + --hash=sha256:199a1272e642266b90c9f40dec7fd3d307b51bf639fa0d15980dc0b3246c1393 \ + --hash=sha256:1e6f867379fd033a0eeabb1be0cffa2bd660582b8b0c9478895c509d875a9d9e \ + --hash=sha256:2444fbe1ba1889e0b29eb4d11931afa88f92dc507b7248f45be372775b3cef4f \ + --hash=sha256:25fe40967717bad0ce628a0223f08a10d54c9d739e88c9cbb0f77b5959367542 \ + --hash=sha256:264ff2bcce27a7f455b64ac0dfe097680b65d9a1a293ef902675fa8158d20b24 \ + 
--hash=sha256:2a79c4a09765d18311c35975ad2eb1ac613c0401afdd9cb1ca4110aeb5dd3c4c \ + --hash=sha256:2c492401bdb3a85824669d6a03f57b3dfadef0941b8541f035f83bbfc39d4282 \ + --hash=sha256:315ff74b585110ac3b7ab631e89e769d294f303c6d21302a816b3554ed4c81af \ + --hash=sha256:34a3bf6b92e6621fc4dcdaab353e173ccb0ca9e4bfbcf7e49a0134c86c9cd303 \ + --hash=sha256:37351dc8123c154fa05b7579fdb126b9f8b1cf42fd6f79ddf19121b7bdd4aa04 \ + --hash=sha256:385618003e3d608001676bb35dc67ae3ad44c75c0395d8de5780af7bb35be6b2 \ + --hash=sha256:392cc8fd2b1b010ca36840735e2a526fcbd76795a5d44006065e79868cc76ccf \ + --hash=sha256:3d03287eb03186256999539d98818c425c33546ab4901028c8fa933b62c35c3a \ + --hash=sha256:44683f2556a56c9a6e673b583763096b8efbd2df022b02995609cf8e64fc8ae0 \ + --hash=sha256:44af11c00fd3b19b8809487630f8a0039130d32363239dfd15238e6d37e41a48 \ + --hash=sha256:452735fafe8ff5918236d5fe1feac322b359e57692269c75151f9b4ee4b7e1bc \ + --hash=sha256:4c181ceba2e6808ede1e964f7bdc77bd8c7eb62f202c63a48cc541e5ffffccb6 \ + --hash=sha256:4dd532dac197d68c478480edde74fd4476c6823355987fd31d01ad9aa1e5fb59 \ + --hash=sha256:520af84febb6bb54453e7fbb730afa58c7178fd018c398a8fcd8e269a79bf96d \ + --hash=sha256:553ba93f8e3c70e1b0031e4dfea36aba4e2b51fe5770db35e99af8dc5c5a9dfe \ + --hash=sha256:5b7b02e50d54be6114cc4f6a3222fec83164f7c42772ba03b520138859b5fde1 \ + --hash=sha256:63306486fcb5a827449464f6211d2991f01dfa2965976018c9bab9d5e45a35c8 \ + --hash=sha256:75c82b27c56478d5e1391f2e7b2e7f588d093157fa40d53fd9453a471b1191f2 \ + --hash=sha256:7ba5ff236c87a7b7aa1441a216caf44baee14cbfbd2256d306f926d16b026578 \ + --hash=sha256:7e688010581dbac9cab72800e9076e16f7cccd0d89af5785b70daa11174e94de \ + --hash=sha256:80b5b207a8b08c6a934b214e364cab2fa82663d4af18981a6c0a9e95f8df7602 \ + --hash=sha256:822fa99dd1ac686061e1219b67868e25d9757989cf2259f735a4802497d6da31 \ + --hash=sha256:881cae0f9cbd928c9c001487bb3dcbfd0b0af3ef53ae92180878591053be0cb3 \ + --hash=sha256:88d96127ae01ff571d465d4b0be25c123789cef88ba0879194d673fdea52f54e \ + 
--hash=sha256:8b1c65a739447c5ddce5b96c0a388fd82e4bbdff7251396a70182b1d83631019 \ + --hash=sha256:8fed429c26b99641dc1f3a79179860122b22745dd9af36f29b141e178925070a \ + --hash=sha256:9bb47cc9f07a59a451361a850cb06d20633e77a9118d05fd0f77b1864439461b \ + --hash=sha256:a6b6b3bd121ee2ec4bd35039319f3423d0be282b9752a5ae9f18724bc93ebe7c \ + --hash=sha256:ae13ed5bf5542d7d4a0a42ff5160e07e84adc44eda65ddaa635c484ff8e55917 \ + --hash=sha256:af94fb80e4f159f4d93fb411800448ad87b6039b0500849a403b73a0d36bb5ae \ + --hash=sha256:b4c144c129343416a49378e05c9451c34aae5ccf00221e4fa4f487db0816ee2f \ + --hash=sha256:b52edb940d087e2a96e73c1523284a2e94a4e66fa2ea1e2e64dddc67173bad94 \ + --hash=sha256:b559adc22486937786731dac69e57296cb9aede7e2687dfc0d2696dbd3b1eb6b \ + --hash=sha256:b838a91e84e1773c3436f6cc6996e000ed3ca5721799e7789be18830fad009a2 \ + --hash=sha256:ba8480ebe401c2f094d10a8c4209b800a9b77215b6c796d16b6ecdf665048950 \ + --hash=sha256:bc96441c9d9ca12a790b5ae17d2fa6654da4b3962ea15e0eabb1b1caed094777 \ + --hash=sha256:c90e9141e9221dd6fbc16a2727a5703c19443a8d9bf7d634c792fa0287cee1ab \ + --hash=sha256:d2e73e2ac468536197e6b3ab79bc4a5c9da0f078cd78cfcc7fe27cf5d1195ef0 \ + --hash=sha256:d3154b369141c3169b8133973ac00f63fcf8d6dbcc297d788d36afbb7811e511 \ + --hash=sha256:d66ff48ab3bb6f762a153e29c0fc1eb5a62a260217bc64470d7ba602f5886d20 \ + --hash=sha256:d6874929d624d3a670f676efafbbc747f519a6121b581dd41d012109e70a5ebd \ + --hash=sha256:e33426a5e1dc7743dd54dfd11d3a6c02c5d127abfaa2edd80a6e352b58347d1a \ + --hash=sha256:e52eb31ae3afacdacfe50705a15b75ded67935770c460d88c215a9c0c40d0e9c \ + --hash=sha256:eae79f8e3501133aa0e220bbc29573910d096795882a70e6f6e6637b09522133 \ + --hash=sha256:eebd927b86761a7068a06d3699fd6c20129becf15bb44282db085921ea0f1585 \ + --hash=sha256:eff187177d8016ff6addf789dcc421c3db0d014e4946c1cc3fbf697f7852459d \ + --hash=sha256:f5f99a93cecf799738e211f9746dc83749b5693538fbfac279a61682ba309387 \ + --hash=sha256:fbba59022e7c20124d2f520842b75904c7b9f16c854233fa46575c69949fb5b9 
+decorator==5.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360 \ + --hash=sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a +distlib==0.3.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 -exceptiongroup==1.2.2 ; python_version >= "3.9" and python_version < "3.11" \ +exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "3.11" \ --hash=sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b \ --hash=sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc -filelock==3.17.0 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:533dc2f7ba78dc2f0f531fc6c4940addf7b70a481e269a5a3b93be94ffbe8338 \ - --hash=sha256:ee4e77401ef576ebb38cd7f13b9b28893194acc20a8e68e18730ba9c0e54660e -importlib-metadata==8.6.1 ; python_version >= "3.9" and python_full_version < "3.10.2" \ +executing==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa \ + --hash=sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755 +filelock==3.18.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2 \ + --hash=sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de +importlib-metadata==8.6.1 ; python_version >= "3.10" and python_full_version < "3.10.2" \ --hash=sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e \ 
--hash=sha256:310b41d755445d74569f993ccfc22838295d9fe005425094fad953d7f15c8580 -iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \ - --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 -joblib==1.4.2 ; python_version >= "3.9" and python_version < "3.15" \ +iniconfig==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7 \ + --hash=sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760 +ipython==8.34.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:0419883fa46e0baa182c5d50ebb8d6b49df1889fdb70750ad6d8cfe678eda6e3 \ + --hash=sha256:c31d658e754673ecc6514583e7dda8069e47136eb62458816b7d1e6625948b5a +jedi==0.19.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0 \ + --hash=sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9 +joblib==1.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6 \ --hash=sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e -jsonschema-specifications==2024.10.1 ; python_version >= "3.9" and python_version < "3.15" \ +jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272 \ --hash=sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf -jsonschema==4.23.0 ; python_version >= "3.9" 
and python_version < "3.15" \ +jsonschema==4.23.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4 \ --hash=sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566 -mock==5.2.0 ; python_version >= "3.9" and python_version < "3.15" \ +markupsafe==2.1.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf \ + --hash=sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff \ + --hash=sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f \ + --hash=sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3 \ + --hash=sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532 \ + --hash=sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f \ + --hash=sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617 \ + --hash=sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df \ + --hash=sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4 \ + --hash=sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906 \ + --hash=sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f \ + --hash=sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4 \ + --hash=sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8 \ + --hash=sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371 \ + --hash=sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2 \ + --hash=sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465 \ + --hash=sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52 \ + 
--hash=sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6 \ + --hash=sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169 \ + --hash=sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad \ + --hash=sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2 \ + --hash=sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0 \ + --hash=sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029 \ + --hash=sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f \ + --hash=sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a \ + --hash=sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced \ + --hash=sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5 \ + --hash=sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c \ + --hash=sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf \ + --hash=sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9 \ + --hash=sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb \ + --hash=sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad \ + --hash=sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3 \ + --hash=sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1 \ + --hash=sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46 \ + --hash=sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc \ + --hash=sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a \ + --hash=sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee \ + --hash=sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900 \ + --hash=sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5 \ + --hash=sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea \ + 
--hash=sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f \ + --hash=sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5 \ + --hash=sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e \ + --hash=sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a \ + --hash=sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f \ + --hash=sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50 \ + --hash=sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a \ + --hash=sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b \ + --hash=sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4 \ + --hash=sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff \ + --hash=sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2 \ + --hash=sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46 \ + --hash=sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b \ + --hash=sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf \ + --hash=sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5 \ + --hash=sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5 \ + --hash=sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab \ + --hash=sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd \ + --hash=sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68 +matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90 \ + --hash=sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca +mock==5.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ 
--hash=sha256:4e460e818629b4b173f32d08bf30d3af8123afbb8e04bb5707a1fd4799e503f0 \ --hash=sha256:7ba87f72ca0e915175596069dbbcc7c75af7b5e9b9bc107ad6349ede0819982f -nox-poetry==1.2.0 ; python_version >= "3.9" and python_version < "3.15" \ +nox-poetry==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:2531a404e3a21eb73fc1a587a548506a8e2c4c1e6e7ef0c1d0d8d6453b7e5d26 \ --hash=sha256:266eea7a0ab3cad7f4121ecc05b76945036db3b67e6e347557f05010a18e2682 -nox==2024.10.9 ; python_version >= "3.9" and python_version < "3.15" \ +nox==2024.10.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 -numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.15" \ +numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b \ --hash=sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818 \ --hash=sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20 \ @@ -147,10 +226,10 @@ numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.15" \ --hash=sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef \ --hash=sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 \ --hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f -packaging==24.2 ; python_version >= "3.9" and python_version < "3.15" \ +packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ 
--hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f -pandas==2.2.3 ; python_version >= "3.9" and python_version < "3.15" \ +pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a \ --hash=sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d \ --hash=sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5 \ @@ -193,54 +272,72 @@ pandas==2.2.3 ; python_version >= "3.9" and python_version < "3.15" \ --hash=sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015 \ --hash=sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24 \ --hash=sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319 -pep440==0.1.2 ; python_version >= "3.9" and python_version < "3.15" \ +parso==0.8.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18 \ + --hash=sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d +pep440==0.1.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:36d6ad73f2b5d07769294cafe183500ac89d848c922a3d3f521b968481880d51 \ --hash=sha256:58b37246cc2b13fee1ca2a3c092cb3704d21ecf621a5bdbb168e44e697f6d04d -platformdirs==4.3.6 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ - --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb -pluggy==1.5.0 ; python_version >= "3.9" and python_version < "3.15" \ +pexpect==4.9.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "4" 
and (sys_platform != "win32" and sys_platform != "emscripten") \ + --hash=sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523 \ + --hash=sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f +platformdirs==4.3.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94 \ + --hash=sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351 +pluggy==1.5.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \ --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 -pyproject-hooks==1.2.0 ; python_version >= "3.9" and python_version < "3.15" \ +prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:544748f3860a2623ca5cd6d2795e7a14f3d0e1c3c9728359013f79877fc89bab \ + --hash=sha256:9b6427eb19e479d98acff65196a307c555eb567989e6d88ebbb1b509d9779198 +ptyprocess==0.7.0 ; python_version >= "3.10" and python_version <= "3.11" and os_name != "nt" or python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "4" and os_name != "nt" or python_version >= "3.12" and python_version < "4" and (sys_platform != "win32" and sys_platform != "emscripten") \ + --hash=sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35 \ + --hash=sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220 +pure-eval==0.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0 \ + 
--hash=sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42 +pygments==2.19.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f \ + --hash=sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c +pyproject-hooks==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8 \ --hash=sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913 -pytest-cov==5.0.0 ; python_version >= "3.9" and python_version < "3.15" \ +pytest-cov==5.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652 \ --hash=sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857 -pytest-timeout==2.3.1 ; python_version >= "3.9" and python_version < "3.15" \ +pytest-timeout==2.3.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:12397729125c6ecbdaca01035b9e5239d4db97352320af155b3f5de1ba5165d9 \ --hash=sha256:68188cb703edfc6a18fad98dc25a3c61e9f24d644b0b70f33af545219fc7813e -pytest==8.3.5 ; python_version >= "3.9" and python_version < "3.15" \ +pytest==8.3.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820 \ --hash=sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845 -python-constraint2==2.1.0 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:02f46e4a7e8a46048604870287f1c55312eea47c2c15dd58b51057cb7d057bdc \ - 
--hash=sha256:0e5ece0b4e85ed680af6b9db33ef3497a6f9499b8957cd830cd139f17ac29aef \ - --hash=sha256:0f3a09c1947e6a90b9558cd1651e86dbe10f698aad56247596f2b856307707f0 \ - --hash=sha256:1c650d717c2585fd8b2247f680ca1dcc6ea970cc5644c1d847f97eacb9f7dce2 \ - --hash=sha256:38e4dbb2522ca2295873a57f6e0fddbb0856a780c87edd79b4074fd78790fed3 \ - --hash=sha256:441f6a06e6c88c5fbe724b834c820d959ba7542037139153d1466c7be00c7cc0 \ - --hash=sha256:6b8f82be66242fc5587011360b07c39e6e71e5d1c8f26a107dd2b04ab7854fcc \ - --hash=sha256:8086a21724048746e68ab721cb4a216db15f86bb700d557af0ac60f2087d4d4e \ - --hash=sha256:abea9ae443bf33fb396a6fb597b713e110f2abd9ecf1a656cd81f53da6751b79 \ - --hash=sha256:ace17786565250de48b8d18da555feb31f5fb3521b2bd65e9871459e1d179600 \ - --hash=sha256:b2385c99a9fe67ae26085a5a048c1d206cf0bd74acf0cd036227afa2a90fa4fd \ - --hash=sha256:e29bed90abe1240bf24794e73e4d8fa3e50b6aa9226d915b1902cdd03375c28b \ - --hash=sha256:ee3d33ca5694724a17bb596b93ff8687c70b4c07945e40a9007250e282e7ab28 \ - --hash=sha256:f28d07eae04d83d454f0e6ba2da0678786a21f2d405998a3eec960b56d809692 \ - --hash=sha256:fbb6ab033a7a4250bce11ca12fdf8958c6c42853e933cf585dbd265e0967dd93 \ - --hash=sha256:fc3cffd0f16cb9b34d2e95bd6d27425dd24044073760477a1341e835fc9c45f4 -python-dateutil==2.9.0.post0 ; python_version >= "3.9" and python_version < "3.15" \ +python-constraint2==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:0a841b088076d9dc481989359076b439d5201126583d920173ed9ab9cf7c4771 \ + --hash=sha256:0f0acfbae77ef7fcbff25d1c46b2360e0c486667c1a595b5c7cd4a6540cad5e6 \ + --hash=sha256:203b740a78266123e36d88215bb232e5e682c5845b674d2d5b1218fb3394ff1f \ + --hash=sha256:298c322c157ae6f5a9a9b8de3d08eefcdfed7b78e4abb2ddffe1bd345ed7997b \ + --hash=sha256:348ee17de0de028b68bf8050af142adfae37b500e60ac6758dc499bc19712805 \ + --hash=sha256:46cb1946fc7fb63262c43d4366f8cfceb551fb7a2bf10f275ac236d968746e02 \ + 
--hash=sha256:48c4f8ca1573f08bb6ef900cbe2e642aa6afb77c11a1f7c9d42c054fcfd93b8b \ + --hash=sha256:7bf723afbfdd13155f38d1344b015fd962818fdf70cdf39005a6a5bf810e5001 \ + --hash=sha256:85ea5330b12ccb4a474c89e3fdd037c5173db0216985da0e9a5bc20f6e26d0ca \ + --hash=sha256:8a39fecbb893137814a4f0ce82fd78df68789d658c6991bb6d57d773a6f8878d \ + --hash=sha256:aae18d318fd5150cda3befcf40b178a8dc661abb79cf663fefb7edd6e3afd6ab \ + --hash=sha256:b4d6159d05204cddfa4e46eef24a10f1d6aed41a905ca83314f5d1caa31599ab \ + --hash=sha256:c337839cfb0b3559f2f211e2ae67993c7187abf5dddbc5b587fe26b7c1b5d0b0 \ + --hash=sha256:c3b887f073f59cf5151df3cd25c2142016676da9034d5af56478c735526882d3 \ + --hash=sha256:d060b179461f09ee6571222ee63b4ac8dafdb6a41ffa75296a2f6b07a6bc500e \ + --hash=sha256:f1590a5699e1097f0057513e64bac4ac2d11f5848467c1c27967e1217f8bec3d +python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \ --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 -pytz==2025.1 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57 \ - --hash=sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e -referencing==0.36.2 ; python_version >= "3.9" and python_version < "3.15" \ +pytz==2025.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3 \ + --hash=sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00 +referencing==0.36.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa \ 
--hash=sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0 -rpds-py==0.23.1 ; python_version >= "3.9" and python_version < "3.15" \ +rpds-py==0.23.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:09cd7dbcb673eb60518231e02874df66ec1296c01a4fcd733875755c02014b19 \ --hash=sha256:0f3288930b947cbebe767f84cf618d2cbe0b13be476e749da0e6a009f986248c \ --hash=sha256:0fced9fd4a07a1ded1bac7e961ddd9753dd5d8b755ba8e05acba54a21f5f1522 \ @@ -344,7 +441,7 @@ rpds-py==0.23.1 ; python_version >= "3.9" and python_version < "3.15" \ --hash=sha256:fad784a31869747df4ac968a351e070c06ca377549e4ace94775aaa3ab33ee06 \ --hash=sha256:fc869af5cba24d45fb0399b0cfdbcefcf6910bf4dee5d74036a57cf5264b3ff4 \ --hash=sha256:fee513135b5a58f3bb6d89e48326cd5aa308e4bcdf2f7d59f67c861ada482bf8 -ruff==0.4.10 ; python_version >= "3.9" and python_version < "3.15" \ +ruff==0.4.10 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:0f54c481b39a762d48f64d97351048e842861c6662d63ec599f67d515cb417f6 \ --hash=sha256:18238c80ee3d9100d3535d8eb15a59c4a0753b45cc55f8bf38f38d6a597b9739 \ --hash=sha256:330421543bd3222cdfec481e8ff3460e8702ed1e58b494cf9d9e4bf90db52b9d \ @@ -362,7 +459,7 @@ ruff==0.4.10 ; python_version >= "3.9" and python_version < "3.15" \ --hash=sha256:d8f71885bce242da344989cae08e263de29752f094233f932d4f5cfb4ef36a81 \ --hash=sha256:dd1fcee327c20addac7916ca4e2653fbbf2e8388d8a6477ce5b4e986b68ae6c0 \ --hash=sha256:ffe3cd2f89cb54561c62e5fa20e8f182c0a444934bf430515a4b422f1ab7b7ca -scikit-learn==1.6.1 ; python_version >= "3.9" and python_version < "3.15" \ +scikit-learn==1.6.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:0650e730afb87402baa88afbf31c07b84c98272622aaba002559b614600ca691 \ --hash=sha256:0c8d036eb937dbb568c6242fa598d551d88fb4399c0344d95c001980ec1c7d36 \ 
--hash=sha256:1061b7c028a8663fb9a1a1baf9317b64a257fcb036dae5c8752b2abef31d136f \ @@ -393,39 +490,63 @@ scikit-learn==1.6.1 ; python_version >= "3.9" and python_version < "3.15" \ --hash=sha256:e7be3fa5d2eb9be7d77c3734ff1d599151bb523674be9b834e8da6abe132f44e \ --hash=sha256:e8ca8cb270fee8f1f76fa9bfd5c3507d60c6438bbee5687f81042e2bb98e5a97 \ --hash=sha256:fa909b1a36e000a03c382aade0bd2063fd5680ff8b8e501660c0f59f021a6415 -scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:017367484ce5498445aade74b1d5ab377acdc65e27095155e448c88497755a5d \ - --hash=sha256:095a87a0312b08dfd6a6155cbbd310a8c51800fc931b8c0b84003014b874ed3c \ - --hash=sha256:20335853b85e9a49ff7572ab453794298bcf0354d8068c5f6775a0eabf350aca \ - --hash=sha256:27e52b09c0d3a1d5b63e1105f24177e544a222b43611aaf5bc44d4a0979e32f9 \ - --hash=sha256:2831f0dc9c5ea9edd6e51e6e769b655f08ec6db6e2e10f86ef39bd32eb11da54 \ - --hash=sha256:2ac65fb503dad64218c228e2dc2d0a0193f7904747db43014645ae139c8fad16 \ - --hash=sha256:392e4ec766654852c25ebad4f64e4e584cf19820b980bc04960bca0b0cd6eaa2 \ - --hash=sha256:436bbb42a94a8aeef855d755ce5a465479c721e9d684de76bf61a62e7c2b81d5 \ - --hash=sha256:45484bee6d65633752c490404513b9ef02475b4284c4cfab0ef946def50b3f59 \ - --hash=sha256:54f430b00f0133e2224c3ba42b805bfd0086fe488835effa33fa291561932326 \ - --hash=sha256:5713f62f781eebd8d597eb3f88b8bf9274e79eeabf63afb4a737abc6c84ad37b \ - --hash=sha256:5d72782f39716b2b3509cd7c33cdc08c96f2f4d2b06d51e52fb45a19ca0c86a1 \ - --hash=sha256:637e98dcf185ba7f8e663e122ebf908c4702420477ae52a04f9908707456ba4d \ - --hash=sha256:8335549ebbca860c52bf3d02f80784e91a004b71b059e3eea9678ba994796a24 \ - --hash=sha256:949ae67db5fa78a86e8fa644b9a6b07252f449dcf74247108c50e1d20d2b4627 \ - --hash=sha256:a014c2b3697bde71724244f63de2476925596c24285c7a637364761f8710891c \ - --hash=sha256:a78b4b3345f1b6f68a763c6e25c0c9a23a9fd0f39f5f3d200efe8feda560a5fa \ - --hash=sha256:cdd7dacfb95fea358916410ec61bbc20440f7860333aee6d882bb8046264e949 \ - 
--hash=sha256:cfa31f1def5c819b19ecc3a8b52d28ffdcc7ed52bb20c9a7589669dd3c250989 \ - --hash=sha256:d533654b7d221a6a97304ab63c41c96473ff04459e404b83275b60aa8f4b7004 \ - --hash=sha256:d605e9c23906d1994f55ace80e0125c587f96c020037ea6aa98d01b4bd2e222f \ - --hash=sha256:de3ade0e53bc1f21358aa74ff4830235d716211d7d077e340c7349bc3542e884 \ - --hash=sha256:e89369d27f9e7b0884ae559a3a956e77c02114cc60a6058b4e5011572eea9299 \ - --hash=sha256:eccfa1906eacc02de42d70ef4aecea45415f5be17e72b61bafcfd329bdc52e94 \ - --hash=sha256:f26264b282b9da0952a024ae34710c2aff7d27480ee91a2e82b7b7073c24722f -six==1.17.0 ; python_version >= "3.9" and python_version < "3.15" \ +scipy==1.15.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:01edfac9f0798ad6b46d9c4c9ca0e0ad23dbf0b1eb70e96adb9fa7f525eff0bf \ + --hash=sha256:03205d57a28e18dfd39f0377d5002725bf1f19a46f444108c29bdb246b6c8a11 \ + --hash=sha256:08b57a9336b8e79b305a143c3655cc5bdbe6d5ece3378578888d2afbb51c4e37 \ + --hash=sha256:11e7ad32cf184b74380f43d3c0a706f49358b904fa7d5345f16ddf993609184d \ + --hash=sha256:28a0d2c2075946346e4408b211240764759e0fabaeb08d871639b5f3b1aca8a0 \ + --hash=sha256:2b871df1fe1a3ba85d90e22742b93584f8d2b8e6124f8372ab15c71b73e428b8 \ + --hash=sha256:302093e7dfb120e55515936cb55618ee0b895f8bcaf18ff81eca086c17bd80af \ + --hash=sha256:42dabaaa798e987c425ed76062794e93a243be8f0f20fff6e7a89f4d61cb3d40 \ + --hash=sha256:447ce30cee6a9d5d1379087c9e474628dab3db4a67484be1b7dc3196bfb2fac9 \ + --hash=sha256:4c6676490ad76d1c2894d77f976144b41bd1a4052107902238047fb6a473e971 \ + --hash=sha256:54c462098484e7466362a9f1672d20888f724911a74c22ae35b61f9c5919183d \ + --hash=sha256:597a0c7008b21c035831c39927406c6181bcf8f60a73f36219b69d010aa04737 \ + --hash=sha256:5a6fd6eac1ce74a9f77a7fc724080d507c5812d61e72bd5e4c489b042455865e \ + --hash=sha256:5ea7ed46d437fc52350b028b1d44e002646e28f3e8ddc714011aaf87330f2f32 \ + 
--hash=sha256:601881dfb761311045b03114c5fe718a12634e5608c3b403737ae463c9885d53 \ + --hash=sha256:62ca1ff3eb513e09ed17a5736929429189adf16d2d740f44e53270cc800ecff1 \ + --hash=sha256:69ea6e56d00977f355c0f84eba69877b6df084516c602d93a33812aa04d90a3d \ + --hash=sha256:6a8e34cf4c188b6dd004654f88586d78f95639e48a25dfae9c5e34a6dc34547e \ + --hash=sha256:6d0194c37037707b2afa7a2f2a924cf7bac3dc292d51b6a925e5fcb89bc5c776 \ + --hash=sha256:6f223753c6ea76983af380787611ae1291e3ceb23917393079dcc746ba60cfb5 \ + --hash=sha256:6f5e296ec63c5da6ba6fa0343ea73fd51b8b3e1a300b0a8cae3ed4b1122c7462 \ + --hash=sha256:7cd5b77413e1855351cdde594eca99c1f4a588c2d63711388b6a1f1c01f62274 \ + --hash=sha256:869269b767d5ee7ea6991ed7e22b3ca1f22de73ab9a49c44bad338b725603301 \ + --hash=sha256:87994da02e73549dfecaed9e09a4f9d58a045a053865679aeb8d6d43747d4df3 \ + --hash=sha256:888307125ea0c4466287191e5606a2c910963405ce9671448ff9c81c53f85f58 \ + --hash=sha256:92233b2df6938147be6fa8824b8136f29a18f016ecde986666be5f4d686a91a4 \ + --hash=sha256:9412f5e408b397ff5641080ed1e798623dbe1ec0d78e72c9eca8992976fa65aa \ + --hash=sha256:9b18aa747da280664642997e65aab1dd19d0c3d17068a04b3fe34e2559196cb9 \ + --hash=sha256:9de9d1416b3d9e7df9923ab23cd2fe714244af10b763975bea9e4f2e81cebd27 \ + --hash=sha256:a2ec871edaa863e8213ea5df811cd600734f6400b4af272e1c011e69401218e9 \ + --hash=sha256:a5080a79dfb9b78b768cebf3c9dcbc7b665c5875793569f48bf0e2b1d7f68f6f \ + --hash=sha256:a8bf5cb4a25046ac61d38f8d3c3426ec11ebc350246a4642f2f315fe95bda655 \ + --hash=sha256:b09ae80010f52efddb15551025f9016c910296cf70adbf03ce2a8704f3a5ad20 \ + --hash=sha256:b5e025e903b4f166ea03b109bb241355b9c42c279ea694d8864d033727205e65 \ + --hash=sha256:bad78d580270a4d32470563ea86c6590b465cb98f83d760ff5b0990cb5518a93 \ + --hash=sha256:bae43364d600fdc3ac327db99659dcb79e6e7ecd279a75fe1266669d9a652828 \ + --hash=sha256:c4697a10da8f8765bb7c83e24a470da5797e37041edfd77fd95ba3811a47c4fd \ + --hash=sha256:c90ebe8aaa4397eaefa8455a8182b164a6cc1d59ad53f79943f266d99f68687f \ + 
--hash=sha256:cd58a314d92838f7e6f755c8a2167ead4f27e1fd5c1251fd54289569ef3495ec \ + --hash=sha256:cf72ff559a53a6a6d77bd8eefd12a17995ffa44ad86c77a5df96f533d4e6c6bb \ + --hash=sha256:def751dd08243934c884a3221156d63e15234a3155cf25978b0a668409d45eb6 \ + --hash=sha256:e7c68b6a43259ba0aab737237876e5c2c549a031ddb7abc28c7b47f22e202ded \ + --hash=sha256:ecf797d2d798cf7c838c6d98321061eb3e72a74710e6c40540f0e8087e3b499e \ + --hash=sha256:f031846580d9acccd0044efd1a90e6f4df3a6e12b4b6bd694a7bc03a89892b28 \ + --hash=sha256:fb530e4794fc8ea76a4a21ccb67dea33e5e0e60f07fc38a49e821e1eae3b71a0 \ + --hash=sha256:fe8a9eb875d430d81755472c5ba75e84acc980e4a8f6204d402849234d3017db +six==1.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \ --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81 -threadpoolctl==3.5.0 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107 \ - --hash=sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467 -tomli==2.2.1 ; python_version >= "3.9" and python_version < "3.15" \ +stack-data==0.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9 \ + --hash=sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695 +threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb \ + --hash=sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e +tomli==2.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ 
--hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \ --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \ @@ -458,21 +579,27 @@ tomli==2.2.1 ; python_version >= "3.9" and python_version < "3.15" \ --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \ --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 -tomlkit==0.13.2 ; python_version >= "3.9" and python_version < "3.15" \ +tomlkit==0.13.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde \ --hash=sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79 -typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "3.13" \ +traitlets==5.14.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7 \ + --hash=sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f +typing-extensions==4.12.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 -tzdata==2025.1 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694 \ - --hash=sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639 -virtualenv==20.29.3 ; python_version >= "3.9" and python_version < "3.15" \ +tzdata==2025.2 ; python_version >= "3.10" and python_version <= "3.11" or 
python_version >= "3.12" and python_version < "4" \ + --hash=sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8 \ + --hash=sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9 +virtualenv==20.29.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:3e3d00f5807e83b234dfb6122bf37cfadf4be216c53a49ac059d02414f819170 \ --hash=sha256:95e39403fcf3940ac45bc717597dba16110b74506131845d9b687d5e73d947ac -xmltodict==0.14.2 ; python_version >= "3.9" and python_version < "3.15" \ +wcwidth==0.2.13 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ + --hash=sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859 \ + --hash=sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5 +xmltodict==0.14.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ --hash=sha256:201e7c28bb210e374999d1dde6382923ab0ed1a8a5faeece48ab525b7810a553 \ --hash=sha256:20cc7d723ed729276e808f26fb6b3599f786cbc37e06c65e192ba77c40f20aac -zipp==3.21.0 ; python_version >= "3.9" and python_full_version < "3.10.2" \ +zipp==3.21.0 ; python_version >= "3.10" and python_full_version < "3.10.2" \ --hash=sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4 \ --hash=sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931 diff --git a/doc/source/dev-environment.rst b/doc/source/dev-environment.rst index 6c36101ac..570a8c970 100644 --- a/doc/source/dev-environment.rst +++ b/doc/source/dev-environment.rst @@ -27,8 +27,8 @@ Steps with :bash:`sudo` access (e.g. on a local device): * After installation, restart your shell. #. Install the required Python versions: * On some systems, additional packages may be needed to build Python versions. 
For example on Ubuntu: :bash:`sudo apt install build-essential zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev libsqlite3-dev wget libbz2-dev liblzma-dev lzma`. - * Install the Python versions with: :bash:`pyenv install 3.9 3.10 3.11 3.12`. The reason we're installing all these versions as opposed to just one, is so we can test against all supported Python versions. -#. Set the Python versions so they can be found: :bash:`pyenv local 3.9 3.10 3.11 3.12` (replace :bash:`local` with :bash:`global` when not using the virtualenv). + * Install the Python versions with: :bash:`pyenv install 3.10 3.11 3.12 3.13`. The reason we're installing all these versions as opposed to just one, is so we can test against all supported Python versions. +#. Set the Python versions so they can be found: :bash:`pyenv local 3.10 3.11 3.12 3.13` (replace :bash:`local` with :bash:`global` when not using the virtualenv). #. Setup a local virtual environment in the folder: :bash:`pyenv virtualenv 3.11 kerneltuner` (or whatever environment name and Python version you prefer). #. `Install Poetry `__. * Use :bash:`curl -sSL https://install.python-poetry.org | python3 -` to install Poetry.
diff --git a/kernel_tuner/__init__.py b/kernel_tuner/__init__.py index b64d69813..40b88d463 100644 --- a/kernel_tuner/__init__.py +++ b/kernel_tuner/__init__.py @@ -1,5 +1,5 @@ from kernel_tuner.integration import store_results, create_device_targets -from kernel_tuner.interface import tune_kernel, run_kernel +from kernel_tuner.interface import tune_kernel, tune_kernel_T1, run_kernel from importlib.metadata import version diff --git a/kernel_tuner/backends/backend.py b/kernel_tuner/backends/backend.py index 586c3204f..6063dbb43 100644 --- a/kernel_tuner/backends/backend.py +++ b/kernel_tuner/backends/backend.py @@ -1,16 +1,16 @@ -"""This module contains the interface of all kernel_tuner backends""" +"""This module contains the interface of all kernel_tuner backends.""" from __future__ import print_function from abc import ABC, abstractmethod class Backend(ABC): - """Base class for kernel_tuner backends""" + """Base class for kernel_tuner backends.""" @abstractmethod def ready_argument_list(self, arguments): """This method must implement the allocation of the arguments on device memory.""" - pass + return arguments @abstractmethod def compile(self, kernel_instance): @@ -64,7 +64,7 @@ def refresh_memory(self, device_memory, host_arguments, should_sync): class GPUBackend(Backend): - """Base class for GPU backends""" + """Base class for GPU backends.""" @abstractmethod def __init__(self, device, iterations, compiler_options, observers): @@ -93,7 +93,7 @@ def refresh_memory(self, gpu_memory, host_arguments, should_sync): class CompilerBackend(Backend): - """Base class for compiler backends""" + """Base class for compiler backends.""" @abstractmethod def __init__(self, iterations, compiler_options, compiler): diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py new file mode 100644 index 000000000..65a263ce1 --- /dev/null +++ b/kernel_tuner/backends/hypertuner.py @@ -0,0 +1,131 @@ +"""This module contains a 'device' for hyperparameter 
tuning using the autotuning methodology.""" + +import platform +from pathlib import Path + +from numpy import mean + +from kernel_tuner.backends.backend import Backend +from kernel_tuner.observers.observer import BenchmarkObserver + +try: + methodology_available = True + from autotuning_methodology.experiments import generate_experiment_file + from autotuning_methodology.report_experiments import get_strategy_scores +except ImportError: + methodology_available = False + + +class ScoreObserver(BenchmarkObserver): + def __init__(self, dev): + self.dev = dev + self.scores = [] + + def after_finish(self): + self.scores.append(self.dev.last_score) + + def get_results(self): + results = {'score': mean(self.scores), 'scores': self.scores.copy()} + self.scores = [] + return results + +class HypertunerFunctions(Backend): + """Class for executing hyperparameter tuning.""" + units = {} + + def __init__(self, iterations): + self.iterations = iterations + self.observers = [ScoreObserver(self)] + self.name = platform.processor() + self.max_threads = 1024 + self.last_score = None + + # set the environment options + env = dict() + env["iterations"] = self.iterations + self.env = env + + # check for the methodology package + if methodology_available is not True: + raise ImportError("Unable to import the autotuning methodology, run `pip install autotuning_methodology`.") + + def ready_argument_list(self, arguments): + arglist = super().ready_argument_list(arguments) + if arglist is None: + arglist = [] + return arglist + + def compile(self, kernel_instance): + super().compile(kernel_instance) + path = Path(__file__).parent.parent.parent / "hyperparamtuning" + path.mkdir(exist_ok=True) + + # TODO get applications & GPUs args from benchmark + gpus = ["RTX_3090", "RTX_2080_Ti"] + applications = None + # applications = [ + # { + # "name": "convolution", + # "folder": "./cached_data_used/kernels", + # "input_file": "convolution.json" + # }, + # { + # "name": "pnpoly", + # "folder": 
"./cached_data_used/kernels", + # "input_file": "pnpoly.json" + # } + # ] + + # strategy settings + strategy: str = kernel_instance.arguments[0] + hyperparams = [{'name': k, 'value': v} for k, v in kernel_instance.params.items()] + hyperparams_string = "_".join(f"{k}={str(v)}" for k, v in kernel_instance.params.items()) + searchspace_strategies = [{ + "autotuner": "KernelTuner", + "name": f"{strategy.lower()}_{hyperparams_string}", + "display_name": strategy.replace('_', ' ').capitalize(), + "search_method": strategy.lower(), + 'search_method_hyperparameters': hyperparams + }] + + # any additional settings + override = { + "experimental_groups_defaults": { + "samples": self.iterations + } + } + + name = kernel_instance.name if len(kernel_instance.name) > 0 else kernel_instance.kernel_source.kernel_name + experiments_filepath = generate_experiment_file(name, path, searchspace_strategies, applications, gpus, + override=override, overwrite_existing_file=True) + return str(experiments_filepath) + + def start_event(self): + return super().start_event() + + def stop_event(self): + return super().stop_event() + + def kernel_finished(self): + super().kernel_finished() + return True + + def synchronize(self): + return super().synchronize() + + def run_kernel(self, func, gpu_args=None, threads=None, grid=None, stream=None): + # generate the experiments file + experiments_filepath = Path(func) + + # run the methodology to get a fitness score for this configuration + scores = get_strategy_scores(str(experiments_filepath)) + self.last_score = scores[list(scores.keys())[0]]['score'] + + def memset(self, allocation, value, size): + return super().memset(allocation, value, size) + + def memcpy_dtoh(self, dest, src): + return super().memcpy_dtoh(dest, src) + + def memcpy_htod(self, dest, src): + return super().memcpy_htod(dest, src) diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py index 4dbce0827..9b9318cd2 100644 --- a/kernel_tuner/core.py +++ b/kernel_tuner/core.py @@ 
-1,4 +1,4 @@ -""" Module for grouping the core functionality needed by most runners """ +"""Module for grouping the core functionality needed by most runners.""" import logging import re @@ -14,15 +14,16 @@ import kernel_tuner.util as util from kernel_tuner.accuracy import Tunable -from kernel_tuner.backends.pycuda import PyCudaFunctions +from kernel_tuner.backends.compiler import CompilerFunctions from kernel_tuner.backends.cupy import CupyFunctions from kernel_tuner.backends.hip import HipFunctions +from kernel_tuner.backends.hypertuner import HypertunerFunctions from kernel_tuner.backends.nvcuda import CudaFunctions from kernel_tuner.backends.opencl import OpenCLFunctions -from kernel_tuner.backends.compiler import CompilerFunctions +from kernel_tuner.backends.pycuda import PyCudaFunctions from kernel_tuner.observers.nvml import NVMLObserver -from kernel_tuner.observers.tegra import TegraObserver from kernel_tuner.observers.observer import ContinuousObserver, OutputObserver, PrologueObserver +from kernel_tuner.observers.tegra import TegraObserver try: import torch @@ -50,15 +51,15 @@ class KernelInstance(_KernelInstance): - """Class that represents the specific parameterized instance of a kernel""" + """Class that represents the specific parameterized instance of a kernel.""" def delete_temp_files(self): - """Delete any generated temp files""" + """Delete any generated temp files.""" for v in self.temp_files.values(): util.delete_temp_file(v) def prepare_temp_files_for_error_msg(self): - """Prepare temp file with source code, and return list of temp file names""" + """Prepare temp file with source code, and return list of temp file names.""" temp_filename = util.get_temp_filename(suffix=self.kernel_source.get_suffix()) util.write_file(temp_filename, self.kernel_string) ret = [temp_filename] @@ -92,7 +93,7 @@ def __init__(self, kernel_name, kernel_sources, lang, defines=None): self.lang = lang.upper() def get_kernel_string(self, index=0, params=None): - 
"""retrieve the kernel source with the given index and return as a string + """Retrieve the kernel source with the given index and return as a string. See util.get_kernel_string() for details. @@ -108,11 +109,16 @@ def get_kernel_string(self, index=0, params=None): """ logging.debug("get_kernel_string called") + if hasattr(self, 'lang') and self.lang.upper() == "HYPERTUNER": + return "" + kernel_source = self.kernel_sources[index] return util.get_kernel_string(kernel_source, params) - def prepare_list_of_files(self, kernel_name, params, grid, threads, block_size_names): - """prepare the kernel string along with any additional files + def prepare_list_of_files( + self, kernel_name, params, grid, threads, block_size_names + ): + """Prepare the kernel string along with any additional files. The first file in the list is allowed to include or read in the others The files beyond the first are considered additional files that may also contain tunable parameters @@ -145,6 +151,9 @@ def prepare_list_of_files(self, kernel_name, params, grid, threads, block_size_n """ temp_files = dict() + if self.lang.upper() == "HYPERTUNER": + return tuple(["", "", temp_files]) + for i, f in enumerate(self.kernel_sources): if i > 0 and not util.looks_like_a_filename(f): raise ValueError("When passing multiple kernel sources, the secondary entries must be filenames") @@ -191,7 +200,6 @@ def get_suffix(self, index=0): This uses the user-specified suffix if available, or one based on the lang/backend otherwise. """ - # TODO: Consider delegating this to the backend suffix = self.get_user_suffix(index) if suffix is not None: @@ -204,7 +212,7 @@ def get_suffix(self, index=0): return ".c" def check_argument_lists(self, kernel_name, arguments): - """Check if the kernel arguments have the correct types + """Check if the kernel arguments have the correct types. This is done by calling util.check_argument_list on each kernel string. 
""" @@ -216,7 +224,7 @@ def check_argument_lists(self, kernel_name, arguments): class DeviceInterface(object): - """Class that offers a High-Level Device Interface to the rest of the Kernel Tuner""" + """Class that offers a High-Level Device Interface to the rest of the Kernel Tuner.""" def __init__( self, @@ -229,7 +237,7 @@ def __init__( iterations=7, observers=None, ): - """Instantiate the DeviceInterface, based on language in kernel source + """Instantiate the DeviceInterface, based on language in kernel source. :param kernel_source: The kernel sources :type kernel_source: kernel_tuner.core.KernelSource @@ -259,6 +267,7 @@ def __init__( """ lang = kernel_source.lang + self.requires_warmup = True logging.debug("DeviceInterface instantiated, lang=%s", lang) @@ -305,6 +314,9 @@ def __init__( iterations=iterations, observers=observers, ) + elif lang.upper() == "HYPERTUNER": + dev = HypertunerFunctions(iterations=iterations) + self.requires_warmup = False else: raise ValueError( "Sorry, support for languages other than CUDA, OpenCL, HIP, C, and Fortran is not implemented yet" @@ -347,8 +359,7 @@ def __init__( print("Using: " + self.dev.name) def benchmark_prologue(self, func, gpu_args, threads, grid, result): - """Benchmark prologue one kernel execution per PrologueObserver""" - + """Benchmark prologue one kernel execution per PrologueObserver.""" for obs in self.prologue_observers: self.dev.synchronize() obs.before_start() @@ -358,8 +369,7 @@ def benchmark_prologue(self, func, gpu_args, threads, grid, result): result.update(obs.get_results()) def benchmark_default(self, func, gpu_args, threads, grid, result): - """Benchmark one kernel execution for 'iterations' at a time""" - + """Benchmark one kernel execution for 'iterations' at a time.""" self.dev.synchronize() for _ in range(self.iterations): for obs in self.benchmark_observers: @@ -383,7 +393,7 @@ def benchmark_default(self, func, gpu_args, threads, grid, result): def benchmark_continuous(self, func, gpu_args, 
threads, grid, result, duration): - """Benchmark continuously for at least 'duration' seconds""" + """Benchmark continuously for at least 'duration' seconds.""" iterations = int(np.ceil(duration / (result["time"] / 1000))) self.dev.synchronize() for obs in self.continuous_observers: @@ -474,8 +484,10 @@ def benchmark(self, func, gpu_args, instance, verbose, objective, skip_nvml_sett raise e return result - def check_kernel_output(self, func, gpu_args, instance, answer, atol, verify, verbose): - """runs the kernel once and checks the result against answer""" + def check_kernel_output( + self, func, gpu_args, instance, answer, atol, verify, verbose + ): + """Runs the kernel once and checks the result against answer.""" logging.debug("check_kernel_output") # if not using custom verify function, check if the length is the same @@ -610,7 +622,7 @@ def compile_and_benchmark(self, kernel_source, gpu_args, params, kernel_options, return result def compile_kernel(self, instance, verbose): - """compile the kernel for this specific instance""" + """Compile the kernel for this specific instance.""" logging.debug("compile_kernel " + instance.name) # compile kernel_string into device func @@ -643,23 +655,23 @@ def compile_kernel(self, instance, verbose): @staticmethod def preprocess_gpu_arguments(old_arguments, params): - """Get a flat list of arguments based on the configuration given by `params`""" + """Get a flat list of arguments based on the configuration given by `params`.""" return _preprocess_gpu_arguments(old_arguments, params) def copy_shared_memory_args(self, smem_args): - """adds shared memory arguments to the most recently compiled module""" + """Adds shared memory arguments to the most recently compiled module.""" self.dev.copy_shared_memory_args(smem_args) def copy_constant_memory_args(self, cmem_args): - """adds constant memory arguments to the most recently compiled module""" + """Adds constant memory arguments to the most recently compiled module.""" 
self.dev.copy_constant_memory_args(cmem_args) def copy_texture_memory_args(self, texmem_args): - """adds texture memory arguments to the most recently compiled module""" + """Adds texture memory arguments to the most recently compiled module.""" self.dev.copy_texture_memory_args(texmem_args) def create_kernel_instance(self, kernel_source, kernel_options, params, verbose): - """create kernel instance from kernel source, parameters, problem size, grid divisors, and so on""" + """Create kernel instance from kernel source, parameters, problem size, grid divisors, and so on.""" grid_div = ( kernel_options.grid_div_x, kernel_options.grid_div_y, @@ -702,15 +714,15 @@ def create_kernel_instance(self, kernel_source, kernel_options, params, verbose) return KernelInstance(name, kernel_source, kernel_string, temp_files, threads, grid, params, arguments) def get_environment(self): - """Return dictionary with information about the environment""" + """Return dictionary with information about the environment.""" return self.dev.env def memcpy_dtoh(self, dest, src): - """perform a device to host memory copy""" + """Perform a device to host memory copy.""" self.dev.memcpy_dtoh(dest, src) def ready_argument_list(self, arguments): - """ready argument list to be passed to the kernel, allocates gpu mem if necessary""" + """Ready argument list to be passed to the kernel, allocates gpu mem if necessary.""" flat_args = [] # Flatten all arguments into a single list. 
Required to deal with `Tunable`s @@ -737,7 +749,7 @@ def ready_argument_list(self, arguments): return gpu_args def run_kernel(self, func, gpu_args, instance): - """Run a compiled kernel instance on a device""" + """Run a compiled kernel instance on a device.""" logging.debug("run_kernel %s", instance.name) logging.debug("thread block dims (%d, %d, %d)", *instance.threads) logging.debug("grid dims (%d, %d, %d)", *instance.grid) @@ -755,7 +767,7 @@ def run_kernel(self, func, gpu_args, instance): def _preprocess_gpu_arguments(old_arguments, params): - """Get a flat list of arguments based on the configuration given by `params`""" + """Get a flat list of arguments based on the configuration given by `params`.""" new_arguments = [] for argument in old_arguments: @@ -768,8 +780,7 @@ def _preprocess_gpu_arguments(old_arguments, params): def _default_verify_function(instance, answer, result_host, atol, verbose): - """default verify function based on np.allclose""" - + """Default verify function based on np.allclose.""" # first check if the length is the same if len(instance.arguments) != len(answer): raise TypeError("The length of argument list and provided results do not match.") @@ -886,7 +897,7 @@ def _flatten(a): # these functions facilitate compiling templated kernels with PyCuda def split_argument_list(argument_list): - """split all arguments in a list into types and names""" + """Split all arguments in a list into types and names.""" regex = r"(.*[\s*]+)(.+)?" 
type_list = [] name_list = [] @@ -900,10 +911,10 @@ def split_argument_list(argument_list): def apply_template_typenames(type_list, templated_typenames): - """replace the typename tokens in type_list with their templated typenames""" + """Replace the typename tokens in type_list with their templated typenames.""" def replace_typename_token(matchobj): - """function for a whitespace preserving token regex replace""" + """Function for a whitespace preserving token regex replace.""" # replace only the match, leaving the whitespace around it as is return matchobj.group(1) + templated_typenames[matchobj.group(2)] + matchobj.group(3) @@ -917,7 +928,7 @@ def replace_typename_token(matchobj): def get_templated_typenames(template_parameters, template_arguments): - """based on the template parameters and arguments, create dict with templated typenames""" + """Based on the template parameters and arguments, create dict with templated typenames.""" templated_typenames = {} for i, param in enumerate(template_parameters): if "typename " in param: @@ -927,7 +938,7 @@ def get_templated_typenames(template_parameters, template_arguments): def wrap_templated_kernel(kernel_string, kernel_name): - """rewrite kernel_string to insert wrapper function for templated kernel""" + """Rewrite kernel_string to insert wrapper function for templated kernel.""" # parse kernel_name to find template_arguments and real kernel name name = kernel_name.split("<")[0] template_arguments = re.search(r".*?<(.*)>", kernel_name, re.S).group(1).split(",") diff --git a/kernel_tuner/file_utils.py b/kernel_tuner/file_utils.py index e5d3dcb90..2b75cc023 100644 --- a/kernel_tuner/file_utils.py +++ b/kernel_tuner/file_utils.py @@ -1,19 +1,43 @@ """This module contains utility functions for operations on files, mostly JSON cache files.""" import json -import os import subprocess from importlib.metadata import PackageNotFoundError, requires, version from pathlib import Path from sys import platform +import jsonschema 
import xmltodict from packaging.requirements import Requirement from kernel_tuner import util -schema_dir = os.path.dirname(os.path.realpath(__file__)) + "/schema" +schema_dir = Path(__file__).parent / "schema" +def input_file_schema(): + """Get the requested JSON input schema and the version number. + + :returns: the current version of the T1 schemas and the JSON string of the schema + :rtype: string, string + """ + current_version = "1.0.0" + input_file = schema_dir.joinpath(f"T1/{current_version}/input-schema.json") + with input_file.open() as fh: + json_string = json.load(fh) + return current_version, json_string + +def get_input_file(filepath: Path, validate=True) -> dict[str, any]: + """Load the T1 input file from the given path, validates it and returns contents if valid. + + :param filepath: Path to the input file to load. + :returns: the contents of the file if valid. + """ + with filepath.open() as fp: + input_file = json.load(fp) + if validate: + _, input_schema = input_file_schema() + jsonschema.validate(input_file, input_schema) + return input_file def output_file_schema(target): """Get the requested JSON schema and the version number. @@ -26,8 +50,8 @@ def output_file_schema(target): """ current_version = "1.0.0" - output_file = schema_dir + f"/T4/{current_version}/{target}-schema.json" - with open(output_file, "r") as fh: + output_file = schema_dir.joinpath(f"T4/{current_version}/{target}-schema.json") + with output_file.open() as fh: json_string = json.load(fh) return current_version, json_string @@ -63,13 +87,10 @@ def make_filenamepath(filenamepath: Path): filepath.mkdir() -def store_output_file(output_filename: str, results, tune_params, objective="time"): - """Store the obtained auto-tuning results in a JSON output file. +def get_t4_results(results, tune_params, objective="time"): + """Get the obtained auto-tuning results in a dictionary. - This function produces a JSON file that adheres to the T4 auto-tuning output JSON schema. 
- - :param output_filename: Name or 'path / name' of the to be created output file - :type output_filename: string + This function produces a dictionary that adheres to the T4 auto-tuning output JSON schema. :param results: Results list as return by tune_kernel :type results: list of dicts @@ -81,9 +102,6 @@ def store_output_file(output_filename: str, results, tune_params, objective="tim :type objective: string """ - output_filenamepath = Path(filename_ensure_json_extension(output_filename)) - make_filenamepath(output_filenamepath) - timing_keys = ["compile_time", "benchmark_time", "framework_time", "strategy_time", "verification_time"] not_measurement_keys = list(tune_params.keys()) + timing_keys + ["timestamp"] + ["times"] @@ -134,7 +152,30 @@ def store_output_file(output_filename: str, results, tune_params, objective="tim # write output_data to a JSON file version, _ = output_file_schema("results") - output_json = dict(results=output_data, schema_version=version) + output_json = dict(results=output_data, schema_version=version, metadata={'timeunit': 'miliseconds'}) + return output_json + +def store_output_file(output_filename: str, results, tune_params, objective="time"): + """Store the obtained auto-tuning results in a JSON output file. + + This function produces a JSON file that adheres to the T4 auto-tuning output JSON schema. + + :param output_filename: Name or 'path / name' of the to be created output file + :type output_filename: string + + :param results: Results list as return by tune_kernel + :type results: list of dicts + + :param tune_params: Tunable parameters as passed to tune_kernel + :type tune_params: dict + + :param objective: The objective used during auto-tuning, default is 'time'. 
+ :type objective: string + + """ + output_filenamepath = Path(filename_ensure_json_extension(output_filename)) + make_filenamepath(output_filenamepath) + output_json = get_t4_results(results, tune_params, objective) with open(output_filenamepath, "w+") as fh: json.dump(output_json, fh, cls=util.NpEncoder) @@ -175,17 +216,11 @@ def get_device_query(target): raise ValueError("get_device_query target not supported") -def store_metadata_file(metadata_filename: str): - """Store the metadata about the current hardware and software environment in a JSON output file. - - This function produces a JSON file that adheres to the T4 auto-tuning metadata JSON schema. - - :param metadata_filename: Name or 'path / name' of the to be created metadata file - :type metadata_filename: string +def get_t4_metadata(): + """Get the metadata about the current hardware and software environment. + This function produces a dictionary that adheres to the T4 auto-tuning metadata JSON schema. """ - metadata_filenamepath = Path(filename_ensure_json_extension(metadata_filename)) - make_filenamepath(metadata_filenamepath) metadata = {} supported_operating_systems = ["linux", "win32", "darwin"] @@ -250,5 +285,20 @@ def store_metadata_file(metadata_filename: str): # write metadata to JSON file version, _ = output_file_schema("metadata") metadata_json = dict(metadata=metadata, schema_version=version) + return metadata_json + +def store_metadata_file(metadata_filename: str): + """Store the metadata about the current hardware and software environment in a JSON output file. + + This function produces a JSON file that adheres to the T4 auto-tuning metadata JSON schema. 
+ + :param metadata_filename: Name or 'path / name' of the to be created metadata file + :type metadata_filename: string + + """ + metadata_filenamepath = Path(filename_ensure_json_extension(metadata_filename)) + make_filenamepath(metadata_filenamepath) + metadata_json = get_t4_metadata() with open(metadata_filenamepath, "w+") as fh: json.dump(metadata_json, fh, indent=" ") + diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py index f002882f3..867f2ac0e 100644 --- a/kernel_tuner/hyper.py +++ b/kernel_tuner/hyper.py @@ -1,15 +1,23 @@ -""" Module for functions related to hyperparameter optimization """ +"""Module for functions related to hyperparameter optimization.""" -import itertools -import warnings -import numpy as np + +from pathlib import Path +from random import randint import kernel_tuner -from kernel_tuner.util import get_config_string -def tune_hyper_params(target_strategy, hyper_params, *args, **kwargs): - """ Tune hyperparameters for a given strategy and kernel +def get_random_unique_filename(prefix = '', suffix=''): + """Get a random, unique filename that does not yet exist.""" + def randpath(): + return Path(f"{prefix}{randint(1000, 9999)}{suffix}") + path = randpath() + while path.exists(): + path = randpath() + return path + +def tune_hyper_params(target_strategy: str, hyper_params: dict, *args, **kwargs): + """Tune hyperparameters for a given strategy and kernel. This function is to be called just like tune_kernel, except that you specify a strategy and a dictionary with hyperparameters in front of the arguments you pass to tune_kernel. 
@@ -32,58 +40,59 @@ def tune_hyper_params(target_strategy, hyper_params, *args, **kwargs): :type kwargs: dict """ - if "cache" not in kwargs: - raise ValueError("Please specify a cachefile to store benchmarking data when tuning hyperparameters") + # v Have the methodology as a dependency + # - User inputs: + # - a set of bruteforced cachefiles / template experiments file + # - an optimization algorithm + # - the hyperparameter values to try + # - overarching optimization algorithm (meta-strategy) + # - At each round: + # - The meta-strategy selects a hyperparameter configuration to try + # - Kernel Tuner generates an experiments file with the hyperparameter configuration + # - Kernel Tuner executes this experiments file using the methodology + # - The methodology returns the fitness metric + # - The fitness metric is fed back into the meta-strategy + + iterations = 1 + if "iterations" in kwargs: + iterations = kwargs['iterations'] + del kwargs['iterations'] + + # pass a temporary cache file to avoid duplicate execution + cachefile = get_random_unique_filename('temp_', '.json') + kwargs['cache'] = str(cachefile) def put_if_not_present(target_dict, key, value): target_dict[key] = value if key not in target_dict else target_dict[key] - put_if_not_present(kwargs, "verbose", False) - put_if_not_present(kwargs, "quiet", True) - put_if_not_present(kwargs, "simulation_mode", True) - kwargs['strategy'] = 'brute_force' - - #last position argument is tune_params - tune_params = args[-1] - - #find optimum - kwargs["strategy"] = "brute_force" - results, _ = kernel_tuner.tune_kernel(*args, **kwargs) - optimum = min(results, key=lambda p: p["time"])["time"] - - #could throw a warning for the kwargs that will be overwritten, strategy(_options) - kwargs["strategy"] = target_strategy - - parameter_space = itertools.product(*hyper_params.values()) - all_results = [] - - for params in parameter_space: - strategy_options = dict(zip(hyper_params.keys(), params)) - - 
kwargs["strategy_options"] = strategy_options - - fevals = [] - p_of_opt = [] - for _ in range(100): - #measure - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - results, _ = kernel_tuner.tune_kernel(*args, **kwargs) - - #get unique function evaluations - unique_fevals = {",".join([str(v) for k, v in record.items() if k in tune_params]) - for record in results} - - fevals.append(len(unique_fevals)) - p_of_opt.append(min(results, key=lambda p: p["time"])["time"] / optimum * 100) - - strategy_options["fevals"] = np.average(fevals) - strategy_options["fevals_std"] = np.std(fevals) - - strategy_options["p_of_opt"] = np.average(p_of_opt) - strategy_options["p_of_opt_std"] = np.std(p_of_opt) - - print(get_config_string(strategy_options)) - all_results.append(strategy_options) - - return all_results + put_if_not_present(kwargs, "verbose", True) + put_if_not_present(kwargs, "quiet", False) + kwargs['simulation_mode'] = False + kwargs['strategy'] = 'dual_annealing' + kwargs['verify'] = None + arguments = [target_strategy] + + # execute the hyperparameter tuning + result, env = kernel_tuner.tune_kernel('hyperparamtuning', None, [], arguments, hyper_params, *args, lang='Hypertuner', + objective='score', objective_higher_is_better=True, iterations=iterations, **kwargs) + + # remove the temporary cachefile and return only unique results in order + cachefile.unlink() + result_unique = dict() + for r in result: + config_id = ",".join(str(r[k]) for k in hyper_params.keys()) + if config_id not in result_unique: + result_unique[config_id] = r + return list(result_unique.values()), env + +if __name__ == "__main__": + hyperparams = { + 'popsize': [10, 20, 30], + 'maxiter': [50, 100, 150], + 'w': [0.25, 0.5, 0.75], + 'c1': [1.0, 2.0, 3.0], + 'c2': [0.5, 1.0, 1.5] + } + result, env = tune_hyper_params('pso', hyperparams) + print(result) + print(env['best_config']) diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py index bd421aeab..2a420705f 100644 --- 
a/kernel_tuner/interface.py +++ b/kernel_tuner/interface.py @@ -23,14 +23,19 @@ See the License for the specific language governing permissions and limitations under the License. """ + import logging +from argparse import ArgumentParser +from ast import literal_eval from datetime import datetime +from pathlib import Path from time import perf_counter import numpy import kernel_tuner.core as core import kernel_tuner.util as util +from kernel_tuner.file_utils import get_input_file, get_t4_metadata, get_t4_results from kernel_tuner.integration import get_objective_defaults from kernel_tuner.runners.sequential import SequentialRunner from kernel_tuner.runners.simulation import SimulationRunner @@ -399,7 +404,7 @@ def __deepcopy__(self, _): All strategies support the following two options: 1. "max_fevals": the maximum number of unique valid function evaluations (i.e. compiling and - benchmarking a kernel configuration the strategy is allowed to perform as part of the optimization. + benchmarking a kernel configuration) the strategy is allowed to perform as part of the optimization. Note that some strategies implement a default max_fevals of 100. 2. 
"time_limit": the maximum amount of time in seconds the strategy is allowed to spent on trying to @@ -607,16 +612,24 @@ def tune_kernel( tuning_options = Options([(k, opts[k]) for k in _tuning_options.keys()]) device_options = Options([(k, opts[k]) for k in _device_options.keys()]) tuning_options["unique_results"] = {} - if strategy_options and "max_fevals" in strategy_options: - tuning_options["max_fevals"] = strategy_options["max_fevals"] - if strategy_options and "time_limit" in strategy_options: - tuning_options["time_limit"] = strategy_options["time_limit"] + # copy some values from strategy_options + searchspace_construction_options = {} + if strategy_options: + if "max_fevals" in strategy_options: + tuning_options["max_fevals"] = strategy_options["max_fevals"] + if "time_limit" in strategy_options: + tuning_options["time_limit"] = strategy_options["time_limit"] + if "searchspace_construction_options" in strategy_options: + searchspace_construction_options = strategy_options["searchspace_construction_options"] + + # log the user inputs logging.debug("tune_kernel called") logging.debug("kernel_options: %s", util.get_config_string(kernel_options)) logging.debug("tuning_options: %s", util.get_config_string(tuning_options)) logging.debug("device_options: %s", util.get_config_string(device_options)) + # check whether the selected strategy and options are valid if strategy: if strategy in strategy_map: strategy = strategy_map[strategy] @@ -660,6 +673,8 @@ def tune_kernel( # process cache if cache: + if isinstance(cache, Path): + cache = str(cache.resolve()) if cache[-5:] != ".json": cache += ".json" @@ -669,14 +684,23 @@ def tune_kernel( tuning_options.cachefile = None # create search space - searchspace = Searchspace(tune_params, restrictions, runner.dev.max_threads) + searchspace = Searchspace(tune_params, restrictions, runner.dev.max_threads, **searchspace_construction_options) restrictions = searchspace._modified_restrictions tuning_options.restrictions = 
restrictions if verbose: print(f"Searchspace has {searchspace.size} configurations after restrictions.") - # call the strategy to execute the tuning process + # register the times and raise an exception if the budget is exceeded + if "time_limit" in tuning_options: + tuning_options["startup_time"] = perf_counter() - start_overhead_time + if tuning_options["startup_time"] > tuning_options["time_limit"]: + raise RuntimeError( + f"The startup time of the tuning process ({tuning_options['startup_time']} seconds) has exceeded the time limit ({tuning_options['time_limit']} seconds). " + "Please increase the time limit or decrease the size of the search space." + ) tuning_options["start_time"] = perf_counter() + + # call the strategy to execute the tuning process results = strategy.tune(searchspace, runner, tuning_options) env = runner.get_environment(tuning_options) @@ -684,7 +708,7 @@ def tune_kernel( if results: # checks if results is not empty best_config = util.get_best_config(results, objective, objective_higher_is_better) # add the best configuration to env - env['best_config'] = best_config + env["best_config"] = best_config if not device_options.quiet: units = getattr(runner, "units", None) print("best performing configuration:") @@ -835,3 +859,126 @@ def _check_user_input(kernel_name, kernelsource, arguments, block_size_names): # check for types and length of block_size_names util.check_block_size_names(block_size_names) + + +def tune_kernel_T1(input_filepath: Path, cache_filepath: Path = None, simulation_mode = False, output_T4 = True, iterations = 7, strategy_options = None): + """Call the tune function with a T1 input file.""" + inputs = get_input_file(input_filepath) + kernelspec: dict = inputs["KernelSpecification"] + kernel_name: str = kernelspec["KernelName"] + kernel_filepath = Path(kernelspec["KernelFile"]) + kernel_source = ( + kernel_filepath if kernel_filepath.exists() else Path(input_filepath).parent.parent / kernel_filepath + ) + assert 
kernel_source.exists(), f"KernelFile '{kernel_source}' does not exist at {kernel_source.resolve()}" + language: str = kernelspec["Language"] + problem_size = kernelspec["ProblemSize"] + device = kernelspec["Device"]["Name"] + strategy = inputs["Search"]["Name"] + + if cache_filepath is None and "SimulationInput" in kernelspec: + cache_filepath = Path(kernelspec["SimulationInput"]) + + # get the grid divisions + grid_divs = {} + for grid_div in ["GridDivX", "GridDivY", "GridDivZ"]: + grid_divs[grid_div] = None + if grid_div in kernelspec and len(kernelspec[grid_div]) > 0: + grid_divs[grid_div] = kernelspec[grid_div] + + # convert tuneable parameters + tune_params = dict() + for param in inputs["ConfigurationSpace"]["TuningParameters"]: + tune_param = None + if param["Type"] in ["int", "float"]: + vals = param["Values"] + if vals[:5] == "list(" or (vals[0] == "[" and vals[-1] == "]"): + tune_param = eval(vals) + else: + tune_param = literal_eval(vals) + if tune_param is not None: + tune_params[param["Name"]] = tune_param + else: + raise NotImplementedError(f"Conversion for this type of parameter has not yet been implemented: {param}") + + # convert restrictions + restrictions = list() + for res in inputs["ConfigurationSpace"]["Conditions"]: + restriction = None + if isinstance(res["Expression"], str): + restriction = res["Expression"] + if restriction is not None: + restrictions.append(restriction) + else: + raise NotImplementedError(f"Conversion for this type of restriction has not yet been implemented: {res}") + + # convert arguments (must be after resolving tune_params) + arguments = list() + cmem_arguments = {} + for arg in kernelspec["Arguments"]: + argument = None + if arg["Type"] == "float" and arg["MemoryType"] == "Vector": + size = arg["Size"] + if isinstance(size, str): + args = tune_params.copy() + args["ProblemSize"] = problem_size + size = int(eval(size, args)) + if not isinstance(size, int): + raise TypeError(f"Size should be an integer, but is {size} 
(type ({type(size)}, from {arg['Size']}))") + if arg["FillType"] == "Constant": + argument = numpy.full(size, arg["FillValue"]).astype(numpy.float32) + elif arg["FillType"] == "Random": + argument = numpy.random.randn(size).astype(numpy.float32) + else: + raise NotImplementedError(f"Conversion for fill type '{arg['FillType']}' has not yet been implemented") + if argument is not None: + arguments.append(argument) + if "MemType" in arg and arg["MemType"] == "Constant": + cmem_arguments[arg["Name"]] = argument + else: + raise NotImplementedError(f"Conversion for this type of argument has not yet been implemented: {arg}") + + # tune with the converted inputs + # TODO add objective to tune_kernel and get_t4_results calls once available in T1 + results, env = tune_kernel( + kernel_name, + kernel_source, + problem_size, + arguments, + tune_params, + device=device, + grid_div_x=grid_divs["GridDivX"], + grid_div_y=grid_divs["GridDivY"], + grid_div_z=grid_divs["GridDivZ"], + cmem_args=cmem_arguments, + restrictions=restrictions, + lang=language, + cache=cache_filepath, + simulation_mode=simulation_mode, + quiet=True, + verbose=False, + iterations=iterations, + strategy=strategy, + strategy_options=strategy_options + ) + if output_T4: + return get_t4_metadata(), get_t4_results(results, tune_params) + return results, env + + +def entry_point(args=None): # pragma: no cover + """Command-line interface entry point.""" + cli = ArgumentParser() + cli.add_argument("input_file", type=str, help="The path to the input json file to execute (T1 standard)") + cli.add_argument( + "cache_file", type=str, help="The path to the cachefile to use (optional)", required=False, default=None + ) + args = cli.parse_args(args) + input_filepath_arg: str = args.input_file + if input_filepath_arg is None or input_filepath_arg == "": + raise ValueError("Invalid '--input_file' option. 
Run 'kernel_tuner -h' to read more.") + input_filepath = Path(input_filepath_arg) + cachefile_filepath_arg = args.cache_file + if cachefile_filepath_arg is not None: + cachefile_filepath_arg = Path(cachefile_filepath_arg) + tune_kernel_T1(input_filepath, cache_filepath=cachefile_filepath_arg) diff --git a/kernel_tuner/runners/sequential.py b/kernel_tuner/runners/sequential.py index aeebd5116..5e53093be 100644 --- a/kernel_tuner/runners/sequential.py +++ b/kernel_tuner/runners/sequential.py @@ -34,7 +34,7 @@ def __init__(self, kernel_source, kernel_options, device_options, iterations, ob self.units = self.dev.units self.quiet = device_options.quiet self.kernel_source = kernel_source - self.warmed_up = False + self.warmed_up = False if self.dev.requires_warmup else True self.simulation_mode = False self.start_time = perf_counter() self.last_strategy_start_time = self.start_time diff --git a/kernel_tuner/schema/T1/1.0.0/input-schema.json b/kernel_tuner/schema/T1/1.0.0/input-schema.json new file mode 100644 index 000000000..bb53ee594 --- /dev/null +++ b/kernel_tuner/schema/T1/1.0.0/input-schema.json @@ -0,0 +1,412 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://github.com/odgaard/TuningSchema/blob/main/TuningSchema.json", + "title": "Tuning format", + "description": "A description of a tuning problem which can be loaded by an autotuning framework", + "type": "object", + "required": [ + "ConfigurationSpace", + "KernelSpecification" + ], + "properties": { + "ConfigurationSpace": { + "type": "object", + "required": [ + "TuningParameters" + ], + "properties": { + "TuningParameters": { + "type": "array", + "items": { + "type": "object", + "required": [ + "Name", + "Type", + "Values" + ], + "properties": { + "Name": { + "type": "string" + }, + "Type": { + "enum": [ + "int", + "uint", + "float", + "bool", + "string" + ] + }, + "Values": { + "type": "string" + } + } + } + }, + "Conditions": { + "type": "array", + "items": { + "type": "object", + 
"required": [ + "Parameters", + "Expression" + ], + "properties": { + "Parameters": { + "type": "array", + "items": { + "type": "string" + } + }, + "Expression": { + "type": "string" + } + } + } + } + } + }, + "Search": { + "type": "object", + "required": [ + "Name" + ], + "properties": { + "Name": { + "type": "string" + }, + "Attributes": { + "type": "array", + "items": { + "type": "object", + "required": [ + "Name", + "Value" + ], + "properties": { + "Name": { + "type": "string" + }, + "Value": { + "type": [ + "number", + "string", + "boolean", + "object", + "array" + ] + } + } + } + } + } + }, + "Budget": { + "type": "array", + "items": { + "type": "object", + "required": [ + "Type", + "BudgetValue" + ], + "properties": { + "Type": { + "enum": [ + "TuningDuration", + "ConfigurationCount", + "ConfigurationFraction" + ] + }, + "BudgetValue": { + "type": "number" + } + } + } + }, + "General": { + "type": "object", + "properties": { + "FormatVersion": { + "type": "integer" + }, + "LoggingLevel": { + "enum": [ + "Off", + "Error", + "Warning", + "Info", + "Debug" + ] + }, + "TimeUnit": { + "enum": [ + "Nanoseconds", + "Microseconds", + "Milliseconds", + "Seconds" + ] + }, + "OutputFile": { + "type": "string", + "examples": [ + "ReductionOutput", + "Results" + ] + }, + "OutputFormat": { + "enum": [ + "JSON", + "XML" + ] + } + } + }, + "KernelSpecification": { + "type": "object", + "required": [ + "Language", + "KernelName", + "KernelFile", + "GlobalSize", + "LocalSize" + ], + "properties": { + "Device": { + "type": "object", + "properties": { + "PlatformId": { + "type": "integer" + }, + "DeviceId": { + "type": "integer" + }, + "Name": { + "type": "string" + } + } + }, + "Language": { + "enum": [ + "OpenCL", + "CUDA", + "Vulkan" + ] + }, + "CompilerOptions": { + "type": "array", + "items": { + "type": "string" + } + }, + "Profiling": { + "type": "boolean" + }, + "KernelName": { + "type": "string" + }, + "KernelFile": { + "type": "string" + }, + "GlobalSizeType": { + 
"enum": [ + "OpenCL", + "CUDA", + "Vulkan" + ] + }, + "SharedMemory": { + "type": "integer" + }, + "SimulationInput": { + "type": "string" + }, + "GlobalSize": { + "type": "object", + "required": [ + "X" + ], + "properties": { + "X": { + "type": "string" + }, + "Y": { + "type": "string" + }, + "Z": { + "type": "string" + } + } + }, + "LocalSize": { + "type": "object", + "required": [ + "X" + ], + "properties": { + "X": { + "type": "string" + }, + "Y": { + "type": "string" + }, + "Z": { + "type": "string" + } + } + }, + "Arguments": { + "type": "array", + "items": { + "type": "object", + "required": [ + "Type", + "MemoryType" + ], + "properties": { + "Name": { + "type": "string" + }, + "Type": { + "enum": [ + "bool", + "int8", + "uint8", + "int16", + "uint16", + "int32", + "uint32", + "int64", + "uint64", + "half", + "half2", + "half4", + "half8", + "half16", + "float", + "float2", + "float4", + "float8", + "float16", + "double", + "double2", + "double4", + "double8", + "double16", + "custom" + ] + }, + "Size": { + "type": [ + "integer", + "string" + ], + "examples": [ + 720, + 26000, + "ProblemSize[0]+max(filter_width)-1" + ] + }, + "TypeSize": { + "type": "integer", + "examples": [ + 4, + 16 + ] + }, + "FillType": { + "enum": [ + "Constant", + "Random", + "Generator", + "Script", + "BinaryRaw", + "BinaryHDF" + ] + }, + "FillValue": { + "type": "number", + "examples": [ + 40, + 1.0 + ] + }, + "DataSource": { + "type": "string" + }, + "RandomSeed": { + "type": "integer" + }, + "AccessType": { + "enum": [ + "ReadOnly", + "WriteOnly", + "ReadWrite" + ] + }, + "MemoryType": { + "enum": [ + "Scalar", + "Vector", + "Local", + "Symbol" + ] + } + } + } + }, + "ReferenceArguments": { + "type": "array", + "items": { + "type": "object", + "required": [ + "Name", + "TargetName", + "FillType" + ], + "properties": { + "Name": { + "type": "string" + }, + "TargetName": { + "type": "string" + }, + "FillType": { + "enum": [ + "Constant", + "Random", + "Generator", + "Script", + 
"BinaryRaw", + "BinaryHDF" + ] + }, + "FillValue": { + "type": "number", + "examples": [ + 40, + 1.0 + ] + }, + "DataSource": { + "type": "string" + }, + "RandomSeed": { + "type": "integer" + }, + "ValidationMethod": { + "enum": [ + "AbsoluteDifference", + "SideBySideComparison", + "SideBySideRelativeComparison" + ] + }, + "ValidationThreshold": { + "type": "number" + } + } + } + } + } + } + } +} \ No newline at end of file diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py index 36e772639..d9b310f4e 100644 --- a/kernel_tuner/searchspace.py +++ b/kernel_tuner/searchspace.py @@ -12,13 +12,19 @@ MaxProdConstraint, MinConflictsSolver, OptimizedBacktrackingSolver, + # ParallelSolver, Problem, RecursiveBacktrackingSolver, Solver, ) from kernel_tuner.util import check_restrictions as check_instance_restrictions -from kernel_tuner.util import compile_restrictions, default_block_size_names +from kernel_tuner.util import ( + compile_restrictions, + convert_constraint_lambdas, + default_block_size_names, + get_interval, +) supported_neighbor_methods = ["strictly-adjacent", "adjacent", "Hamming"] @@ -57,7 +63,8 @@ def __init__( self.params_values = tuple(tuple(param_vals) for param_vals in self.tune_params.values()) self.params_values_indices = None self.build_neighbors_index = build_neighbors_index - self.__neighbor_cache = dict() + self.solver_method = solver_method + self.__neighbor_cache = { method: dict() for method in supported_neighbor_methods } self.neighbor_method = neighbor_method if (neighbor_method is not None or build_neighbors_index) and neighbor_method not in supported_neighbor_methods: raise ValueError(f"Neighbor method is {neighbor_method}, must be one of {supported_neighbor_methods}") @@ -67,10 +74,15 @@ def __init__( if ( len(restrictions) > 0 and any(isinstance(restriction, str) for restriction in restrictions) - and not (framework_l == "pysmt" or framework_l == "pyatf" or framework_l == "bruteforce") + and not ( + framework_l == 
"pysmt" or framework_l == "bruteforce" or framework_l == "pythonconstraint" or solver_method.lower() == "pc_parallelsolver" + ) ): self.restrictions = compile_restrictions( - restrictions, tune_params, monolithic=False, try_to_constraint=framework_l == "pythonconstraint" + restrictions, + tune_params, + monolithic=False, + format=framework_l if framework_l == "pyatf" else None, ) # get the framework given the framework argument @@ -86,7 +98,7 @@ def __init__( elif framework_l == "bruteforce": searchspace_builder = self.__build_searchspace_bruteforce else: - raise ValueError(f"Invalid framework parameter {framework}") + raise ValueError(f"Invalid framework parameter '{framework}'") # get the solver given the solver method argument solver = "" @@ -94,6 +106,9 @@ def __init__( solver = BacktrackingSolver() elif solver_method.lower() == "pc_optimizedbacktrackingsolver": solver = OptimizedBacktrackingSolver(forwardcheck=False) + elif solver_method.lower() == "pc_parallelsolver": + raise NotImplementedError("ParallelSolver is not yet implemented") + # solver = ParallelSolver() elif solver_method.lower() == "pc_recursivebacktrackingsolver": solver = RecursiveBacktrackingSolver() elif solver_method.lower() == "pc_minconflictssolver": @@ -145,7 +160,7 @@ def __init__( # num_solutions: int = csp.n_solutions() # number of solutions # solutions = [csp.values(sol=i) for i in range(num_solutions)] # list of solutions - def __build_searchspace_bruteforce(self, block_size_names: list, max_threads: int, solver = None): + def __build_searchspace_bruteforce(self, block_size_names: list, max_threads: int, solver=None): # bruteforce solving of the searchspace from itertools import product @@ -167,9 +182,15 @@ def __build_searchspace_bruteforce(self, block_size_names: list, max_threads: in restrictions = [restrictions] block_size_restriction_spaced = f"{' * '.join(used_block_size_names)} <= {max_threads}" block_size_restriction_unspaced = f"{'*'.join(used_block_size_names)} <= 
{max_threads}" - if block_size_restriction_spaced not in restrictions and block_size_restriction_unspaced not in restrictions: + if ( + block_size_restriction_spaced not in restrictions + and block_size_restriction_unspaced not in restrictions + ): restrictions.append(block_size_restriction_spaced) - if isinstance(self._modified_restrictions, list) and block_size_restriction_spaced not in self._modified_restrictions: + if ( + isinstance(self._modified_restrictions, list) + and block_size_restriction_spaced not in self._modified_restrictions + ): self._modified_restrictions.append(block_size_restriction_spaced) if isinstance(self.restrictions, list): self.restrictions.append(block_size_restriction_spaced) @@ -252,25 +273,74 @@ def all_smt(formula, keys) -> list: def __build_searchspace_pyATF(self, block_size_names: list, max_threads: int, solver: Solver): """Builds the searchspace using pyATF.""" - from pyatf import TP, Set, Tuner + from pyatf import TP, Interval, Set, Tuner from pyatf.cost_functions.generic import CostFunction from pyatf.search_techniques import Exhaustive - costfunc = CostFunction("echo 'hello'") + # Define a bogus cost function + costfunc = CostFunction(":") # bash no-op + # add the Kernel Tuner default blocksize threads restrictions + assert isinstance(self.restrictions, list) + valid_block_size_names = list( + block_size_name for block_size_name in block_size_names if block_size_name in self.param_names + ) + if len(valid_block_size_names) > 0: + # adding the default blocksize restriction requires recompilation because pyATF requires combined restrictions for the same parameter + max_block_size_product = f"{' * '.join(valid_block_size_names)} <= {max_threads}" + restrictions = self._modified_restrictions.copy() + [max_block_size_product] + self.restrictions = compile_restrictions(restrictions, self.tune_params, format="pyatf") + + # build a dictionary of the restrictions, combined based on last parameter + res_dict = dict() + registered_params 
= list() + registered_restrictions = list() + for param in self.tune_params.keys(): + registered_params.append(param) + for index, (res, params, source) in enumerate(self.restrictions): + if index in registered_restrictions: + continue + if all(p in registered_params for p in params): + if param in res_dict: + raise KeyError( + f"`{param}` is already in res_dict with `{res_dict[param][1]}`, can't add `{source}`" + ) + res_dict[param] = (res, source) + print(source, res, param, params) + registered_restrictions.append(index) + + # define the Tunable Parameters def get_params(): - params = List() - for key, values in self.tune_params.items(): - TP(key, Set(values)) + params = list() + for index, (key, values) in enumerate(self.tune_params.items()): + vi = get_interval(values) + vals = ( + Interval(vi[0], vi[1], vi[2]) if vi is not None and vi[2] != 0 else Set(*np.array(values).flatten()) + ) + constraint = res_dict.get(key, None) + constraint_source = None + if constraint is not None: + constraint, constraint_source = constraint + # in case of a leftover monolithic restriction, append at the last parameter + if index == len(self.tune_params) - 1 and len(res_dict) == 0 and len(self.restrictions) == 1: + res, params, source = self.restrictions[0] + assert callable(res) + constraint = res + params.append(TP(key, vals, constraint, constraint_source)) return params - tuning_result = ( - Tuner() - .tuning_parameters(*get_params()) - .search_technique(Exhaustive()) - .tune(costfunc) + # tune + _, _, tuning_data = ( + Tuner().verbosity(0).tuning_parameters(*get_params()).search_technique(Exhaustive()).tune(costfunc) ) - return tuning_result + + # transform the result into a list of parameter configurations for validation + tune_params = self.tune_params + parameter_tuple_list = list() + for entry in tuning_data.history._entries: + parameter_tuple_list.append(tuple(entry.configuration[p] for p in tune_params.keys())) + pl = 
self.__parameter_space_list_to_lookup_and_return_type(parameter_tuple_list) + return pl def __build_searchspace_ATF_cache(self, block_size_names: list, max_threads: int, solver: Solver): """Imports the valid configurations from an ATF CSV file, returns the searchspace, a dict of the searchspace for fast lookups and the size.""" @@ -305,7 +375,7 @@ def __parameter_space_list_to_lookup_and_return_type( parameter_space_dict, size_list, ) - + def __build_searchspace(self, block_size_names: list, max_threads: int, solver: Solver): """Compute valid configurations in a search space based on restrictions and max_threads.""" # instantiate the parameter space with all the variables @@ -314,6 +384,9 @@ def __build_searchspace(self, block_size_names: list, max_threads: int, solver: parameter_space.addVariable(str(param_name), param_values) # add the user-specified restrictions as constraints on the parameter space + if not isinstance(self.restrictions, (list, tuple)): + self.restrictions = [self.restrictions] + self.restrictions = convert_constraint_lambdas(self.restrictions) parameter_space = self.__add_restrictions(parameter_space) # add the default blocksize threads restrictions last, because it is unlikely to reduce the parameter space by much @@ -323,10 +396,13 @@ def __build_searchspace(self, block_size_names: list, max_threads: int, solver: if len(valid_block_size_names) > 0: parameter_space.addConstraint(MaxProdConstraint(max_threads), valid_block_size_names) max_block_size_product = f"{' * '.join(valid_block_size_names)} <= {max_threads}" - if isinstance(self._modified_restrictions, list) and max_block_size_product not in self._modified_restrictions: + if ( + isinstance(self._modified_restrictions, list) + and max_block_size_product not in self._modified_restrictions + ): self._modified_restrictions.append(max_block_size_product) if isinstance(self.restrictions, list): - self.restrictions.append((MaxProdConstraint(max_threads), valid_block_size_names)) + 
self.restrictions.append((MaxProdConstraint(max_threads), valid_block_size_names, None)) # construct the parameter space with the constraints applied return parameter_space.getSolutionsAsListDict(order=self.param_names) @@ -337,21 +413,29 @@ def __add_restrictions(self, parameter_space: Problem) -> Problem: for restriction in self.restrictions: required_params = self.param_names - # convert to a Constraint type if necessary - if isinstance(restriction, tuple): - restriction, required_params = restriction + # (un)wrap where necessary + if isinstance(restriction, tuple) and len(restriction) >= 2: + required_params = restriction[1] + restriction = restriction[0] if callable(restriction) and not isinstance(restriction, Constraint): - restriction = FunctionConstraint(restriction) - - # add the Constraint + # def restrictions_wrapper(*args): + # return check_instance_restrictions(restriction, dict(zip(self.param_names, args)), False) + # print(restriction, isinstance(restriction, Constraint)) + # restriction = FunctionConstraint(restrictions_wrapper) + restriction = FunctionConstraint(restriction, required_params) + + # add as a Constraint + all_params_required = all(param_name in required_params for param_name in self.param_names) + variables = None if all_params_required else required_params if isinstance(restriction, FunctionConstraint): - parameter_space.addConstraint(restriction, required_params) + parameter_space.addConstraint(restriction, variables) elif isinstance(restriction, Constraint): - all_params_required = all(param_name in required_params for param_name in self.param_names) - parameter_space.addConstraint( - restriction, - None if all_params_required else required_params - ) + parameter_space.addConstraint(restriction, variables) + elif isinstance(restriction, str): + if self.solver_method.lower() == "pc_parallelsolver": + parameter_space.addConstraint(restriction) + else: + parameter_space.addConstraint(restriction, variables) else: raise 
ValueError(f"Unrecognized restriction {restriction}") @@ -674,24 +758,25 @@ def get_neighbors_indices_no_cache(self, param_config: tuple, neighbor_method=No raise ValueError(f"The neighbor method {neighbor_method} is not in {supported_neighbor_methods}") def get_neighbors_indices(self, param_config: tuple, neighbor_method=None) -> List[int]: - """Get the neighbors indices for a parameter configuration, possibly cached.""" - neighbors = self.__neighbor_cache.get(param_config, None) + """Get the neighbors indices for a parameter configuration, cached if requested before.""" + if neighbor_method is None: + neighbor_method = self.neighbor_method + if neighbor_method is None: + raise ValueError("Neither the neighbor_method argument nor self.neighbor_method was set") + neighbors = self.__neighbor_cache[neighbor_method].get(param_config, None) # if there are no cached neighbors, compute them if neighbors is None: neighbors = self.get_neighbors_indices_no_cache(param_config, neighbor_method) - self.__neighbor_cache[param_config] = neighbors - # if the neighbors were cached but the specified neighbor method was different than the one initially used to build the cache, throw an error - elif ( - self.neighbor_method is not None and neighbor_method is not None and self.neighbor_method != neighbor_method - ): - raise ValueError( - f"The neighbor method {neighbor_method} differs from the intially set {self.neighbor_method}, can not use cached neighbors. Use 'get_neighbors_no_cache()' when mixing neighbor methods to avoid this." 
- ) + self.__neighbor_cache[neighbor_method][param_config] = neighbors return neighbors - def are_neighbors_indices_cached(self, param_config: tuple) -> bool: + def are_neighbors_indices_cached(self, param_config: tuple, neighbor_method=None) -> bool: """Returns true if the neighbor indices are in the cache, false otherwise.""" - return param_config in self.__neighbor_cache + if neighbor_method is None: + neighbor_method = self.neighbor_method + if neighbor_method is None: + raise ValueError("Neither the neighbor_method argument nor self.neighbor_method was set") + return param_config in self.__neighbor_cache[neighbor_method] def get_neighbors_no_cache(self, param_config: tuple, neighbor_method=None) -> List[tuple]: """Get the neighbors for a parameter configuration (does not check running cache, useful when mixing neighbor methods).""" diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py index 024a3f8c0..663cb12c8 100644 --- a/kernel_tuner/strategies/bayes_opt.py +++ b/kernel_tuner/strategies/bayes_opt.py @@ -1,4 +1,5 @@ """Bayesian Optimization implementation from the thesis by Willemsen.""" + import itertools import time import warnings @@ -13,6 +14,7 @@ # BO imports from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies.common import CostFunc +from kernel_tuner.util import StopCriterionReached try: from sklearn.gaussian_process import GaussianProcessRegressor @@ -22,9 +24,7 @@ except ImportError: bayes_opt_present = False -from kernel_tuner import util - -supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"] +supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast", "multi-ultrafast"] def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict, dict]: @@ -105,19 +105,8 @@ def tune(searchspace: Searchspace, runner, tuning_options): _, _, eps = cost_func.get_bounds_x0_eps() # compute cartesian product 
of all tunable parameters - parameter_space = itertools.product(*tune_params.values()) - - # check for search space restrictions - if searchspace.restrictions is not None: - tuning_options.verbose = False - parameter_space = filter(lambda p: util.config_valid(p, tuning_options, runner.dev.max_threads), parameter_space) - parameter_space = list(parameter_space) - if len(parameter_space) < 1: - raise ValueError("Empty parameterspace after restrictionscheck. Restrictionscheck is possibly too strict.") - if len(parameter_space) == 1: - raise ValueError( - f"Only one configuration after restrictionscheck. Restrictionscheck is possibly too strict. Configuration: {parameter_space[0]}" - ) + # TODO actually use the Searchspace object properly throughout Bayesian Optimization + parameter_space = searchspace.list # normalize search space to [0,1] normalize_dict, denormalize_dict = generate_normalized_param_dicts(tune_params, eps) @@ -135,18 +124,19 @@ def tune(searchspace: Searchspace, runner, tuning_options): # initialize and optimize try: bo = BayesianOptimization( - parameter_space, removed_tune_params, tuning_options, normalize_dict, denormalize_dict, cost_func + parameter_space, searchspace, removed_tune_params, tuning_options, normalize_dict, denormalize_dict, cost_func ) - except util.StopCriterionReached as e: - print( + except StopCriterionReached: + warnings.warn( "Stop criterion reached during initialization, was popsize (default 20) greater than max_fevals or the alotted time?" 
) - raise e + return cost_func.results + # raise e try: if max_fevals - bo.fevals <= 0: raise ValueError("No function evaluations left for optimization after sampling") bo.optimize(max_fevals) - except util.StopCriterionReached as e: + except StopCriterionReached as e: if tuning_options.verbose: print(e) @@ -162,7 +152,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): covariancelengthscale=("The covariance length scale", 1.5), method=( "The Bayesian Optimization method to use, choose any from " + ", ".join(supported_methods), - "multi-advanced", + "multi-ultrafast", ), samplingmethod=( "Method used for initial sampling the parameter space, either random or Latin Hypercube Sampling (LHS)", @@ -176,6 +166,7 @@ class BayesianOptimization: def __init__( self, searchspace: list, + searchspace_obj: Searchspace, removed_tune_params: list, tuning_options: dict, normalize_dict: dict, @@ -199,7 +190,7 @@ def get_hyperparam(name: str, default, supported_values=list()): # get hyperparameters cov_kernel_name = get_hyperparam("covariancekernel", "matern32", self.supported_cov_kernels) cov_kernel_lengthscale = get_hyperparam("covariancelengthscale", 1.5) - acquisition_function = get_hyperparam("method", "multi-advanced", self.supported_methods) + acquisition_function = get_hyperparam("method", "multi-ultrafast", self.supported_methods) acq = acquisition_function acq_params = get_hyperparam("methodparams", {}) multi_af_names = get_hyperparam("multi_af_names", ["ei", "poi", "lcb"]) @@ -253,6 +244,7 @@ def get_hyperparam(name: str, default, supported_values=list()): # set remaining values self.__searchspace = searchspace + self.__searchspace_obj = searchspace_obj self.removed_tune_params = removed_tune_params self.searchspace_size = len(self.searchspace) self.num_dimensions = len(self.dimensions()) @@ -342,6 +334,8 @@ def set_acquisition_function(self, acquisition_function: str): self.optimize = self.__optimize_multi_advanced elif acquisition_function == "multi-fast": 
self.optimize = self.__optimize_multi_fast + elif acquisition_function == "multi-ultrafast": + self.optimize = self.__optimize_multi_ultrafast else: raise ValueError( "Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function) @@ -458,7 +452,7 @@ def evaluate_objective_function(self, param_config: tuple) -> float: """Evaluates the objective function.""" param_config = self.unprune_param_config(param_config) denormalized_param_config = self.denormalize_param_config(param_config) - if not util.config_valid(denormalized_param_config, self.tuning_options, self.max_threads): + if not self.__searchspace_obj.is_param_config_valid(denormalized_param_config): return self.invalid_value val = self.cost_func(param_config) self.fevals += 1 @@ -843,6 +837,44 @@ def __optimize_multi_fast(self, max_fevals): self.update_after_evaluation(observation, candidate_index, candidate_params) self.fit_observations_to_model() + def __optimize_multi_ultrafast(self, max_fevals, predict_eval_ratio=5): + """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, or fewer if predictions take too long. + + The `predict_eval_ratio` denotes the ratio between the duration of the predictions and the duration of evaluations, as updating the prediction every evaluation is not efficient when evaluation is quick. + Predictions are only updated if the previous evaluation took more than `predict_eval_ratio` * the last prediction duration, or the last prediction is more than `predict_eval_ratio` evaluations ago. 
+ """ + last_prediction_counter = 0 + last_prediction_time = 0 + last_eval_time = 0 + while self.fevals < max_fevals: + aqfs = self.multi_afs + # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model + fit_observations = last_prediction_time * predict_eval_ratio <= last_eval_time or last_prediction_counter >= predict_eval_ratio + if fit_observations: + last_prediction_counter = 0 + pred_start = time.perf_counter() + if last_eval_time > 0.0: + self.fit_observations_to_model() + predictions, _, std = self.predict_list(self.unvisited_cache) + last_prediction_time = time.perf_counter() - pred_start + else: + last_prediction_counter += 1 + eval_start = time.perf_counter() + hyperparam = self.contextual_variance(std) + if self.__visited_num >= self.searchspace_size: + raise ValueError(self.error_message_searchspace_fully_observed) + for af in aqfs: + if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals: + break + list_of_acquisition_values = af(predictions, hyperparam) + best_af = self.argopt(list_of_acquisition_values) + del predictions[best_af] # to avoid going out of bounds + candidate_params = self.unvisited_cache[best_af] + candidate_index = self.find_param_config_index(candidate_params) + observation = self.evaluate_objective_function(candidate_params) + self.update_after_evaluation(observation, candidate_index, candidate_params) + last_eval_time = time.perf_counter() - eval_start + def af_random(self, predictions=None, hyperparam=None) -> list: """Acquisition function returning a randomly shuffled list for comparison.""" list_random = range(len(self.unvisited_cache)) diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py index 00700cd77..3c1adb0d4 100644 --- a/kernel_tuner/strategies/common.py +++ b/kernel_tuner/strategies/common.py @@ -45,10 +45,10 @@ def 
make_strategy_options_doc(strategy_options): def get_options(strategy_options, options): """Get the strategy-specific options or their defaults from user-supplied strategy_options.""" - accepted = list(options.keys()) + ["max_fevals", "time_limit"] + accepted = list(options.keys()) + ["max_fevals", "time_limit", "searchspace_construction_options"] for key in strategy_options: if key not in accepted: - raise ValueError(f"Unrecognized option {key} in strategy_options") + raise ValueError(f"Unrecognized option {key} in strategy_options (allowed: {accepted})") assert isinstance(options, dict) return [strategy_options.get(opt, default) for opt, (_, default) in options.items()] @@ -56,10 +56,12 @@ def get_options(strategy_options, options): class CostFunc: def __init__(self, searchspace: Searchspace, tuning_options, runner, *, scaling=False, snap=True): self.runner = runner - self.tuning_options = tuning_options self.snap = snap self.scaling = scaling self.searchspace = searchspace + self.tuning_options = tuning_options + if isinstance(self.tuning_options, dict): + self.tuning_options['max_fevals'] = min(tuning_options['max_fevals'] if 'max_fevals' in tuning_options else np.inf, searchspace.size) self.results = [] def __call__(self, x, check_restrictions=True): diff --git a/kernel_tuner/strategies/hillclimbers.py b/kernel_tuner/strategies/hillclimbers.py index b64e7d733..ccd4eebf0 100644 --- a/kernel_tuner/strategies/hillclimbers.py +++ b/kernel_tuner/strategies/hillclimbers.py @@ -1,13 +1,12 @@ import random -from kernel_tuner import util from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies.common import CostFunc def base_hillclimb(base_sol: tuple, neighbor_method: str, max_fevals: int, searchspace: Searchspace, tuning_options, cost_func: CostFunc, restart=True, randomize=True, order=None): - """ Hillclimbing search until max_fevals is reached or no improvement is found + """Hillclimbing search until max_fevals is reached or no improvement is 
found. Base hillclimber that evaluates neighbouring solutions in a random or fixed order and possibly immediately moves to the neighbour if it is an improvement. @@ -51,6 +50,9 @@ def base_hillclimb(base_sol: tuple, neighbor_method: str, max_fevals: int, searc """ if randomize and order: raise ValueError("Using a preset order and randomize at the same time is not supported.") + + # limit max_fevals to max size of the parameter space + max_fevals = min(searchspace.size, max_fevals) tune_params = searchspace.tune_params diff --git a/kernel_tuner/strategies/random_sample.py b/kernel_tuner/strategies/random_sample.py index 022eda534..06ab4b9f6 100644 --- a/kernel_tuner/strategies/random_sample.py +++ b/kernel_tuner/strategies/random_sample.py @@ -17,7 +17,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): # override if max_fevals is specified if "max_fevals" in tuning_options: - num_samples = tuning_options.max_fevals + num_samples = min(tuning_options.max_fevals, searchspace.size) samples = searchspace.get_random_sample(num_samples) diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py index 3a89ad54b..d663b3f2b 100644 --- a/kernel_tuner/strategies/simulated_annealing.py +++ b/kernel_tuner/strategies/simulated_annealing.py @@ -27,7 +27,10 @@ def tune(searchspace: Searchspace, runner, tuning_options): # if user supplied max_fevals that is lower then max_iter we will # scale the annealing schedule to fit max_fevals - max_feval = tuning_options.strategy_options.get("max_fevals", max_iter) + max_fevals = tuning_options.strategy_options.get("max_fevals", max_iter) + + # limit max_fevals to max size of the parameter space + max_fevals = min(searchspace.size, max_fevals) # get random starting point and evaluate cost pos = list(searchspace.get_random_sample(1)[0]) @@ -64,7 +67,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): old_cost = new_cost c = len(tuning_options.unique_results) - T = 
T_start * alpha**(max_iter/max_feval*c) + T = T_start * alpha**(max_iter/max_fevals*c) # check if solver gets stuck and if so restart from random position if c == c_old: diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py index da36cba2d..f0546109a 100644 --- a/kernel_tuner/util.py +++ b/kernel_tuner/util.py @@ -1,5 +1,5 @@ """Module for kernel tuner utility functions.""" - +import ast import errno import json import logging @@ -7,11 +7,13 @@ import re import sys import tempfile +import textwrap import time import warnings -from inspect import signature +from inspect import getsource, signature +from pathlib import Path from types import FunctionType -from typing import Optional, Union +from typing import Union import numpy as np from constraint import ( @@ -193,7 +195,7 @@ def check_stop_criterion(to): """Checks if max_fevals is reached or time limit is exceeded.""" if "max_fevals" in to and len(to.unique_results) >= to.max_fevals: raise StopCriterionReached("max_fevals reached") - if "time_limit" in to and (((time.perf_counter() - to.start_time) + (to.simulated_time * 1e-3)) > to.time_limit): + if "time_limit" in to and (((time.perf_counter() - to.start_time) + (to.simulated_time * 1e-3) + to.startup_time) > to.time_limit): raise StopCriterionReached("time limit exceeded") @@ -268,12 +270,15 @@ def check_restriction(restrict, params: dict) -> bool: # if it's a tuple, use only the parameters in the second argument to call the restriction elif ( isinstance(restrict, tuple) - and len(restrict) == 2 + and (len(restrict) == 2 or len(restrict) == 3) and callable(restrict[0]) and isinstance(restrict[1], (list, tuple)) ): # unpack the tuple - restrict, selected_params = restrict + if len(restrict) == 2: + restrict, selected_params = restrict + else: + restrict, selected_params, source = restrict # look up the selected parameters and their value selected_params = dict((key, params[key]) for key in selected_params) # call the restriction @@ -457,6 +462,28 @@ def 
get_instance_string(params): return "_".join([str(i) for i in params.values()]) +def get_interval(a: list): + """Checks if an array can be an interval. Returns (start, end, step) if interval, otherwise None.""" + if len(a) < 3: + return None + if not all(isinstance(e, (int, float)) for e in a): + return None + a_min = min(a) + a_max = max(a) + if len(a) <= 2: + return (a_min, a_max, a_max-a_min) + # determine the first step size + step = a[1]-a_min + # for each element, the step size should be equal to the first step + for i, e in enumerate(a): + if e-a[i-1] != step: + return None + result = (a_min, a_max, step) + if not all(isinstance(e, (int, float)) for e in result): + return None + return result + + def get_kernel_string(kernel_source, params=None): """Retrieve the kernel source and return as a string. @@ -471,8 +498,9 @@ def get_kernel_string(kernel_source, params=None): after all. :param kernel_source: One of the sources for the kernel, could be a - function that generates the kernel code, a string containing a filename - that points to the kernel source, or just a string that contains the code. + function that generates the kernel code, a string or Path containing a + filename that points to the kernel source, or just a string that + contains the code. 
:type kernel_source: string or callable :param params: Dictionary containing the tunable parameters for this specific @@ -488,6 +516,8 @@ def get_kernel_string(kernel_source, params=None): kernel_string = None if callable(kernel_source): kernel_string = kernel_source(params) + elif isinstance(kernel_source, Path): + kernel_string = read_file(kernel_source) elif isinstance(kernel_source, str): if looks_like_a_filename(kernel_source): kernel_string = read_file(kernel_source) or kernel_source @@ -779,7 +809,10 @@ def prepare_kernel_string(kernel_name, kernel_string, params, grid, threads, blo def read_file(filename): """Return the contents of the file named filename or None if file not found.""" - if os.path.isfile(filename): + if isinstance(filename, Path): + with filename.open() as f: + return f.read() + elif os.path.isfile(filename): with open(filename, "r") as f: return f.read() @@ -841,7 +874,7 @@ def has_kw_argument(func, name): def parse_restrictions( - restrictions: list[str], tune_params: dict, monolithic=False, try_to_constraint=True + restrictions: list[str], tune_params: dict, monolithic=False, format=None ) -> list[tuple[Union[Constraint, str], list[str]]]: """Parses restrictions from a list of strings into compilable functions and constraints, or a single compilable function (if monolithic is True). Returns a list of tuples of (strings or constraints) and parameters.""" # rewrite the restrictions so variables are singled out @@ -849,7 +882,7 @@ def parse_restrictions( def replace_params(match_object): key = match_object.group(1) - if key in tune_params: + if key in tune_params and format != "pyatf": param = str(key) return "params[params_index['" + param + "']]" else: @@ -864,166 +897,18 @@ def replace_params_split(match_object): return param else: return key - - def to_multiple_restrictions(restrictions: list[str]) -> list[str]: - """Split the restrictions into multiple restriction where possible (e.g. 
3 <= x * y < 9 <= z -> [(MinProd(3), [x, y]), (MaxProd(9-1), [x, y]), (MinProd(9), [z])]).""" - split_restrictions = list() - for res in restrictions: - # if there are logic chains in the restriction, skip splitting further - if " and " in res or " or " in res: - split_restrictions.append(res) - continue - # find the indices of splittable comparators - comparators = ["<=", ">=", ">", "<"] - comparators_indices = [(m.start(0), m.end(0)) for m in re.finditer("|".join(comparators), res)] - if len(comparators_indices) <= 1: - # this can't be split further - split_restrictions.append(res) - continue - # split the restrictions from the previous to the next comparator - for index in range(len(comparators_indices)): - temp_copy = res - prev_stop = comparators_indices[index - 1][1] + 1 if index > 0 else 0 - next_stop = ( - comparators_indices[index + 1][0] if index < len(comparators_indices) - 1 else len(temp_copy) - ) - split_restrictions.append(temp_copy[prev_stop:next_stop].strip()) - return split_restrictions - - def to_numeric_constraint( - restriction: str, params: list[str] - ) -> Optional[Union[MinSumConstraint, ExactSumConstraint, MaxSumConstraint, MaxProdConstraint]]: - """Converts a restriction to a built-in numeric constraint if possible.""" - comparators = ["<=", "==", ">=", ">", "<"] - comparators_found = re.findall("|".join(comparators), restriction) - # check if there is exactly one comparator, if not, return None - if len(comparators_found) != 1: - return None - comparator = comparators_found[0] - - # split the string on the comparison and remove leading and trailing whitespace - left, right = tuple(s.strip() for s in restriction.split(comparator)) - - # find out which side is the constant number - def is_or_evals_to_number(s: str) -> Optional[Union[int, float]]: - try: - # check if it's a number or solvable to a number (e.g. 
'32*2') - number = eval(s) - assert isinstance(number, (int, float)) - return number - except Exception: - # it's not a solvable subexpression, return None - return None - - # either the left or right side of the equation must evaluate to a constant number - left_num = is_or_evals_to_number(left) - right_num = is_or_evals_to_number(right) - if (left_num is None and right_num is None) or (left_num is not None and right_num is not None): - # left_num and right_num can't be both None or both a constant - return None - number, variables, variables_on_left = ( - (left_num, right.strip(), False) if left_num is not None else (right_num, left.strip(), True) - ) - - # if the number is an integer, we can map '>' to '>=' and '<' to '<=' by changing the number (does not work with floating points!) - number_is_int = isinstance(number, int) - if number_is_int: - if comparator == "<": - if variables_on_left: - # (x < 2) == (x <= 2-1) - number -= 1 - else: - # (2 < x) == (2+1 <= x) - number += 1 - elif comparator == ">": - if variables_on_left: - # (x > 2) == (x >= 2+1) - number += 1 - else: - # (2 > x) == (2-1 >= x) - number -= 1 - - # check if an operator is applied on the variables, if not return - operators = [r"\*\*", r"\*", r"\+"] - operators_found = re.findall(str("|".join(operators)), variables) - if len(operators_found) == 0: - # no operators found, return only based on comparator - if len(params) != 1 or variables not in params: - # there were more than one variable but no operator - return None - # map to a Constraint - # if there are restrictions with a single variable, it will be used to prune the domain at the start - elif comparator == "==": - return ExactSumConstraint(number) - elif comparator == "<=" or (comparator == "<" and number_is_int): - return MaxSumConstraint(number) if variables_on_left else MinSumConstraint(number) - elif comparator == ">=" or (comparator == ">" and number_is_int): - return MinSumConstraint(number) if variables_on_left else 
MaxSumConstraint(number) - raise ValueError(f"Invalid comparator {comparator}") - - # check which operator is applied on the variables - operator = operators_found[0] - if not all(o == operator for o in operators_found): - # if the operator is inconsistent (e.g. 'x + y * z == 3'), return None - return None - - # split the string on the comparison - splitted = variables.split(operator) - # check if there are only pure, non-recurring variables (no operations or constants) in the restriction - if len(splitted) == len(params) and all(s.strip() in params for s in splitted): - # map to a Constraint - if operator == "**": - # power operations are not (yet) supported, added to avoid matching the double asterisk - return None - elif operator == "*": - if comparator == "<=" or (comparator == "<" and number_is_int): - return MaxProdConstraint(number) if variables_on_left else MinProdConstraint(number) - elif comparator == ">=" or (comparator == ">" and number_is_int): - return MinProdConstraint(number) if variables_on_left else MaxProdConstraint(number) - elif operator == "+": - if comparator == "==": - return ExactSumConstraint(number) - elif comparator == "<=" or (comparator == "<" and number_is_int): - return MaxSumConstraint(number) if variables_on_left else MinSumConstraint(number) - elif comparator == ">=" or (comparator == ">" and number_is_int): - return MinSumConstraint(number) if variables_on_left else MaxSumConstraint(number) - else: - raise ValueError(f"Invalid operator {operator}") - return None - - def to_equality_constraint( - restriction: str, params: list[str] - ) -> Optional[Union[AllEqualConstraint, AllDifferentConstraint]]: - """Converts a restriction to either an equality or inequality constraint on all the parameters if possible.""" - # check if all parameters are involved - if len(params) != len(tune_params): - return None - - # find whether (in)equalities appear in this restriction - equalities_found = re.findall("==", restriction) - inequalities_found 
= re.findall("!=", restriction) - # check if one of the two have been found, if none or both have been found, return None - if not (len(equalities_found) > 0 ^ len(inequalities_found) > 0): - return None - comparator = equalities_found[0] if len(equalities_found) > 0 else inequalities_found[0] - - # split the string on the comparison - splitted = restriction.split(comparator) - # check if there are only pure, non-recurring variables (no operations or constants) in the restriction - if len(splitted) == len(params) and all(s.strip() in params for s in splitted): - # map to a Constraint - if comparator == "==": - return AllEqualConstraint() - elif comparator == "!=": - return AllDifferentConstraint() - return ValueError(f"Not possible: comparator should be '==' or '!=', is {comparator}") - return None + + # remove functionally duplicate restrictions (preserves order and whitespace) + if all(isinstance(r, str) for r in restrictions): + # clean the restriction strings to functional equivalence + restrictions_cleaned = [r.replace(' ', '') for r in restrictions] + restrictions_cleaned_unique = list(dict.fromkeys(restrictions_cleaned)) # dict preserves order + # get the indices of the unique restrictions, use these to build a new list of restrictions + restrictions_unique_indices = [restrictions_cleaned.index(r) for r in restrictions_cleaned_unique] + restrictions = [restrictions[i] for i in restrictions_unique_indices] # create the parsed restrictions if monolithic is False: - # split into multiple restrictions where possible - if try_to_constraint: - restrictions = to_multiple_restrictions(restrictions) # split into functions that only take their relevant parameters parsed_restrictions = list() for res in restrictions: @@ -1031,24 +916,36 @@ def to_equality_constraint( parsed_restriction = re.sub(regex_match_variable, replace_params_split, res).strip() params_used = list(params_used) finalized_constraint = None - if try_to_constraint and " or " not in res and " and " not 
in res: - # if applicable, strip the outermost round brackets - while ( - parsed_restriction[0] == "(" - and parsed_restriction[-1] == ")" - and "(" not in parsed_restriction[1:] - and ")" not in parsed_restriction[:1] - ): - parsed_restriction = parsed_restriction[1:-1] - # check if we can turn this into the built-in numeric comparison constraint - finalized_constraint = to_numeric_constraint(parsed_restriction, params_used) - if finalized_constraint is None: - # check if we can turn this into the built-in equality comparison constraint - finalized_constraint = to_equality_constraint(parsed_restriction, params_used) - if finalized_constraint is None: - # we must turn it into a general function + # we must turn it into a general function + if format is not None and format.lower() == "pyatf": + finalized_constraint = parsed_restriction + else: finalized_constraint = f"def r({', '.join(params_used)}): return {parsed_restriction} \n" parsed_restrictions.append((finalized_constraint, params_used)) + + # if pyATF, restrictions that are set on the same parameter must be combined into one + if format is not None and format.lower() == "pyatf": + res_dict = dict() + registered_params = list() + registered_restrictions = list() + parsed_restrictions_pyatf = list() + for param in tune_params.keys(): + registered_params.append(param) + for index, (res, params) in enumerate(parsed_restrictions): + if index in registered_restrictions: + continue + if all(p in registered_params for p in params): + if param not in res_dict: + res_dict[param] = (list(), list()) + res_dict[param][0].append(res) + res_dict[param][1].extend(params) + registered_restrictions.append(index) + # combine multiple restrictions into one + for res_tuple in res_dict.values(): + res, params_used = res_tuple + params_used = list(dict.fromkeys(params_used)) # param_used should only contain unique, dict preserves order + parsed_restrictions_pyatf.append((f"def r({', '.join(params_used)}): return ({') and 
('.join(res)}) \n", params_used)) + parsed_restrictions = parsed_restrictions_pyatf else: # create one monolithic function parsed_restrictions = ") and (".join( @@ -1062,20 +959,124 @@ def to_equality_constraint( # provide a mapping of the parameter names to the index in the tuple received params_index = dict(zip(tune_params.keys(), range(len(tune_params.keys())))) - parsed_restrictions = [ - ( - f"def restrictions(*params): params_index = {params_index}; return {parsed_restrictions} \n", - list(tune_params.keys()), - ) - ] + if format == "pyatf": + parsed_restrictions = [ + ( + f"def restrictions({', '.join(params_index.keys())}): return {parsed_restrictions} \n", + list(tune_params.keys()), + ) + ] + else: + parsed_restrictions = [ + ( + f"def restrictions(*params): params_index = {params_index}; return {parsed_restrictions} \n", + list(tune_params.keys()), + ) + ] return parsed_restrictions +def get_all_lambda_asts(func): + """Extracts the AST nodes of all lambda functions defined on the same line as func. + + Args: + func: A lambda function object. + + Returns: + A list of all ast.Lambda node objects on the line where func is defined. + + Raises: + ValueError: If the source can't be retrieved or no lambda is found. + """ + res = [] + try: + source = getsource(func) + source = textwrap.dedent(source).strip() + parsed = ast.parse(source) + + # Find the Lambda node + for node in ast.walk(parsed): + if isinstance(node, ast.Lambda): + res.append(node) + if not res: + raise ValueError(f"No lambda node found in the source {source}.") + except SyntaxError: + """ Ignore syntax errors on the lambda """ + return res + except OSError: + raise ValueError("Could not retrieve source. Is this defined interactively or dynamically?") + return res + + +class ConstraintLambdaTransformer(ast.NodeTransformer): + """Replaces any `NAME['string']` subscript with just `'string'`, if `NAME` + matches the lambda argument name. 
+ """ + def __init__(self, dict_arg_name): + self.dict_arg_name = dict_arg_name + + def visit_Subscript(self, node): + # We only replace subscript expressions of the form ['some_string'] + if (isinstance(node.value, ast.Name) + and node.value.id == self.dict_arg_name + and isinstance(node.slice, ast.Constant) + and isinstance(node.slice.value, str)): + # Replace `dict_arg_name['some_key']` with the string used as key + return ast.Name(node.slice.value) + return self.generic_visit(node) + + +def unparse_constraint_lambda(lambda_ast): + """Parse the lambda function to replace accesses to tunable parameter dict + Returns string body of the rewritten lambda function + """ + args = lambda_ast.args + body = lambda_ast.args + + # Kernel Tuner only allows constraint lambdas with a single argument + arg = args.args[0].arg + + # Create transformer that replaces accesses to tunable parameter dict + # with simply the name of the tunable parameter + transformer = ConstraintLambdaTransformer(arg) + new_lambda_ast = transformer.visit(lambda_ast) + + rewritten_lambda_body_as_string = ast.unparse(new_lambda_ast.body).strip() + + return rewritten_lambda_body_as_string + + +def convert_constraint_lambdas(restrictions): + """Extract and convert all constraint lambdas from the restrictions""" + res = [] + for c in restrictions: + if isinstance(c, (str, Constraint)): + res.append(c) + if callable(c) and not isinstance(c, Constraint): + try: + lambda_asts = get_all_lambda_asts(c) + except ValueError: + res.append(c) # it's just a plain function, not a lambda + continue + + for lambda_ast in lambda_asts: + new_c = unparse_constraint_lambda(lambda_ast) + res.append(new_c) + + result = list(set(res)) + if not len(result) == len(restrictions): + raise ValueError("An error occured when parsing restrictions. 
If you mix lambdas and string-based restrictions, please define the lambda first.") + + return result + + def compile_restrictions( - restrictions: list, tune_params: dict, monolithic=False, try_to_constraint=True -) -> list[tuple[Union[str, Constraint, FunctionType], list[str]]]: - """Parses restrictions from a list of strings into a list of strings, Functions, or Constraints (if `try_to_constraint`) and parameters used, or a single Function if monolithic is true.""" + restrictions: list, tune_params: dict, monolithic=False, format=None +) -> list[tuple[Union[str, FunctionType], list[str], Union[str, None]]]: + """Parses restrictions from a list of strings into a list of strings or Functions and parameters used and source, or a single Function if monolithic is true.""" + restrictions = convert_constraint_lambdas(restrictions) + # filter the restrictions to get only the strings restrictions_str, restrictions_ignore = [], [] for r in restrictions: @@ -1084,9 +1085,7 @@ def compile_restrictions( return restrictions_ignore # parse the strings - parsed_restrictions = parse_restrictions( - restrictions_str, tune_params, monolithic=monolithic, try_to_constraint=try_to_constraint - ) + parsed_restrictions = parse_restrictions(restrictions_str, tune_params, monolithic=monolithic, format=format) # compile the parsed restrictions into a function compiled_restrictions: list[tuple] = list() @@ -1095,10 +1094,10 @@ def compile_restrictions( # if it's a string, parse it to a function code_object = compile(restriction, "", "exec") func = FunctionType(code_object.co_consts[0], globals()) - compiled_restrictions.append((func, params_used)) + compiled_restrictions.append((func, params_used, restriction)) elif isinstance(restriction, Constraint): # otherwise it already is a Constraint, pass it directly - compiled_restrictions.append((restriction, params_used)) + compiled_restrictions.append((restriction, params_used, None)) else: raise ValueError(f"Restriction {restriction} is 
neither a string or Constraint {type(restriction)}") @@ -1110,9 +1109,10 @@ def compile_restrictions( noncompiled_restrictions = [] for r in restrictions_ignore: if isinstance(r, tuple) and len(r) == 2 and isinstance(r[1], (list, tuple)): - noncompiled_restrictions.append(r) + restriction, params_used = r + noncompiled_restrictions.append((restriction, params_used, restriction)) else: - noncompiled_restrictions.append((r, ())) + noncompiled_restrictions.append((r, [], r)) return noncompiled_restrictions + compiled_restrictions @@ -1177,10 +1177,17 @@ def process_cache(cache, kernel_options, tuning_options, runner): # check if it is safe to continue tuning from this cache if cached_data["device_name"] != runner.dev.name: - raise ValueError("Cannot load cache which contains results for different device") + raise ValueError( + f"Cannot load cache which contains results for different device (cache: {cached_data['device_name']}, actual: {runner.dev.name})" + ) if cached_data["kernel_name"] != kernel_options.kernel_name: - raise ValueError("Cannot load cache which contains results for different kernel") + raise ValueError( + f"Cannot load cache which contains results for different kernel (cache: {cached_data['kernel_name']}, actual: {kernel_options.kernel_name})" + ) if "problem_size" in cached_data and not callable(kernel_options.problem_size): + # if it's a single value, convert to an array + if isinstance(cached_data["problem_size"], int): + cached_data["problem_size"] = [cached_data["problem_size"]] # if problem_size is not iterable, compare directly if not hasattr(kernel_options.problem_size, "__iter__"): if cached_data["problem_size"] != kernel_options.problem_size: @@ -1212,7 +1219,7 @@ def correct_open_cache(cache, open_cache=True): filestr = cachefile.read().strip() # if file was not properly closed, pretend it was properly closed - if len(filestr) > 0 and not filestr[-3:] in ["}\n}", "}}}"]: + if len(filestr) > 0 and filestr[-3:] not in ["}\n}", "}}}"]: # 
remove the trailing comma if any, and append closing brackets if filestr[-1] == ",": filestr = filestr[:-1] diff --git a/noxfile.py b/noxfile.py index 38fc6680a..2770bc7f1 100644 --- a/noxfile.py +++ b/noxfile.py @@ -15,7 +15,7 @@ # set the test parameters verbose = False -python_versions_to_test = ["3.9", "3.10", "3.11", "3.12"] +python_versions_to_test = ["3.10", "3.11", "3.12", "3.13"] # 3.14 has not yet been officially released so is not tested against, but is allowed by the pyproject.toml nox.options.stop_on_first_error = True nox.options.error_on_missing_interpreters = True nox.options.default_venv_backend = 'virtualenv' @@ -85,7 +85,7 @@ def check_development_environment(session: Session) -> None: return None output: str = session.run("poetry", "install", "--sync", "--dry-run", "--with", "test", silent=True, external=True) match = re.search(r"Package operations: (\d+) (?:install|installs), (\d+) (?:update|updates), (\d+) (?:removal|removals), \d+ skipped", output) - assert match is not None, f"Invalid output: {output}" + assert match is not None, f"Could not check development environment, reason: {output}" groups = match.groups() installs, updates, removals = int(groups[0]), int(groups[1]), int(groups[2]) if installs > 0 or updates > 0: diff --git a/pyproject.toml b/pyproject.toml index 04b4512d2..a5e3f0522 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,24 +2,22 @@ requires = ["poetry-core>=1.7.0", "setuptools>=67.7.2"] build-backend = "poetry.core.masonry.api" -[tool.poetry] +[project] name = "kernel_tuner" -packages = [{ include = "kernel_tuner", from = "." 
}] description = "An easy to use CUDA/OpenCL kernel tuner in Python" -version = "1.1.3" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55 +version = "1.2.0" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55 +readme = "README.md" license = "Apache-2.0" authors = [ - "Ben van Werkhoven ", - "Alessio Sclocco ", - "Stijn Heldens ", - "Floris-Jan Willemsen ", - "Willem-Jan Palenstijn ", - "Bram Veenboer ", - "Richard Schoonhoven ", - "Leon Oostrum =2.0.0", + "scipy>=1.14.1", # Python >=3.13 needs scipy >=1.14 + "packaging", # required by file_utils + "jsonschema", + "python-constraint2>=2.3.1", + "xmltodict", + "pandas>=2.0.0", + "scikit-learn>=1.0.2", +] +# NOTE Torch can be used with Kernel Tuner, but is not a dependency, should be up to the user to use it + +[project.urls] homepage = "https://KernelTuner.github.io/kernel_tuner/" documentation = "https://KernelTuner.github.io/kernel_tuner/" repository = "https://github.com/KernelTuner/kernel_tuner" -[tool.poetry.urls] -"Tracker" = "https://github.com/KernelTuner/kernel_tuner/issues" -[tool.poetry.build] -generate-setup-file = false -[tool.poetry.scripts] -kernel_tuner = "kernel_tuner.interface:entry_point" +changelog = "https://github.com/KernelTuner/kernel_tuner/blob/master/CHANGELOG.md" +issues = "https://github.com/KernelTuner/kernel_tuner/issues" -[[tool.poetry.source]] -name = "testpypi" -url = "https://test.pypi.org/simple/" -priority = "explicit" +[project.scripts] +kernel_tuner = "kernel_tuner.interface:entry_point" -# ATTENTION: if anything is changed here, run `poetry update` -[tool.poetry.dependencies] -python = ">=3.9,<4" # <4 is because of hip-python # NOTE when changing the supported Python versions, also change the test versions in the noxfile -numpy = "^2.0.0" # Python 3.12 requires numpy at least 1.26 -scipy = ">=1.11.0" # held back by Python 3.9 -packaging = 
"*" # required by file_utils -jsonschema = "*" -python-constraint2 = "^2.1.0" -xmltodict = "*" -pandas = ">=2.0.0" -scikit-learn = ">=1.0.2" -# Torch can be used with Kernel Tuner, but is not a dependency, should be up to the user to use it +[tool.poetry] +packages = [{ include = "kernel_tuner", from = "." }] +include = [ + { path = "test" }, +] # this ensures that people won't have to clone the whole repo to include notebooks, they can just do `pip install kernel_tuner[tutorial,cuda]` # List of optional dependencies for user installation, e.g. `pip install kernel_tuner[cuda]`, used in the below `extras`. # Please note that this is different from the dependency groups below, e.g. `docs` and `test`, those are for development. # ATTENTION: if anything is changed here, run `poetry update` # CUDA -pycuda = { version = "^2024.1", optional = true } # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile -nvidia-ml-py = { version = "^12.535.108", optional = true } -pynvml = { version = "^11.4.1", optional = true } -# cupy-cuda11x = { version = "*", optional = true } # Note: these are completely optional dependencies as described in CONTRIBUTING.rst +# cupy-cuda11x = { version = "*", optional = true } # NOTE: these are completely optional dependencies as described in CONTRIBUTING.rst # cupy-cuda12x = { version = "*", optional = true } # cuda-python = { version = "*", optional = true } -# OpenCL -pyopencl = { version = "*", optional = true } # Attention: if pyopencl is changed here, also change `session.install("pyopencl")` in the Noxfile -# HIP -hip-python = { version = "^6.3.3.540.31", source = "testpypi", optional = true } # Note: when released, switch this package to pypi and remove tool.poetry.source -# Tutorial (for the notebooks used in the examples) -jupyter = { version = "^1.0.0", optional = true } -matplotlib = { version = "^3.5.0", optional = true } -[tool.poetry.extras] -cuda = ["pycuda", "nvidia-ml-py", "pynvml"] -opencl = 
["pyopencl"] -cuda_opencl = ["pycuda", "pyopencl"] +[[tool.poetry.source]] +name = "testpypi" +url = "https://test.pypi.org/simple/" +priority = "explicit" + +[tool.poetry.dependencies] +hip-python = { version = "^6.3.3.540.31", source = "testpypi", optional = true } # Note: when released, switch this package to pypi and remove tool.poetry.source and move this to [project.optional-dependencies] + +[project.optional-dependencies] +cuda = ["pycuda>=2025.1", "nvidia-ml-py>=12.535.108", "pynvml>=11.4.1"] # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile +opencl = ["pyopencl"] # Attention: if pyopencl is changed here, also change `session.install("pyopencl")` in the Noxfile +cuda_opencl = ["pycuda>=2024.1", "pyopencl"] # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile hip = ["hip-python"] -tutorial = ["jupyter", "matplotlib", "nvidia-ml-py"] +tutorial = ["jupyter>=1.0.0", "matplotlib>=3.5.0", "nvidia-ml-py>=12.535.108"] # ATTENTION: if anything is changed here, run `poetry update` and `poetry export --with docs --without-hashes --format=requirements.txt --output doc/requirements.txt` # Please note that there is overlap with the `dev` group diff --git a/test/context.py b/test/context.py index 016ee0af6..afbd68e3e 100644 --- a/test/context.py +++ b/test/context.py @@ -60,6 +60,12 @@ except (ImportError, RuntimeError): hip_present = False +try: + from autotuning_methodology.report_experiments import get_strategy_scores + methodology_present = True +except ImportError: + methodology_present = False + skip_if_no_pycuda = pytest.mark.skipif( not pycuda_present, reason="PyCuda not installed or no CUDA device detected" ) @@ -80,6 +86,7 @@ skip_if_no_openmp = pytest.mark.skipif(not openmp_present, reason="No OpenMP found") skip_if_no_openacc = pytest.mark.skipif(not openacc_present, reason="No nvc++ on PATH") skip_if_no_hip = pytest.mark.skipif(not hip_present, reason="No HIP Python found 
or no HIP device detected") +skip_if_no_methodology = pytest.mark.skipif(not methodology_present, reason="Autotuning Methodology not found") def skip_backend(backend: str): diff --git a/test/convolution.cu b/test/convolution.cu new file mode 100644 index 000000000..ecafcf4b8 --- /dev/null +++ b/test/convolution.cu @@ -0,0 +1,166 @@ +#define image_height 4096 +#define image_width 4096 + +#ifndef filter_height + #define filter_height 17 +#endif +#ifndef filter_width + #define filter_width 17 +#endif + +#define border_height ((filter_height/2)*2) +#define border_width ((filter_width/2)*2) +#define input_height (image_height + border_height) +#define input_width (image_width + border_width) + +#ifndef block_size_x + #define block_size_x 16 +#endif +#ifndef block_size_y + #define block_size_y 16 +#endif +#ifndef block_size_z + #define block_size_z 1 +#endif +#ifndef tile_size_x + #define tile_size_x 1 +#endif +#ifndef tile_size_y + #define tile_size_y 1 +#endif + +#define i_end min(block_size_y*tile_size_y+border_height, input_height) +#define j_end min(block_size_x*tile_size_x+border_width, input_width) + +/* + * If requested, we can use the __ldg directive to load data through the + * read-only cache. + */ +#define USE_READ_ONLY_CACHE read_only +#if USE_READ_ONLY_CACHE == 1 +#define LDG(x, y) __ldg(x+y) +#elif USE_READ_ONLY_CACHE == 0 +#define LDG(x, y) x[y] +#endif + +__constant__ float d_filter[33*33]; //large enough for the largest filter + +/* + * If use_padding == 1, we introduce (only when necessary) a number of padding + * columns in shared memory to avoid shared memory bank conflicts + * + * padding columns are only inserted when block_size_x is not a multiple of 32 (the assumed number of memory banks) + * and when the width of the data needed is not a multiple of 32. The latter is because some filter_widths never + * cause bank conflicts. 
+ * + * If not passed as a tunable parameter, padding is on by default + */ +#define shared_mem_width (block_size_x*tile_size_x+border_width) +#ifndef use_padding + #define use_padding 1 +#endif +#if use_padding == 1 + #if (((block_size_x % 32)!=0) && (((shared_mem_width-block_size_x)%32) != 0)) + // next line uses &31 instead of %32, because % in C is remainder not modulo + #define padding_columns ((32 - (border_width + block_size_x*tile_size_x - block_size_x)) & 31) + #undef shared_mem_width + #define shared_mem_width (block_size_x*tile_size_x+border_width+padding_columns) + #endif +#endif + + +__global__ void convolution_kernel(float *output, float *input, float *filter) { + int ty = threadIdx.y; + int tx = threadIdx.x; + int by = blockIdx.y * block_size_y * tile_size_y; + int bx = blockIdx.x * block_size_x * tile_size_x; + + //shared memory to hold all input data need by this thread block + __shared__ float sh_input[block_size_y*tile_size_y+border_height][shared_mem_width]; + + //load all input data needed by this thread block into shared memory + #pragma unroll + for (int i=ty; i 0 - diff --git a/test/test_runners.py b/test/test_runners.py index 527c1d252..acbb641e6 100644 --- a/test/test_runners.py +++ b/test/test_runners.py @@ -140,6 +140,17 @@ def test_diff_evo(env): assert len(result) > 0 +def test_restrictions(env): + restrictions = [lambda p: p["block_size_x"] <= 512, "block_size_x > 128"] + + result, _ = tune_kernel(*env, + verbose=True, + restrictions=restrictions, + cache=cache_filename, + simulation_mode=True) + assert len(result) == 6 + + @skip_if_no_pycuda def test_time_keeping(env): kernel_name, kernel_string, size, args, tune_params = env diff --git a/test/test_searchspace.py b/test/test_searchspace.py index 8672c1d03..20f004051 100644 --- a/test/test_searchspace.py +++ b/test/test_searchspace.py @@ -316,6 +316,34 @@ def test_neighbors_cached(): assert neighbors == neighbors_2 +def test_neighbors_cached_mixed_methods(): + """Test whether 
retrieving a set of neighbors with one method after another yields the correct neighbors.""" + simple_searchspace_duplicate = Searchspace( + simple_tuning_options.tune_params, + simple_tuning_options.restrictions, + max_threads, + ) + + test_configs = simple_searchspace_duplicate.get_random_sample(5) + for test_config in test_configs: + assert not simple_searchspace_duplicate.are_neighbors_indices_cached(test_config, "Hamming") + neighbors_hamming = simple_searchspace_duplicate.get_neighbors(test_config, "Hamming") + assert simple_searchspace_duplicate.are_neighbors_indices_cached(test_config, "Hamming") + + # now switch to a different method + neighbors_strictlyadjacent = simple_searchspace_duplicate.get_neighbors(test_config, "strictly-adjacent") + neighbors_strictlyadjacent_no_cache = simple_searchspace_duplicate.get_neighbors_no_cache(test_config, "strictly-adjacent") + + neighbors_adjacent = simple_searchspace_duplicate.get_neighbors(test_config, "adjacent") + neighbors_adjacent_no_cache = simple_searchspace_duplicate.get_neighbors_no_cache(test_config, "adjacent") + + # check that the neighbors are as expected + assert neighbors_strictlyadjacent == neighbors_strictlyadjacent_no_cache + assert neighbors_adjacent == neighbors_adjacent_no_cache + assert neighbors_hamming != neighbors_strictlyadjacent + assert neighbors_hamming != neighbors_adjacent + + def test_param_neighbors(): """Test whether for a given parameter configuration and index the correct neighboring parameters are returned.""" test_config = tuple([1.5, 4, "string_1"]) diff --git a/test/test_time_budgets.py b/test/test_time_budgets.py new file mode 100644 index 000000000..acf10a0e9 --- /dev/null +++ b/test/test_time_budgets.py @@ -0,0 +1,77 @@ +from itertools import product +from time import perf_counter + +import numpy as np +import pytest +from pytest import raises + +from kernel_tuner import tune_kernel + +from .context import skip_if_no_gcc + + +@pytest.fixture +def env(): + kernel_name = 
"vector_add" + kernel_string = """ + #include + + float vector_add(float *c, float *a, float *b, int n) { + struct timespec start, end; + clock_gettime(CLOCK_MONOTONIC, &start); + + for (int i = 0; i < n; i++) { + c[i] = a[i] + b[i]; + } + + clock_gettime(CLOCK_MONOTONIC, &end); + double elapsed = (end.tv_sec - start.tv_sec) * 1e3 + (end.tv_nsec - start.tv_nsec) / 1e6; + return (float) elapsed; + }""" + + size = 100 + a = np.random.randn(size).astype(np.float32) + b = np.random.randn(size).astype(np.float32) + c = np.zeros_like(b) + n = np.int32(size) + + args = [c, a, b, n] + tune_params = {"nthreads": [1, 2, 4]} + + return kernel_name, kernel_string, size, args, tune_params + + +@skip_if_no_gcc +def test_no_time_budget(env): + """Ensure that a RuntimeError is raised if the startup takes longer than the time budget.""" + with raises(RuntimeError, match='startup time of the tuning process'): + tune_kernel(*env, strategy="random_sample", strategy_options={"strategy": "random_sample", "time_limit": 0.0}) + +@skip_if_no_gcc +def test_some_time_budget(env): + """Ensure that the time limit is respected.""" + time_limit = 1.0 + kernel_name, kernel_string, size, args, tune_params = env + tune_params["bogus"] = list(range(1000)) + env = kernel_name, kernel_string, size, args, tune_params + + # Ensure that if the tuning takes longer than the time budget, the results are returned early. + start_time = perf_counter() + res, _ = tune_kernel(*env, strategy="random_sample", strategy_options={"time_limit": time_limit}) + + # Ensure that there are at least some results, but not all. + size_all = len(list(product(*tune_params.values()))) + assert 0 < len(res) < size_all + + # Ensure that the time limit was respected by some margin. 
+ assert perf_counter() - start_time < time_limit * 2 + +@skip_if_no_gcc +def test_full_time_budget(env): + """Ensure that given ample time budget, the entire space is explored.""" + res, _ = tune_kernel(*env, strategy="brute_force", strategy_options={"time_limit": 10.0}) + + # Ensure that the entire space is explored. + tune_params = env[-1] + size_all = len(list(product(*tune_params.values()))) + assert len(res) == size_all diff --git a/test/test_util_functions.py b/test/test_util_functions.py index f3431991b..5892a0286 100644 --- a/test/test_util_functions.py +++ b/test/test_util_functions.py @@ -712,7 +712,7 @@ def test_parse_restrictions(): assert expected in parsed[0] # test the split parsed function - parsed_multi = parse_restrictions(restrictions, tune_params, try_to_constraint=False) + parsed_multi = parse_restrictions(restrictions, tune_params) assert isinstance(parsed_multi, list) and isinstance(parsed_multi[0], tuple) assert len(parsed_multi) == 3 parsed, params = parsed_multi[0] @@ -725,32 +725,47 @@ def test_parse_restrictions(): assert restrictions[2] in parsed assert all(param in tune_params for param in params) - # test the conversion to constraints - parsed_multi_constraints = parse_restrictions(restrictions, tune_params, try_to_constraint=True) - assert isinstance(parsed_multi_constraints, list) and isinstance(parsed_multi_constraints[0], tuple) - assert len(parsed_multi_constraints) == 4 - parsed, params = parsed_multi_constraints[0] - assert isinstance(parsed, str) - assert params == ["block_size_x"] - parsed, params = parsed_multi_constraints[1] - assert isinstance(parsed, str) - assert all(param in tune_params for param in params) - parsed, params = parsed_multi_constraints[2] - assert isinstance(parsed, MinProdConstraint) - assert all(param in tune_params for param in params) - parsed, params = parsed_multi_constraints[3] - assert isinstance(parsed, MaxProdConstraint) - assert all(param in tune_params for param in params) - # test the 
conversion to constraints with a real-world edge-case - rw_tune_params = dict() - rw_tune_params["tile_size_x"] = [1, 2, 3, 4, 5, 6, 7, 8] - rw_tune_params["tile_size_y"] = [1, 2, 3, 4, 5, 6, 7, 8] - parsed_constraint, params_constraint = parse_restrictions(["tile_size_x*tile_size_y<30"], rw_tune_params, try_to_constraint=True)[0] - assert all(param in rw_tune_params for param in params_constraint) - assert isinstance(parsed_constraint, MaxProdConstraint) - assert parsed_constraint._maxprod == 29 - parsed_constraint, params_constraint = parse_restrictions(["30