From c03cad9249fd72a256f6ac0df231ea779191ec69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ezequiel=20Leonardo=20Casta=C3=B1o?= <14986783+ELC@users.noreply.github.com> Date: Wed, 18 Jun 2025 22:57:40 -0300 Subject: [PATCH 1/3] Scraped pydata-yerevan-2023 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #xxx Event config: ~~~yaml repo_dir: W:\Repositories\pyvideo-data # Copy the event template here and adapt to the event parameters # Only repo_dir: and events: are loaded # ============================================================================= events: # - title: PyData Virginia 2025 # dir: pydata-virginia-2025 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qLS7Mk-jI9jhb4t5UY6yDW # related_urls: # - label: Conference Website # url: https://pydata.org/virginia2025 # language: eng # dates: # begin: 2025-04-18 # end: 2025-04-19 # default: 2025-04-18 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Global 2024 # dir: pydata-global-2024 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0otKlHvES9iBFtVQ71yZhed # related_urls: # - label: Conference Website # url: https://pydata.org/global2024 # language: eng # dates: # begin: 2024-12-03 # end: 2024-12-05 # default: 2024-12-03 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: - title: PyData New York City 2024 dir: pydata-new-york-city-2024 youtube_list: - 
https://www.youtube.com/playlist?list=PLGVZCDnMOq0ohEIZ-_wM2W_xqSVjyA3dC related_urls: - label: Conference Website url: https://pydata.org/nyc2024 language: eng dates: begin: 2024-11-06 end: 2024-11-08 default: 2024-11-06 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Tel Aviv 2024 dir: pydata-tel-avid-2024 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0pRsGPxDvLZfuufNgqREc0a related_urls: - label: Conference Website url: https://pydata.org/telaviv2024/ language: eng dates: begin: 2024-11-04 end: 2024-11-04 default: 2024-11-04 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Paris 2024 dir: pydata-paris-2024 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0pKya8gksd00ennKuyoH7v7 related_urls: - label: Conference Website url: https://pydata.org/paris2024 language: eng dates: begin: 2024-09-25 end: 2024-09-26 default: 2024-09-25 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Amsterdam 2024 dir: pydata-amsterdam-2024 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0reU2lzNZCn9obkyRVaSnpF related_urls: - label: Conference Website url: https://web.archive.org/web/20240822042916/https://amsterdam.pydata.org/ language: eng dates: begin: 2024-09-18 end: 2024-09-20 default: 
2024-09-18 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Vermont 2024 dir: pydata-vermont-2024 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0pME_xSRdmoYFzhlsHJYM8I related_urls: - label: Conference Website url: https://pydata.org/vermont2024/ language: eng dates: begin: 2024-07-29 end: 2024-07-30 default: 2024-07-29 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Eindhoven 2024 dir: pydata-eindhoven-2024 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0q7a2aoNP1au_1egfZEjGL6 related_urls: - label: Conference Website url: https://pydata.org/eindhoven2024/ language: eng dates: begin: 2024-07-11 end: 2024-07-11 default: 2024-07-11 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData London 2024 dir: pydata-london-2024 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0rrhYTNedKKuJ9716fEaAdK related_urls: - label: Conference Website url: https://pydata.org/london2024/ language: eng dates: begin: 2024-06-14 end: 2024-06-16 default: 2024-06-14 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - 
related_urls - speakers - tags - title tags: - title: PyData Berlin 2024 dir: pydata-berlin-2024 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0r2tGyr-hjbnCrjXRkCMvwB related_urls: - label: Conference Website url: https://2024.pycon.de/ language: eng dates: begin: 2024-06-14 end: 2024-06-16 default: 2024-06-14 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Global 2023 dir: pydata-global-2023 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0poULd1C4oUdPbPkTe4poJx related_urls: - label: Conference Website url: https://pydata.org/global2023/ language: eng dates: begin: 2023-12-06 end: 2023-12-08 default: 2023-12-06 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Eindhoven 2023 dir: pydata-eindhoven-2023 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qkbJjIfppGO44yhDV2i4gR related_urls: - label: Conference Website url: https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023 language: eng dates: begin: 2023-11-30 end: 2023-11-30 default: 2023-11-30 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData New York City 2023 dir: pydata-new-york-city-2023 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0o79mT1hHyqtFDSNzXXSYQM related_urls: - label: 
Conference Website url: https://pydata.org/nyc2023/ language: eng dates: begin: 2023-11-01 end: 2023-11-03 default: 2023-11-01 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Amsterdam 2023 dir: pydata-amsterdam-2023 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0pADyz2VboxPFIdrsozlENg related_urls: - label: Conference Website url: https://amsterdam2023.pydata.org/cfp/schedule/ language: eng dates: begin: 2023-09-14 end: 2023-09-16 default: 2023-09-14 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Seattle 2023 dir: pydata-seattle-2023 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0q81_-rt5jzJ--ZEgcNArKb related_urls: - label: Conference Website url: https://pydata.org/seattle2023/ language: eng dates: begin: 2023-04-26 end: 2023-04-28 default: 2023-04-26 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Berlin 2023 dir: pydata-berlin-2023 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0peDguAzds7kVmBr8avp46K related_urls: - label: Conference Website url: https://2023.pycon.de/ language: eng dates: begin: 2023-04-17 end: 2023-04-19 default: 2023-04-17 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields 
add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Yerevan 2023 dir: pydata-yerevan-2023 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0pJKftCB2BtalTDE-2xS20g language: eng dates: begin: 2023-10-23 end: 2024-11-07 default: 2023-10-23 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Trójmiasto 2023 dir: pydata-trojmiasto-2023 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qS0mI7s9tpXnS-XV5l_Ibs related_urls: - label: Conference Website url: https://www.meetup.com/pl-PL/pydata-trojmiasto/ language: eng dates: begin: 2023-10-24 end: 2023-10-24 default: 2023-10-24 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Tel Aviv 2022 dir: pydata-tel-avid-2022 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0p6o_fjjdNPqy1rps49z2S0 related_urls: - label: Conference Website url: https://pydata.org/telaviv2022/ language: eng dates: begin: 2022-12-13 end: 2022-12-13 default: 2022-12-13 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Eindhoven 2022 dir: pydata-eindhoven-2022 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0pI60MsrFpHcII1qWm7drmZ 
related_urls: - label: Conference Website url: https://pydata.org/eindhoven2022/ language: eng dates: begin: 2022-12-02 end: 2022-12-02 default: 2022-12-02 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Global 2022 dir: pydata-global-2022 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qgYUt0yn7F80wmzCnj2dEq related_urls: - label: Conference Website url: https://pydata.org/global2022/ language: eng dates: begin: 2022-12-01 end: 2022-12-03 default: 2022-12-01 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData New York City 2022 dir: pydata-new-york-city-2022 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0opPc5-dp6ZDCFvOqDBlUuv related_urls: - label: Conference Website url: https://pydata.org/nyc2022/ language: eng dates: begin: 2022-11-09 end: 2022-11-11 default: 2022-11-09 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Yerevan 2022 dir: pydata-yerevan-2022 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qWwVVDmdOw6oxAlqqH8Ca- related_urls: - label: Conference Website url: https://pydata.org/yerevan2022/ language: eng dates: begin: 2022-08-12 end: 2022-08-13 default: 2022-08-12 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files 
and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData London 2022 dir: pydata-london-2022 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qT0MXnci7VBSF-U-0WaQ-w related_urls: - label: Conference Website url: https://pydata.org/london2022/ language: eng dates: begin: 2022-06-17 end: 2022-06-19 default: 2022-06-17 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Berlin 2022 dir: pydata-berlin-2022 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0p0Fal8_YKg6fPXnf3iPtwD related_urls: - label: Conference Website url: https://2022.pycon.de/ language: eng dates: begin: 2022-04-11 end: 2022-04-13 default: 2022-04-11 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Global 2021 dir: pydata-global-2021 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0rHb3JXG6puQnUAclFFZMlh related_urls: - label: Conference Website url: https://pydata.org/global2021/ language: eng dates: begin: 2021-10-28 end: 2021-10-30 default: 2021-10-28 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Eindhoven 2021 dir: pydata-eindhoven-2021 youtube_list: - 
https://www.youtube.com/playlist?list=PLGVZCDnMOq0rBKcoKoaWJiMrDGdNr2_S0 related_urls: - label: Conference Website url: https://pydata.org/eindhoven2021/ language: eng dates: begin: 2021-11-12 end: 2021-11-12 default: 2021-11-12 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Global 2020 dir: pydata-global-2020 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0r0eC9BnITmYJ786p9Y1Q8D related_urls: - label: Conference Website url: https://pydataglobal.github.io/ language: eng dates: begin: 2020-11-11 end: 2020-11-15 default: 2020-11-11 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Eindhoven 2020 dir: pydata-eindhoven-2020 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qpKjuGgNOgtOxIuATvnqEr related_urls: - label: Conference Website url: https://pydata.org/eindhoven2020/schedule/ language: eng dates: begin: 2020-10-07 end: 2020-10-09 default: 2020-10-07 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Amsterdam 2020 dir: pydata-amsterdam-2020 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0oX4ymLgldSvpfiZj-S8-fH related_urls: - label: Conference Website url: https://datasciencedistrict.nl/pydata-festival-amsterda/ language: eng dates: begin: 2020-06-15 end: 2020-06-20 
default: 2020-06-15 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData South Africa 2018 dir: pydata-south-africa-2018 youtube_list: - https://www.youtube.com/watch?v=Lvw3Lp3KrTM&list=PLGjWYNrNnSuc78h5x23A5mLAzWlCl9LGf related_urls: - label: Conference Website url: https://2018.za.pycon.org/ language: eng dates: begin: 2018-10-11 end: 2018-10-12 default: 2018-10-11 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: - title: PyData Hamburg 2021 dir: pydata-hamburg-2021 youtube_list: - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qbRG8gBRkosFfhWrObasQF related_urls: - label: Conference Website url: https://www.meetup.com/pydata-hamburg/ language: eng dates: begin: 2020-11-03 end: 2021-03-03 default: 2021-03-03 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: # ISO_639-3 language codes https://en.wikipedia.org/wiki/ISO_639-3 # languages = { # 'ita': 'Italian', # 'zho': 'Chinese', # 'por': 'Portuguese', # 'ukr': 'Ukrainian', # 'deu': 'German', # 'eng': 'English', # 'rus': 'Russian', # 'fra': 'French', # 'spa': 'Spanish', # 'eus': 'Basque', # 'cat': 'Catalan', # 'glg': 'Galician', # 'kor': 'Korean', # 'lit': 'Lithuanian', # 'jpn': 'Japanese', # 'ces': 'Czech', # 'pol': 'Polish', # 'heb': 'Hebrew', # 'tha': 'Thai', # } ~~~ Scraped with 
[pyvideo_scrape](https://github.com/pyvideo/pyvideo_scrape) --- pydata-yerevan-2023/category.json | 3 ++ ...impactful-dashboards-for-your-clients.json | 39 ++++++++++++++ ...akov-revolutionizing-cancer-treatment.json | 39 ++++++++++++++ ...e-learning-approaches-in-neuroscience.json | 39 ++++++++++++++ ...manov-performance-of-vector-databases.json | 43 ++++++++++++++++ ...data-warehouse-in-your-python-process.json | 39 ++++++++++++++ ...or-building-large-language-model-apps.json | 47 +++++++++++++++++ ...th-reasoning-without-proprietary-data.json | 43 ++++++++++++++++ ...e-amount-of-boilerplate-code-required.json | 43 ++++++++++++++++ ...pervised-learning-for-computer-vision.json | 39 ++++++++++++++ ...hulevich-llm-generated-text-detection.json | 51 +++++++++++++++++++ 11 files changed, 425 insertions(+) create mode 100644 pydata-yerevan-2023/category.json create mode 100644 pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json create mode 100644 pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json create mode 100644 pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json create mode 100644 pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json create mode 100644 pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json create mode 100644 pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json create mode 100644 pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json create mode 100644 pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json create mode 100644 pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json create mode 100644 
pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json diff --git a/pydata-yerevan-2023/category.json b/pydata-yerevan-2023/category.json new file mode 100644 index 000000000..0038b9eb3 --- /dev/null +++ b/pydata-yerevan-2023/category.json @@ -0,0 +1,3 @@ +{ + "title": "PyData Yerevan 2023" +} diff --git a/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json b/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json new file mode 100644 index 000000000..54f6a67b9 --- /dev/null +++ b/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json @@ -0,0 +1,39 @@ +{ + "description": "Adam Kulidjian, Chief Technology Officer at Zyphr Solutions Inc., provides a talk on \u201cCrafting Impactful Dashboards for Your Clients.\u201d\n\nCommunicating trends, patterns, and insights through data is integral to understanding the world quantitatively. This phenomenon is used in data science, business intelligence, data analytics, and generally across all scientific disciplines.\n\nA dashboard, particularly one that houses data visualization, is the most common way to do it. With the increased accessibility of dashboard creation tools to people using data, there is a need to effectively communicate data, tell compelling stories, and create affordances that allow others to explore the data themselves.\n\nThe talk offers a handful of heuristics and pragmatic questions that will help you build a better dashboard, regardless of your clients, industry, or use case.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 3089, + "language": "eng", + "recorded": "2024-05-22", + "related_urls": [ + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/DZjCrLJ1xlk/maxresdefault.jpg", + "title": "Adam Kulidjian - Crafting Impactful Dashboards for Your Clients", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=DZjCrLJ1xlk" + } + ] +} diff --git a/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json b/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json new file mode 100644 index 000000000..6695989b3 --- /dev/null +++ b/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json @@ -0,0 +1,39 @@ +{ + "description": "Aleksandr Sarachakov, Biomedical Imaging Team Lead at BostonGene, provides a talk on \u201cRevolutionizing 
Cancer Treatment: Harnessing AI, Zarr, and AnnData for High-Speed Biomedical Imaging.\u201d \n\nZarr and AnnData, Python-based technologies, are revolutionizing the landscape of biomedical image processing, especially when paired with self-supervised learning (SSL). Zarr, a chunked and compressed data storage format, enables the efficient handling of datasets found in biomedical applications. AnnData, a specialized framework for multi-dimensional annotated data, is crucial in managing and analyzing large-scale biomedical datasets.\n\nIn the context of SSL, these technologies boost the processing speed and reduce the computational load for handling high-resolution images and complex datasets. Zarr's ability to store multi-terabyte data in distributed and parallelized environments allows for faster processing and analysis of biomedical images. AnnData complements this by providing structured, annotated data that SSL models can efficiently learn from without extensive labeling. This combination reduces memory usage, making it feasible to handle biomedical images on a large scale. These advancements are pivotal for applications like cancer diagnosis, where rapid, accurate image analysis is critical.\n\nDuring the talk, our speaker explores:\n- how Zarr and AnnData facilitate scalable biomedical image processing, \n- outline their integration with SSL for cutting-edge research, \n- and discuss future developments in optimizing biomedical workflows.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 6191, + "language": "eng", + "recorded": "2024-11-07", + "related_urls": [ + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/Xik80kYLD5c/maxresdefault.jpg", + "title": "Aleksandr Sarachakov - Revolutionizing Cancer Treatment", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Xik80kYLD5c" + } + ] +} diff --git a/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json b/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json new file mode 100644 index 000000000..cd95f35d6 --- /dev/null +++ b/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json @@ -0,0 +1,39 @@ +{ + "description": "Dr. 
Aleksei Gorin, a Neurobiologist and Senior Scientist at Emonomy, provides a talk on \u201cMachine Learning Approaches in Neuroscience.\u201d\n\nAlong with the growth of artificial intelligence and machine learning methodologies, neurobiologists are adopting modern machine learning techniques to tackle a broad spectrum of challenges. Those range from early disease diagnosis to the development of software capable of modeling behavior and natural neural networks.\n\nDuring the talk, Dr. Gorin explores the latest endeavors for integrating machine learning into neuroscience while:\n-discussing the achieved outcomes and their implications for the evolution of brain science methodologies,\n-examining key libraries in computational neuroscience, their role, and offering solutions in data analysis processes.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 3697, + "language": "eng", + "recorded": "2024-02-28", + "related_urls": [ + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/i-8IeS9N7wA/maxresdefault.jpg", + "title": "Aleksei Gorin - Machine Learning Approaches in Neuroscience", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=i-8IeS9N7wA" + } + ] +} diff --git a/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json b/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json new file mode 100644 index 000000000..afc54cfdd --- /dev/null +++ b/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json @@ -0,0 +1,43 @@ +{ + "description": "Egor Romanov, Software Engineer at Supabase, provides a talk on \u201cPerformance of Vector Databases.\u201d\n\nThe talk delves into vector databases' performance, challenges, and potentialities and discovers their role in advancing AI applications like Retrieval-Augmented Generation (RAG).\n\nHigh-dimensional embeddings are integral to numerous machine learning applications, transforming raw data into compact representations for diverse algorithms. Vector databases are essential in managing and utilizing these vectors. \n\nTheir main purpose includes aiding operations, including distance computations, similarity evaluations, and nearest-neighbor searches within high-dimensional spaces. RAG leverages these embedding stores, unlocking significant potentialities in the AI domain. 
\n\nDuring the talk, Egor Romanov:\n \n- explores the process of creating a provider for Postgres, integrated with pgvector, within a Python performance evaluation framework,\n- conducts a similarity search test simulation showcasing the latent performance potential.\n\nAccess the talk notes at: https://shorturl.at/CGKL1\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 2588, + "language": "eng", + "recorded": "2024-01-22", + "related_urls": [ + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://shorturl.at/CGKL1", + "url": "https://shorturl.at/CGKL1" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/-MYYB0QjV6I/maxresdefault.jpg", + "title": "Egor Romanov - Performance of Vector Databases", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=-MYYB0QjV6I" + } + ] +} diff --git a/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json b/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json new file mode 100644 index 000000000..3ad0856f9 --- /dev/null +++ b/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json @@ -0,0 +1,39 @@ +{ + "description": "G\u00e1bor Sz\u00e1rnyas, a Developer Relations Advocate and Technical Writer at DuckDB Labs, provides a talk on \u201cDuckDB: The Power of a Data Warehouse in your Python Process.\u201d\n\nDuckDB is an in-process analytical database management system, a powerful data warehouse engine running inside the Python process without any setup or communication overhead.\n\nIt is an open-source and highly portable system available as a command line tool with R, NodeJS, and Julia clients;\n- which loads data from many formats, such as CSV and Parquet, as well as pandas data frames,\n-its speed and features allow it to tackle a remarkable number of use cases in data 
science \u2013 including data wrangling and running complex ad-hoc SQL queries \u2013 while running on a laptop.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 3326, + "language": "eng", + "recorded": "2023-10-23", + "related_urls": [ + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/q_SKaOeRiOI/maxresdefault.jpg", + "title": "G\u00e1bor Sz\u00e1rnyas - DuckDB: The Power of a Data Warehouse in your Python Process", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=q_SKaOeRiOI" + } + ] +} diff --git a/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json b/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json new file mode 100644 index 000000000..8cf437081 --- /dev/null +++ b/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json @@ -0,0 +1,47 @@ +{ + "description": "Gor Hayrapetyan, Lead/Senior Data Engineer at Microsoft, Estonia, and Karen Javadyan, Software Engineer at Snowflake, provide a talk on \u201cLangchain: A Framework for Building Large Language Model Apps.\u201d\n\nThe landscape of Large Language Models (LLM) and the libraries supporting them has recently had rapid evolution.\n\nDuring the talk, you will get a brief introduction to LLMs and learn about the current framework of LLM applications. 
Following this, you will discover Langchain features and concepts, including:\n- Integrations with different LLM models,\n- Chains,\n- Retrievers, \n- Tools,\n- Agents.\n \nTo put Langchain usage into perspective, the talk will also reflect on the RAG technique to expose LLM to your data.\n\nGitHub Repo: https://github.com/kajarenc/PyData-March-Langchain\n\nSlides: https://shorturl.at/ciovF\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 4235, + "language": "eng", + "recorded": "2024-04-12", + "related_urls": [ + { + "label": "https://shorturl.at/ciovF", + "url": "https://shorturl.at/ciovF" + }, + { + "label": "https://github.com/kajarenc/PyData-March-Langchain", + "url": "https://github.com/kajarenc/PyData-March-Langchain" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/YNixBsPt7Ds/maxresdefault.jpg", + "title": "Gor Hayrapetyan & Karen Javadyan - Langchain: A Framework for Building Large Language Model Apps", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=YNixBsPt7Ds" + } + ] +} diff --git a/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json b/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json new file mode 100644 index 000000000..5520b6746 --- /dev/null +++ b/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json @@ -0,0 +1,43 @@ +{ + "description": "Ivan Moshkov, Deep Learning Engineer at NVIDIA, and Daria Gitman, Conversational AI Research Intern at NVIDIA provide a talk on \"How to Build an LLM for Math Reasoning without Proprietary Data?\"\n\nRecent research has shown the value of synthetically generated datasets in training LLMs to acquire targeted skills. 
Current large-scale math instruction tuning datasets such as MetaMathQA and MAmmoTH rely on outputs from closed-source LLMs that have commercially restrictive licenses. One key reason limiting the use of open-source LLMs in data generation pipelines is the gap in the mathematical skills between the best closed-source LLMs, such as GPT-4, and the best open-source LLMs. \n\nIn their research, Ivan and Daria constructed OpenMathInstruct-1, a math instruction tuning dataset with 1.8M problem-solution pairs using recent progress in open-source LLMs, proposed prompting novelty, and brute-force scaling. Their best model, OpenMath-CodeLlama-70B, trained on a subset of OpenMathInstruct-1, achieves a competitive score of 84.6% on GSM8K and 50.7% on MATH, comparable to top GPT-distilled models. \n\nDuring the talk, Ivan introduces the challenge of math reasoning in Natural Language Processing and discusses the process of creating their synthetic dataset. Following this, Daria explores the Data Explorer tool and shares key insights extracted from the data using this tool. \n\nSlides: https://shorturl.at/GRUFi \n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! 
See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 3671, + "language": "eng", + "recorded": "2024-07-24", + "related_urls": [ + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://shorturl.at/GRUFi", + "url": "https://shorturl.at/GRUFi" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/prPLAxYF1bU/maxresdefault.jpg", + "title": "Ivan Moshkov & Daria Gitman - How to Build an LLM for Math Reasoning without Proprietary Data?", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=prPLAxYF1bU" + } + ] +} diff --git a/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json b/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json new file mode 100644 index 000000000..a25a62e6d --- /dev/null +++ b/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json @@ -0,0 +1,43 @@ +{ + "description": "Nikolay Karpov, a Senior Research Scientist at NVIDIA NeMo, provides a talk on \u201cHow to prepare a speech dataset and minimize the amount of boilerplate code required?\u201d \n\nProcessing a lot of data for training neural models requires more effort than neural network engineering and training. 
Nvidia NeMo team has made a Speech Data Processor tool to simplify the process: https://lnkd.in/eHE-KjNC\n\nDuring the talk, you will explore the steps for speech dataset preparation, including:\n\n-Video-to-audio conversion,\n-Metadata parsing,\n-Audio and text language identification,\n-Speech recognition,\n-Text normalization,\n-Filtration by metrics and regular expression.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 2296, + "language": "eng", + "recorded": "2023-11-29", + "related_urls": [ + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://lnkd.in/eHE-KjNC", + "url": "https://lnkd.in/eHE-KjNC" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/3L80rxyofFU/maxresdefault.jpg", + "title": "Nikolay Karpov - How to Prepare a Speech Dataset & Minimize the Amount of Boilerplate Code Required?", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=3L80rxyofFU" + } + ] +} diff --git a/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json b/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json new file mode 100644 index 000000000..ec0f9efb5 --- /dev/null +++ b/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json @@ -0,0 +1,39 @@ +{ + "description": "Vahan Huroyan, a Machine Learning Researcher at the YerevaNN research lab, provides a talk on \u201cRecent Developments in Self-Supervised Learning for Computer Vision.\u201d\n\nDuring the talk, you will discover the latest developments in self-supervised learning (SSL) for computer vision and delve into its challenges and future directions in this field.\n\nSelf-supervised learning (SSL) is a powerful tool for training computer vision models without the need for extensive labeled data. 
SSL methods are designed around pretext tasks that force models to learn valuable representations from the training data. These learned representations can then be transferred to downstream tasks, such as:\n-image classification, \n-object detection, \n-and segmentation, with minimal fine-tuning\n\nThe two main approaches of SSL include Contrastive learning and Masked image modeling:\n\n1. Contrastive learning is a general SSL framework that trains models to distinguish between positive and negative pairs of images.\n2. Masked image modeling is a specific type of SSL that involves masking random patches of an image and training the model to predict the masked pixels.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 2185, + "language": "eng", + "recorded": "2023-12-16", + "related_urls": [ + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/vICrdSUS9vg/maxresdefault.jpg", + "title": "Vahan Huroyan - Recent Developments in Self-Supervised Learning for Computer Vision", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=vICrdSUS9vg" + } + ] +} diff --git a/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json b/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json new file mode 100644 index 000000000..2a6e51587 --- /dev/null +++ b/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json @@ -0,0 +1,51 @@ +{ + "description": "Yuri Orshulevich, Senior NLP Engineer at SuperAnnotate, provides a talk on \"LLM-Generated Text Detection.\" \n\nIn the evolving AI and NLP landscape, the rise of Large Language Models (LLMs) has led to a new era of text generation. Identifying machine-generated content presents a significant challenge with the growing complexity of these models.\n\nThe talk emphasizes the importance of distinguishing machine-generated vs human-written text by exploring possible problems and ways to avoid them. \n\nDuring the talk, you will:\n-Explore the significance of text detection in various domains and ongoing efforts, including competitions. \n-Evaluate the current progress in this area, highlighting the apparent simplicity of a task, which in reality is complex. 
\n-Examine existing open-source solutions, their limitations, and areas for improvement.\n-Be introduced to a high-quality benchmark that provides a clear way to measure performance.\n\nSlides: https://shorturl.at/fhmpv\n\nGitHub repo: https://github.com/superannotateai/generated_text_detector\n\nHuggingFace model: https://huggingface.co/SuperAnnotate/roberta-large-llm-content-detector\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 3116, + "language": "eng", + "recorded": "2024-04-27", + "related_urls": [ + { + "label": "https://huggingface.co/SuperAnnotate/roberta-large-llm-content-detector", + "url": "https://huggingface.co/SuperAnnotate/roberta-large-llm-content-detector" + }, + { + "label": "https://shorturl.at/fhmpv", + "url": "https://shorturl.at/fhmpv" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://github.com/superannotateai/generated_text_detector", + "url": "https://github.com/superannotateai/generated_text_detector" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/hJ4NjViBF98/maxresdefault.jpg", + "title": "Yuri Orshulevich - LLM-Generated Text Detection", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=hJ4NjViBF98" + } + ] +} From 61bb515bc56f4cde65593e4df99cab4a1d08b75d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ezequiel=20Leonardo=20Casta=C3=B1o?= <14986783+ELC@users.noreply.github.com> Date: Tue, 24 Jun 2025 18:13:36 +0000 Subject: [PATCH 2/3] Add speaker information --- ...impactful-dashboards-for-your-clients.json | 21 +++--------------- ...akov-revolutionizing-cancer-treatment.json | 21 +++--------------- ...e-learning-approaches-in-neuroscience.json | 21 +++--------------- ...manov-performance-of-vector-databases.json | 21 +++--------------- ...data-warehouse-in-your-python-process.json | 21 +++--------------- ...or-building-large-language-model-apps.json | 22 ++++--------------- ...th-reasoning-without-proprietary-data.json | 22 ++++--------------- 
...e-amount-of-boilerplate-code-required.json | 21 +++--------------- ...pervised-learning-for-computer-vision.json | 21 +++--------------- ...hulevich-llm-generated-text-detection.json | 21 +++--------------- 10 files changed, 32 insertions(+), 180 deletions(-) diff --git a/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json b/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json index 54f6a67b9..2245535a4 100644 --- a/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json +++ b/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json @@ -10,26 +10,11 @@ } ], "speakers": [ - "TODO" - ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" + "Adam Kulidjian" ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/DZjCrLJ1xlk/maxresdefault.jpg", - "title": "Adam Kulidjian - Crafting Impactful Dashboards for Your Clients", + "title": "Crafting Impactful Dashboards for Your Clients", "videos": [ { "type": "youtube", diff --git a/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json b/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json index 6695989b3..b4143e244 100644 --- a/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json +++ b/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json @@ -10,26 +10,11 @@ } ], "speakers": [ - "TODO" - ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" + "Aleksandr Sarachakov" ], + "tags": [], "thumbnail_url": 
"https://i.ytimg.com/vi/Xik80kYLD5c/maxresdefault.jpg", - "title": "Aleksandr Sarachakov - Revolutionizing Cancer Treatment", + "title": "Revolutionizing Cancer Treatment: Harnessing AI, Zarr, and AnnData for High-Speed Biomedical Imaging", "videos": [ { "type": "youtube", diff --git a/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json b/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json index cd95f35d6..56894d3c3 100644 --- a/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json +++ b/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json @@ -10,26 +10,11 @@ } ], "speakers": [ - "TODO" - ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" + "Aleksei Gorin" ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/i-8IeS9N7wA/maxresdefault.jpg", - "title": "Aleksei Gorin - Machine Learning Approaches in Neuroscience", + "title": "Machine Learning Approaches in Neuroscience", "videos": [ { "type": "youtube", diff --git a/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json b/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json index afc54cfdd..714d7b741 100644 --- a/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json +++ b/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json @@ -14,26 +14,11 @@ } ], "speakers": [ - "TODO" - ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" + "Egor Romanov" ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/-MYYB0QjV6I/maxresdefault.jpg", - 
"title": "Egor Romanov - Performance of Vector Databases", + "title": "Performance of Vector Databases", "videos": [ { "type": "youtube", diff --git a/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json b/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json index 3ad0856f9..c0f1a813e 100644 --- a/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json +++ b/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json @@ -10,26 +10,11 @@ } ], "speakers": [ - "TODO" - ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" + "Gábor Szárnyas" ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/q_SKaOeRiOI/maxresdefault.jpg", - "title": "G\u00e1bor Sz\u00e1rnyas - DuckDB: The Power of a Data Warehouse in your Python Process", + "title": "DuckDB: The Power of a Data Warehouse in your Python Process", "videos": [ { "type": "youtube", diff --git a/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json b/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json index 8cf437081..da9c0ec6c 100644 --- a/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json +++ b/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json @@ -18,26 +18,12 @@ } ], "speakers": [ - "TODO" - ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", 
- "python 3", - "scientific programming", - "software" + "Gor Hayrapetyan", + "Karen Javadyan" ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/YNixBsPt7Ds/maxresdefault.jpg", - "title": "Gor Hayrapetyan & Karen Javadyan - Langchain: A Framework for Building Large Language Model Apps", + "title": "Langchain: A Framework for Building Large Language Model Apps", "videos": [ { "type": "youtube", diff --git a/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json b/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json index 5520b6746..8caac471b 100644 --- a/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json +++ b/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json @@ -14,26 +14,12 @@ } ], "speakers": [ - "TODO" - ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" + "Ivan Moshkov", + "Daria Gitman" ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/prPLAxYF1bU/maxresdefault.jpg", - "title": "Ivan Moshkov & Daria Gitman - How to Build an LLM for Math Reasoning without Proprietary Data?", + "title": "How to Build an LLM for Math Reasoning without Proprietary Data?", "videos": [ { "type": "youtube", diff --git a/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json b/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json index a25a62e6d..cb935ac1d 100644 --- 
a/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json +++ b/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json @@ -14,26 +14,11 @@ } ], "speakers": [ - "TODO" - ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" + "Nikolay Karpov" ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/3L80rxyofFU/maxresdefault.jpg", - "title": "Nikolay Karpov - How to Prepare a Speech Dataset & Minimize the Amount of Boilerplate Code Required?", + "title": "How to Prepare a Speech Dataset & Minimize the Amount of Boilerplate Code Required?", "videos": [ { "type": "youtube", diff --git a/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json b/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json index ec0f9efb5..0b55798c5 100644 --- a/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json +++ b/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json @@ -10,26 +10,11 @@ } ], "speakers": [ - "TODO" - ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" + "Vahan Huroyan" ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/vICrdSUS9vg/maxresdefault.jpg", - "title": "Vahan Huroyan - Recent Developments in Self-Supervised Learning for Computer Vision", + "title": "Recent Developments in Self-Supervised Learning for Computer Vision", "videos": [ { "type": 
"youtube", diff --git a/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json b/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json index 2a6e51587..3a8209e1f 100644 --- a/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json +++ b/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json @@ -22,26 +22,11 @@ } ], "speakers": [ - "TODO" - ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" + "Yuri Orshulevich" ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/hJ4NjViBF98/maxresdefault.jpg", - "title": "Yuri Orshulevich - LLM-Generated Text Detection", + "title": "LLM-Generated Text Detection", "videos": [ { "type": "youtube", From 8f1921959052f4e8933a79f88e052ed85d843fba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ezequiel=20Leonardo=20Casta=C3=B1o?= <14986783+ELC@users.noreply.github.com> Date: Tue, 24 Jun 2025 18:17:48 +0000 Subject: [PATCH 3/3] Fix conflicting characters --- ...ulidjian-crafting-impactful-dashboards-for-your-clients.json | 2 +- .../aleksandr-sarachakov-revolutionizing-cancer-treatment.json | 2 +- ...eksei-gorin-machine-learning-approaches-in-neuroscience.json | 2 +- .../videos/egor-romanov-performance-of-vector-databases.json | 2 +- ...db-the-power-of-a-data-warehouse-in-your-python-process.json | 2 +- ...hain-a-framework-for-building-large-language-model-apps.json | 2 +- ...uild-an-llm-for-math-reasoning-without-proprietary-data.json | 2 +- ...ataset-minimize-the-amount-of-boilerplate-code-required.json | 2 +- ...opments-in-self-supervised-learning-for-computer-vision.json | 2 +- .../videos/yuri-orshulevich-llm-generated-text-detection.json | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git 
a/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json b/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json index 2245535a4..1673221e5 100644 --- a/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json +++ b/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json @@ -1,5 +1,5 @@ { - "description": "Adam Kulidjian, Chief Technology Officer at Zyphr Solutions Inc., provides a talk on \u201cCrafting Impactful Dashboards for Your Clients.\u201d\n\nCommunicating trends, patterns, and insights through data is integral to understanding the world quantitatively. This phenomenon is used in data science, business intelligence, data analytics, and generally across all scientific disciplines.\n\nA dashboard, particularly one that houses data visualization, is the most common way to do it. With the increased accessibility of dashboard creation tools to people using data, there is a need to effectively communicate data, tell compelling stories, and create affordances that allow others to explore the data themselves.\n\nThe talk offers a handful of heuristics and pragmatic questions that will help you build a better dashboard, regardless of your clients, industry, or use case.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Adam Kulidjian, Chief Technology Officer at Zyphr Solutions Inc., provides a talk on \u201cCrafting Impactful Dashboards for Your Clients.\u201d\n\nCommunicating trends, patterns, and insights through data is integral to understanding the world quantitatively. This phenomenon is used in data science, business intelligence, data analytics, and generally across all scientific disciplines.\n\nA dashboard, particularly one that houses data visualization, is the most common way to do it. With the increased accessibility of dashboard creation tools to people using data, there is a need to effectively communicate data, tell compelling stories, and create affordances that allow others to explore the data themselves.\n\nThe talk offers a handful of heuristics and pragmatic questions that will help you build a better dashboard, regardless of your clients, industry, or use case.\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", "duration": 3089, "language": "eng", "recorded": "2024-05-22", diff --git a/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json b/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json index b4143e244..d71ed03a5 100644 --- a/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json +++ b/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json @@ -1,5 +1,5 @@ { - "description": "Aleksandr Sarachakov, Biomedical Imaging Team Lead at BostonGene, provides a talk on \u201cRevolutionizing Cancer Treatment: Harnessing AI, Zarr, and AnnData for High-Speed Biomedical Imaging.\u201d \n\nZarr and AnnData, Python-based technologies, are revolutionizing the landscape of biomedical image processing, especially when paired with self-supervised learning (SSL). Zarr, a chunked and compressed data storage format, enables the efficient handling of datasets found in biomedical applications. AnnData, a specialized framework for multi-dimensional annotated data, is crucial in managing and analyzing large-scale biomedical datasets.\n\nIn the context of SSL, these technologies boost the processing speed and reduce the computational load for handling high-resolution images and complex datasets. Zarr's ability to store multi-terabyte data in distributed and parallelized environments allows for faster processing and analysis of biomedical images. 
AnnData complements this by providing structured, annotated data that SSL models can efficiently learn from without extensive labeling. This combination reduces memory usage, making it feasible to handle biomedical images on a large scale. These advancements are pivotal for applications like cancer diagnosis, where rapid, accurate image analysis is critical.\n\nDuring the talk, our speaker explores:\n- how Zarr and AnnData facilitate scalable biomedical image processing, \n- outline their integration with SSL for cutting-edge research, \n- and discuss future developments in optimizing biomedical workflows.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Aleksandr Sarachakov, Biomedical Imaging Team Lead at BostonGene, provides a talk on \u201cRevolutionizing Cancer Treatment: Harnessing AI, Zarr, and AnnData for High-Speed Biomedical Imaging.\u201d \n\nZarr and AnnData, Python-based technologies, are revolutionizing the landscape of biomedical image processing, especially when paired with self-supervised learning (SSL). Zarr, a chunked and compressed data storage format, enables the efficient handling of datasets found in biomedical applications. AnnData, a specialized framework for multi-dimensional annotated data, is crucial in managing and analyzing large-scale biomedical datasets.\n\nIn the context of SSL, these technologies boost the processing speed and reduce the computational load for handling high-resolution images and complex datasets. Zarr's ability to store multi-terabyte data in distributed and parallelized environments allows for faster processing and analysis of biomedical images. AnnData complements this by providing structured, annotated data that SSL models can efficiently learn from without extensive labeling. This combination reduces memory usage, making it feasible to handle biomedical images on a large scale. These advancements are pivotal for applications like cancer diagnosis, where rapid, accurate image analysis is critical.\n\nDuring the talk, our speaker explores:\n- how Zarr and AnnData facilitate scalable biomedical image processing, \n- outline their integration with SSL for cutting-edge research, \n- and discuss future developments in optimizing biomedical workflows.\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", "duration": 6191, "language": "eng", "recorded": "2024-11-07", diff --git a/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json b/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json index 56894d3c3..e35f90a2a 100644 --- a/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json +++ b/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json @@ -1,5 +1,5 @@ { - "description": "Dr. Aleksei Gorin, a Neurobiologist and Senior Scientist at Emonomy, provides a talk on \u201cMachine Learning Approaches in Neuroscience.\u201d\n\nAlong with the growth of artificial intelligence and machine learning methodologies, neurobiologists are adopting modern machine learning techniques to tackle a broad spectrum of challenges. Those range from early disease diagnosis to the development of software capable of modeling behavior and natural neural networks.\n\nDuring the talk, Dr. 
Gorin explores the latest endeavors for integrating machine learning into neuroscience while:\n-discussing the achieved outcomes and their implications for the evolution of brain science methodologies,\n-examining key libraries in computational neuroscience, their role, and offering solutions in data analysis processes.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Dr. Aleksei Gorin, a Neurobiologist and Senior Scientist at Emonomy, provides a talk on \u201cMachine Learning Approaches in Neuroscience.\u201d\n\nAlong with the growth of artificial intelligence and machine learning methodologies, neurobiologists are adopting modern machine learning techniques to tackle a broad spectrum of challenges. Those range from early disease diagnosis to the development of software capable of modeling behavior and natural neural networks.\n\nDuring the talk, Dr. 
Gorin explores the latest endeavors for integrating machine learning into neuroscience while:\n-discussing the achieved outcomes and their implications for the evolution of brain science methodologies,\n-examining key libraries in computational neuroscience, their role, and offering solutions in data analysis processes.\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", "duration": 3697, "language": "eng", "recorded": "2024-02-28", diff --git a/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json b/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json index 714d7b741..b68f35e44 100644 --- a/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json +++ b/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json @@ -1,5 +1,5 @@ { - "description": "Egor Romanov, Software Engineer at Supabase, provides a talk on \u201cPerformance of Vector Databases.\u201d\n\nThe talk delves into vector databases' performance, challenges, and potentialities and discovers their role in advancing AI applications like Retrieval-Augmented Generation (RAG).\n\nHigh-dimensional embeddings are integral to numerous machine learning applications, transforming raw data into compact representations for diverse algorithms. Vector databases are essential in managing and utilizing these vectors. \n\nTheir main purpose includes aiding operations, including distance computations, similarity evaluations, and nearest-neighbor searches within high-dimensional spaces. RAG leverages these embedding stores, unlocking significant potentialities in the AI domain. \n\nDuring the talk, Egor Romanov:\n \n- explores the process of creating a provider for Postgres, integrated with pgvector, within a Python performance evaluation framework,\n- conducts a similarity search test simulation showcasing the latent performance potential.\n\nAccess the talk notes at: https://shorturl.at/CGKL1\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Egor Romanov, Software Engineer at Supabase, provides a talk on \u201cPerformance of Vector Databases.\u201d\n\nThe talk delves into vector databases' performance, challenges, and potentialities and discovers their role in advancing AI applications like Retrieval-Augmented Generation (RAG).\n\nHigh-dimensional embeddings are integral to numerous machine learning applications, transforming raw data into compact representations for diverse algorithms. Vector databases are essential in managing and utilizing these vectors. \n\nTheir main purpose includes aiding operations, including distance computations, similarity evaluations, and nearest-neighbor searches within high-dimensional spaces. RAG leverages these embedding stores, unlocking significant potentialities in the AI domain. 
\n\nDuring the talk, Egor Romanov:\n\n- explores the process of creating a provider for Postgres, integrated with pgvector, within a Python performance evaluation framework,\n- conducts a similarity search test simulation showcasing the latent performance potential.\n\nAccess the talk notes at: https://shorturl.at/CGKL1\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", "duration": 2588, "language": "eng", "recorded": "2024-01-22", diff --git a/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json b/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json index c0f1a813e..4ada29d98 100644 --- a/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json +++ b/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json @@ -1,5 +1,5 @@ { - "description": "G\u00e1bor Sz\u00e1rnyas, a Developer Relations Advocate and Technical Writer at DuckDB Labs, provides a talk on \u201cDuckDB: The Power of a Data Warehouse in your Python Process.\u201d\n\nDuckDB is an in-process analytical database management system, a powerful data warehouse engine running inside the Python process without any setup or communication overhead.\n\nIt is an open-source and highly portable system available as a command line tool with R, NodeJS, and Julia clients;\n- which loads data from many formats, such as CSV and Parquet, as well as pandas data frames,\n-its speed and features allow it to tackle a remarkable number of use cases in data science \u2013 including data wrangling and running complex ad-hoc SQL queries \u2013 while running on a laptop.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "G\u00e1bor Sz\u00e1rnyas, a Developer Relations Advocate and Technical Writer at DuckDB Labs, provides a talk on \u201cDuckDB: The Power of a Data Warehouse in your Python Process.\u201d\n\nDuckDB is an in-process analytical database management system, a powerful data warehouse engine running inside the Python process without any setup or communication overhead.\n\nIt is an open-source and highly portable system available as a command line tool with R, NodeJS, and Julia clients;\n- which loads data from many formats, such as CSV and Parquet, as well as pandas data frames,\n-its speed and features allow it to tackle a remarkable number of use cases in data science \u2013 including data wrangling and running complex ad-hoc SQL queries \u2013 while running on a laptop.\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", "duration": 3326, "language": "eng", "recorded": "2023-10-23", diff --git a/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json b/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json index da9c0ec6c..4541617ac 100644 --- a/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json +++ b/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json @@ -1,5 +1,5 @@ { - "description": "Gor Hayrapetyan, Lead/Senior Data Engineer at Microsoft, Estonia, and Karen Javadyan, Software Engineer at Snowflake, provide a talk on \u201cLangchain: A Framework for Building Large Language Model Apps.\u201d\n\nThe landscape of Large Language Models (LLM) and the libraries supporting them has recently had rapid evolution.\n\nDuring the talk, you will get a brief introduction to LLMs and learn about the current framework of LLM applications. Following this, you will discover Langchain features and concepts, including:\n- Integrations with different LLM models,\n- Chains,\n- Retrievers, \n- Tools,\n- Agents.\n \nTo put Langchain usage into perspective, the talk will also reflect on the RAG technique to expose LLM to your data.\n\nGitHub Repo: https://github.com/kajarenc/PyData-March-Langchain\n\nSlides: https://shorturl.at/ciovF\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Gor Hayrapetyan, Lead/Senior Data Engineer at Microsoft, Estonia, and Karen Javadyan, Software Engineer at Snowflake, provide a talk on \u201cLangchain: A Framework for Building Large Language Model Apps.\u201d\n\nThe landscape of Large Language Models (LLM) and the libraries supporting them has recently had rapid evolution.\n\nDuring the talk, you will get a brief introduction to LLMs and learn about the current framework of LLM applications. Following this, you will discover Langchain features and concepts, including:\n- Integrations with different LLM models,\n- Chains,\n- Retrievers, \n- Tools,\n- Agents.\n\nTo put Langchain usage into perspective, the talk will also reflect on the RAG technique to expose LLM to your data.\n\nGitHub Repo: https://github.com/kajarenc/PyData-March-Langchain\n\nSlides: https://shorturl.at/ciovF\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", "duration": 4235, "language": "eng", "recorded": "2024-04-12", diff --git a/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json b/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json index 8caac471b..66465a45f 100644 --- a/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json +++ b/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json @@ -1,5 +1,5 @@ { - "description": "Ivan Moshkov, Deep Learning Engineer at NVIDIA, and Daria Gitman, Conversational AI Research Intern at NVIDIA provide a talk on \"How to Build an LLM for Math Reasoning without Proprietary Data?\"\n\nRecent research has shown the value of synthetically generated datasets in training LLMs to acquire targeted skills. 
Current large-scale math instruction tuning datasets such as MetaMathQA and MAmmoTH rely on outputs from closed-source LLMs that have commercially restrictive licenses. One key reason limiting the use of open-source LLMs in data generation pipelines is the gap in the mathematical skills between the best closed-source LLMs, such as GPT-4, and the best open-source LLMs. \n\nIn their research, Ivan and Daria constructed OpenMathInstruct-1, a math instruction tuning dataset with 1.8M problem-solution pairs using recent progress in open-source LLMs, proposed prompting novelty, and brute-force scaling. Their best model, OpenMath-CodeLlama-70B, trained on a subset of OpenMathInstruct-1, achieves a competitive score of 84.6% on GSM8K and 50.7% on MATH, comparable to top GPT-distilled models. \n\nDuring the talk, Ivan introduces the challenge of math reasoning in Natural Language Processing and discusses the process of creating their synthetic dataset. Following this, Daria explores the Data Explorer tool and shares key insights extracted from the data using this tool. \n\nSlides: https://shorturl.at/GRUFi \n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! 
See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Ivan Moshkov, Deep Learning Engineer at NVIDIA, and Daria Gitman, Conversational AI Research Intern at NVIDIA provide a talk on \"How to Build an LLM for Math Reasoning without Proprietary Data?\"\n\nRecent research has shown the value of synthetically generated datasets in training LLMs to acquire targeted skills. Current large-scale math instruction tuning datasets such as MetaMathQA and MAmmoTH rely on outputs from closed-source LLMs that have commercially restrictive licenses. One key reason limiting the use of open-source LLMs in data generation pipelines is the gap in the mathematical skills between the best closed-source LLMs, such as GPT-4, and the best open-source LLMs. \n\nIn their research, Ivan and Daria constructed OpenMathInstruct-1, a math instruction tuning dataset with 1.8M problem-solution pairs using recent progress in open-source LLMs, proposed prompting novelty, and brute-force scaling. Their best model, OpenMath-CodeLlama-70B, trained on a subset of OpenMathInstruct-1, achieves a competitive score of 84.6% on GSM8K and 50.7% on MATH, comparable to top GPT-distilled models. \n\nDuring the talk, Ivan introduces the challenge of math reasoning in Natural Language Processing and discusses the process of creating their synthetic dataset. Following this, Daria explores the Data Explorer tool and shares key insights extracted from the data using this tool. \n\nSlides: https://shorturl.at/GRUFi \n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", "duration": 3671, "language": "eng", "recorded": "2024-07-24", diff --git a/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json b/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json index cb935ac1d..6b1252eca 100644 --- a/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json +++ b/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json @@ -1,5 +1,5 @@ { - "description": "Nikolay Karpov, a Senior Research Scientist at NVIDIA NeMo, provides a talk on \u201cHow to prepare a speech dataset and minimize the amount of boilerplate code required?\u201d \n\nProcessing a lot of data for training neural models requires more effort than neural network engineering and training. 
Nvidia NeMo team has made a Speech Data Processor tool to simplify the process: https://lnkd.in/eHE-KjNC\n\nDuring the talk, you will explore the steps for speech dataset preparation, including:\n\n-Video-to-audio conversion,\n-Metadata parsing,\n-Audio and text language identification,\n-Speech recognition,\n-Text normalization,\n-Filtration by metrics and regular expression.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Nikolay Karpov, a Senior Research Scientist at NVIDIA NeMo, provides a talk on \u201cHow to prepare a speech dataset and minimize the amount of boilerplate code required?\u201d \n\nProcessing a lot of data for training neural models requires more effort than neural network engineering and training. 
Nvidia NeMo team has made a Speech Data Processor tool to simplify the process: https://lnkd.in/eHE-KjNC\n\nDuring the talk, you will explore the steps for speech dataset preparation, including:\n\n-Video-to-audio conversion,\n-Metadata parsing,\n-Audio and text language identification,\n-Speech recognition,\n-Text normalization,\n-Filtration by metrics and regular expression.\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", "duration": 2296, "language": "eng", "recorded": "2023-11-29", diff --git a/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json b/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json index 0b55798c5..55d5d2f29 100644 --- a/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json +++ b/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json @@ -1,5 +1,5 @@ { - "description": "Vahan Huroyan, a Machine Learning Researcher at the YerevaNN research lab, provides a talk on \u201cRecent Developments in Self-Supervised Learning for Computer Vision.\u201d\n\nDuring the talk, you will discover the latest developments in self-supervised learning (SSL) for computer vision and delve into its challenges and future directions in this field.\n\nSelf-supervised learning (SSL) is a powerful tool for training computer vision models without the need for extensive labeled data. SSL methods are designed around pretext tasks that force models to learn valuable representations from the training data. These learned representations can then be transferred to downstream tasks, such as:\n-image classification, \n-object detection, \n-and segmentation, with minimal fine-tuning\n\nThe two main approaches of SSL include Contrastive learning and Masked image modeling:\n\n1. Contrastive learning is a general SSL framework that trains models to distinguish between positive and negative pairs of images.\n2. Masked image modeling is a specific type of SSL that involves masking random patches of an image and training the model to predict the masked pixels.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Vahan Huroyan, a Machine Learning Researcher at the YerevaNN research lab, provides a talk on \u201cRecent Developments in Self-Supervised Learning for Computer Vision.\u201d\n\nDuring the talk, you will discover the latest developments in self-supervised learning (SSL) for computer vision and delve into its challenges and future directions in this field.\n\nSelf-supervised learning (SSL) is a powerful tool for training computer vision models without the need for extensive labeled data. SSL methods are designed around pretext tasks that force models to learn valuable representations from the training data. These learned representations can then be transferred to downstream tasks, such as:\n-image classification, \n-object detection, \n-and segmentation, with minimal fine-tuning\n\nThe two main approaches of SSL include Contrastive learning and Masked image modeling:\n\n1. Contrastive learning is a general SSL framework that trains models to distinguish between positive and negative pairs of images.\n2. 
Masked image modeling is a specific type of SSL that involves masking random patches of an image and training the model to predict the masked pixels.\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", "duration": 2185, "language": "eng", "recorded": "2023-12-16", diff --git a/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json b/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json index 3a8209e1f..57ce97b4d 100644 --- a/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json +++ b/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json @@ -1,5 +1,5 @@ { - "description": "Yuri Orshulevich, Senior NLP Engineer at SuperAnnotate, provides a talk on \"LLM-Generated Text Detection.\" \n\nIn the evolving AI and NLP landscape, the rise of Large Language Models (LLMs) has led to a new era of text generation. 
Identifying machine-generated content presents a significant challenge with the growing complexity of these models.\n\nThe talk emphasizes the importance of distinguishing machine-generated vs human-written text by exploring possible problems and ways to avoid them. \n\nDuring the talk, you will:\n-Explore the significance of text detection in various domains and ongoing efforts, including competitions. \n-Evaluate the current progress in this area, highlighting the apparent simplicity of a task, which in reality is complex. \n-Examine existing open-source solutions, their limitations, and areas for improvement.\n-Be introduced to a high-quality benchmark that provides a clear way to measure performance.\n\nSlides: https://shorturl.at/fhmpv\n\nGitHub repo: https://github.com/superannotateai/generated_text_detector\n\nHuggingFace model: https://huggingface.co/SuperAnnotate/roberta-large-llm-content-detector\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Yuri Orshulevich, Senior NLP Engineer at SuperAnnotate, provides a talk on \"LLM-Generated Text Detection.\" \n\nIn the evolving AI and NLP landscape, the rise of Large Language Models (LLMs) has led to a new era of text generation. Identifying machine-generated content presents a significant challenge with the growing complexity of these models.\n\nThe talk emphasizes the importance of distinguishing machine-generated vs human-written text by exploring possible problems and ways to avoid them. \n\nDuring the talk, you will:\n-Explore the significance of text detection in various domains and ongoing efforts, including competitions. \n-Evaluate the current progress in this area, highlighting the apparent simplicity of a task, which in reality is complex. \n-Examine existing open-source solutions, their limitations, and areas for improvement.\n-Be introduced to a high-quality benchmark that provides a clear way to measure performance.\n\nSlides: https://shorturl.at/fhmpv\n\nGitHub repo: https://github.com/superannotateai/generated_text_detector\n\nHuggingFace model: https://huggingface.co/SuperAnnotate/roberta-large-llm-content-detector\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", "duration": 3116, "language": "eng", "recorded": "2024-04-27",