From c03cad9249fd72a256f6ac0df231ea779191ec69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ezequiel=20Leonardo=20Casta=C3=B1o?=
 <14986783+ELC@users.noreply.github.com>
Date: Wed, 18 Jun 2025 22:57:40 -0300
Subject: [PATCH 1/3] Scraped pydata-yerevan-2023
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #xxx

Event config:
~~~yaml
repo_dir: W:\Repositories\pyvideo-data

# Copy the event template here and adapt to the event parameters

# Only repo_dir: and events: are loaded

# =============================================================================
events:
  # - title: PyData Virginia 2025
  #   dir: pydata-virginia-2025
  #   youtube_list:
  #     - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qLS7Mk-jI9jhb4t5UY6yDW
  #   related_urls:
  #     - label: Conference Website
  #       url: https://pydata.org/virginia2025
  #   language: eng
  #   dates:
  #     begin: 2025-04-18
  #     end: 2025-04-19
  #     default: 2025-04-18
  #   minimal_download: false
  #   issue: xxx
  #   overwrite:
  #     # all: true # takes precedence over add_new_files and existing_files_fields
  #     add_new_files: true
  #     existing_files_fields:
  #       - duration
  #       - thumbnail_url
  #       - videos
  #       - description
  #       - language
  #       - recorded
  #       - related_urls
  #       - speakers
  #       - tags
  #       - title
  #   tags:

  # - title: PyData Global 2024
  #   dir: pydata-global-2024
  #   youtube_list:
  #     - https://www.youtube.com/playlist?list=PLGVZCDnMOq0otKlHvES9iBFtVQ71yZhed
  #   related_urls:
  #     - label: Conference Website
  #       url: https://pydata.org/global2024
  #   language: eng
  #   dates:
  #     begin: 2024-12-03
  #     end: 2024-12-05
  #     default: 2024-12-03
  #   minimal_download: false
  #   issue: xxx
  #   overwrite:
  #     # all: true # takes precedence over add_new_files and existing_files_fields
  #     add_new_files: true
  #     existing_files_fields:
  #       - duration
  #       - thumbnail_url
  #       - videos
  #       - description
  #       - language
  #       - recorded
  #       - related_urls
  #       - speakers
  #       - tags
  #       - title
  #   tags:

  - title: PyData New York City 2024
    dir: pydata-new-york-city-2024
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0ohEIZ-_wM2W_xqSVjyA3dC
    related_urls:
      - label: Conference Website
        url: https://pydata.org/nyc2024
    language: eng
    dates:
      begin: 2024-11-06
      end: 2024-11-08
      default: 2024-11-06
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Tel Aviv 2024
    dir: pydata-tel-aviv-2024
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0pRsGPxDvLZfuufNgqREc0a
    related_urls:
      - label: Conference Website
        url: https://pydata.org/telaviv2024/
    language: eng
    dates:
      begin: 2024-11-04
      end: 2024-11-04
      default: 2024-11-04
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Paris 2024
    dir: pydata-paris-2024
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0pKya8gksd00ennKuyoH7v7
    related_urls:
      - label: Conference Website
        url: https://pydata.org/paris2024
    language: eng
    dates:
      begin: 2024-09-25
      end: 2024-09-26
      default: 2024-09-25
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Amsterdam 2024
    dir: pydata-amsterdam-2024
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0reU2lzNZCn9obkyRVaSnpF
    related_urls:
      - label: Conference Website
        url: https://web.archive.org/web/20240822042916/https://amsterdam.pydata.org/
    language: eng
    dates:
      begin: 2024-09-18
      end: 2024-09-20
      default: 2024-09-18
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Vermont 2024
    dir: pydata-vermont-2024
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0pME_xSRdmoYFzhlsHJYM8I
    related_urls:
      - label: Conference Website
        url: https://pydata.org/vermont2024/
    language: eng
    dates:
      begin: 2024-07-29
      end: 2024-07-30
      default: 2024-07-29
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Eindhoven 2024
    dir: pydata-eindhoven-2024
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0q7a2aoNP1au_1egfZEjGL6
    related_urls:
      - label: Conference Website
        url: https://pydata.org/eindhoven2024/
    language: eng
    dates:
      begin: 2024-07-11
      end: 2024-07-11
      default: 2024-07-11
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData London 2024
    dir: pydata-london-2024
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0rrhYTNedKKuJ9716fEaAdK
    related_urls:
      - label: Conference Website
        url: https://pydata.org/london2024/
    language: eng
    dates:
      begin: 2024-06-14
      end: 2024-06-16
      default: 2024-06-14
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Berlin 2024
    dir: pydata-berlin-2024
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0r2tGyr-hjbnCrjXRkCMvwB
    related_urls:
      - label: Conference Website
        url: https://2024.pycon.de/
    language: eng
    dates:
      begin: 2024-06-14
      end: 2024-06-16
      default: 2024-06-14
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Global 2023
    dir: pydata-global-2023
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0poULd1C4oUdPbPkTe4poJx
    related_urls:
      - label: Conference Website
        url: https://pydata.org/global2023/
    language: eng
    dates:
      begin: 2023-12-06
      end: 2023-12-08
      default: 2023-12-06
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Eindhoven 2023
    dir: pydata-eindhoven-2023
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qkbJjIfppGO44yhDV2i4gR
    related_urls:
      - label: Conference Website
        url: https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023
    language: eng
    dates:
      begin: 2023-11-30
      end: 2023-11-30
      default: 2023-11-30
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData New York City 2023
    dir: pydata-new-york-city-2023
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0o79mT1hHyqtFDSNzXXSYQM
    related_urls:
      - label: Conference Website
        url: https://pydata.org/nyc2023/
    language: eng
    dates:
      begin: 2023-11-01
      end: 2023-11-03
      default: 2023-11-01
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Amsterdam 2023
    dir: pydata-amsterdam-2023
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0pADyz2VboxPFIdrsozlENg
    related_urls:
      - label: Conference Website
        url: https://amsterdam2023.pydata.org/cfp/schedule/
    language: eng
    dates:
      begin: 2023-09-14
      end: 2023-09-16
      default: 2023-09-14
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Seattle 2023
    dir: pydata-seattle-2023
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0q81_-rt5jzJ--ZEgcNArKb
    related_urls:
      - label: Conference Website
        url: https://pydata.org/seattle2023/
    language: eng
    dates:
      begin: 2023-04-26
      end: 2023-04-28
      default: 2023-04-26
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Berlin 2023
    dir: pydata-berlin-2023
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0peDguAzds7kVmBr8avp46K
    related_urls:
      - label: Conference Website
        url: https://2023.pycon.de/
    language: eng
    dates:
      begin: 2023-04-17
      end: 2023-04-19
      default: 2023-04-17
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Yerevan 2023
    dir: pydata-yerevan-2023
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0pJKftCB2BtalTDE-2xS20g
    language: eng
    dates:
      begin: 2023-10-23
      end: 2024-11-07
      default: 2023-10-23
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Trójmiasto 2023
    dir: pydata-trojmiasto-2023
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qS0mI7s9tpXnS-XV5l_Ibs
    related_urls:
      - label: Conference Website
        url: https://www.meetup.com/pl-PL/pydata-trojmiasto/
    language: eng
    dates:
      begin: 2023-10-24
      end: 2023-10-24
      default: 2023-10-24
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Tel Aviv 2022
    dir: pydata-tel-aviv-2022
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0p6o_fjjdNPqy1rps49z2S0
    related_urls:
      - label: Conference Website
        url: https://pydata.org/telaviv2022/
    language: eng
    dates:
      begin: 2022-12-13
      end: 2022-12-13
      default: 2022-12-13
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Eindhoven 2022
    dir: pydata-eindhoven-2022
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0pI60MsrFpHcII1qWm7drmZ
    related_urls:
      - label: Conference Website
        url: https://pydata.org/eindhoven2022/
    language: eng
    dates:
      begin: 2022-12-02
      end: 2022-12-02
      default: 2022-12-02
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Global 2022
    dir: pydata-global-2022
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qgYUt0yn7F80wmzCnj2dEq
    related_urls:
      - label: Conference Website
        url: https://pydata.org/global2022/
    language: eng
    dates:
      begin: 2022-12-01
      end: 2022-12-03
      default: 2022-12-01
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData New York City 2022
    dir: pydata-new-york-city-2022
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0opPc5-dp6ZDCFvOqDBlUuv
    related_urls:
      - label: Conference Website
        url: https://pydata.org/nyc2022/
    language: eng
    dates:
      begin: 2022-11-09
      end: 2022-11-11
      default: 2022-11-09
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Yerevan 2022
    dir: pydata-yerevan-2022
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qWwVVDmdOw6oxAlqqH8Ca-
    related_urls:
      - label: Conference Website
        url: https://pydata.org/yerevan2022/
    language: eng
    dates:
      begin: 2022-08-12
      end: 2022-08-13
      default: 2022-08-12
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData London 2022
    dir: pydata-london-2022
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qT0MXnci7VBSF-U-0WaQ-w
    related_urls:
      - label: Conference Website
        url: https://pydata.org/london2022/
    language: eng
    dates:
      begin: 2022-06-17
      end: 2022-06-19
      default: 2022-06-17
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Berlin 2022
    dir: pydata-berlin-2022
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0p0Fal8_YKg6fPXnf3iPtwD
    related_urls:
      - label: Conference Website
        url: https://2022.pycon.de/
    language: eng
    dates:
      begin: 2022-04-11
      end: 2022-04-13
      default: 2022-04-11
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Global 2021
    dir: pydata-global-2021
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0rHb3JXG6puQnUAclFFZMlh
    related_urls:
      - label: Conference Website
        url: https://pydata.org/global2021/
    language: eng
    dates:
      begin: 2021-10-28
      end: 2021-10-30
      default: 2021-10-28
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Eindhoven 2021
    dir: pydata-eindhoven-2021
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0rBKcoKoaWJiMrDGdNr2_S0
    related_urls:
      - label: Conference Website
        url: https://pydata.org/eindhoven2021/
    language: eng
    dates:
      begin: 2021-11-12
      end: 2021-11-12
      default: 2021-11-12
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Global 2020
    dir: pydata-global-2020
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0r0eC9BnITmYJ786p9Y1Q8D
    related_urls:
      - label: Conference Website
        url: https://pydataglobal.github.io/
    language: eng
    dates:
      begin: 2020-11-11
      end: 2020-11-15
      default: 2020-11-11
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Eindhoven 2020
    dir: pydata-eindhoven-2020
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qpKjuGgNOgtOxIuATvnqEr
    related_urls:
      - label: Conference Website
        url: https://pydata.org/eindhoven2020/schedule/
    language: eng
    dates:
      begin: 2020-10-07
      end: 2020-10-09
      default: 2020-10-07
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Amsterdam 2020
    dir: pydata-amsterdam-2020
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0oX4ymLgldSvpfiZj-S8-fH
    related_urls:
      - label: Conference Website
        url: https://datasciencedistrict.nl/pydata-festival-amsterda/
    language: eng
    dates:
      begin: 2020-06-15
      end: 2020-06-20
      default: 2020-06-15
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData South Africa 2018
    dir: pydata-south-africa-2018
    youtube_list:
      - https://www.youtube.com/watch?v=Lvw3Lp3KrTM&list=PLGjWYNrNnSuc78h5x23A5mLAzWlCl9LGf
    related_urls:
      - label: Conference Website
        url: https://2018.za.pycon.org/
    language: eng
    dates:
      begin: 2018-10-11
      end: 2018-10-12
      default: 2018-10-11
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

  - title: PyData Hamburg 2021
    dir: pydata-hamburg-2021
    youtube_list:
      - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qbRG8gBRkosFfhWrObasQF
    related_urls:
      - label: Conference Website
        url: https://www.meetup.com/pydata-hamburg/
    language: eng
    dates:
      begin: 2020-11-03
      end: 2021-03-03
      default: 2021-03-03
    minimal_download: false
    issue: xxx
    overwrite:
      # all: true # takes precedence over add_new_files and existing_files_fields
      add_new_files: true
      existing_files_fields:
        - duration
        - thumbnail_url
        - videos
        - description
        - language
        - recorded
        - related_urls
        - speakers
        - tags
        - title
    tags:

# ISO_639-3 language codes https://en.wikipedia.org/wiki/ISO_639-3

# languages = {
#     'ita': 'Italian',
#     'zho': 'Chinese',
#     'por': 'Portuguese',
#     'ukr': 'Ukrainian',
#     'deu': 'German',
#     'eng': 'English',
#     'rus': 'Russian',
#     'fra': 'French',
#     'spa': 'Spanish',
#     'eus': 'Basque',
#     'cat': 'Catalan',
#     'glg': 'Galician',
#     'kor': 'Korean',
#     'lit': 'Lithuanian',
#     'jpn': 'Japanese',
#     'ces': 'Czech',
#     'pol': 'Polish',
#     'heb': 'Hebrew',
#     'tha': 'Thai',
# }

~~~

Scraped with [pyvideo_scrape](https://github.com/pyvideo/pyvideo_scrape)
---
 pydata-yerevan-2023/category.json             |  3 ++
 ...impactful-dashboards-for-your-clients.json | 39 ++++++++++++++
 ...akov-revolutionizing-cancer-treatment.json | 39 ++++++++++++++
 ...e-learning-approaches-in-neuroscience.json | 39 ++++++++++++++
 ...manov-performance-of-vector-databases.json | 43 ++++++++++++++++
 ...data-warehouse-in-your-python-process.json | 39 ++++++++++++++
 ...or-building-large-language-model-apps.json | 47 +++++++++++++++++
 ...th-reasoning-without-proprietary-data.json | 43 ++++++++++++++++
 ...e-amount-of-boilerplate-code-required.json | 43 ++++++++++++++++
 ...pervised-learning-for-computer-vision.json | 39 ++++++++++++++
 ...hulevich-llm-generated-text-detection.json | 51 +++++++++++++++++++
 11 files changed, 425 insertions(+)
 create mode 100644 pydata-yerevan-2023/category.json
 create mode 100644 pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json
 create mode 100644 pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json
 create mode 100644 pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json
 create mode 100644 pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json
 create mode 100644 pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json
 create mode 100644 pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json
 create mode 100644 pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json
 create mode 100644 pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json
 create mode 100644 pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json
 create mode 100644 pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json

diff --git a/pydata-yerevan-2023/category.json b/pydata-yerevan-2023/category.json
new file mode 100644
index 000000000..0038b9eb3
--- /dev/null
+++ b/pydata-yerevan-2023/category.json
@@ -0,0 +1,3 @@
+{
+  "title": "PyData Yerevan 2023"
+}
diff --git a/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json b/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json
new file mode 100644
index 000000000..54f6a67b9
--- /dev/null
+++ b/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json
@@ -0,0 +1,39 @@
+{
+  "description": "Adam Kulidjian, Chief Technology Officer at Zyphr Solutions Inc., provides a talk on \u201cCrafting Impactful Dashboards for Your Clients.\u201d\n\nCommunicating trends, patterns, and insights through data is integral to understanding the world quantitatively. This phenomenon is used in data science, business intelligence, data analytics, and generally across all scientific disciplines.\n\nA dashboard, particularly one that houses data visualization, is the most common way to do it. With the increased accessibility of dashboard creation tools to people using data, there is a need to effectively communicate data, tell compelling stories, and create affordances that allow others to explore the data themselves.\n\nThe talk offers a handful of heuristics and pragmatic questions that will help you build a better dashboard, regardless of your clients, industry, or use case.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "duration": 3089,
+  "language": "eng",
+  "recorded": "2024-05-22",
+  "related_urls": [
+    {
+      "label": "https://github.com/numfocus/YouTubeVideoTimestamps",
+      "url": "https://github.com/numfocus/YouTubeVideoTimestamps"
+    }
+  ],
+  "speakers": [
+    "TODO"
+  ],
+  "tags": [
+    "Education",
+    "Julia",
+    "NumFOCUS",
+    "Opensource",
+    "PyData",
+    "Python",
+    "Tutorial",
+    "coding",
+    "how to program",
+    "learn",
+    "learn to code",
+    "python 3",
+    "scientific programming",
+    "software"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi/DZjCrLJ1xlk/maxresdefault.jpg",
+  "title": "Adam Kulidjian - Crafting Impactful Dashboards for Your Clients",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=DZjCrLJ1xlk"
+    }
+  ]
+}
diff --git a/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json b/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json
new file mode 100644
index 000000000..6695989b3
--- /dev/null
+++ b/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json
@@ -0,0 +1,39 @@
+{
+  "description": "Aleksandr Sarachakov, Biomedical Imaging Team Lead at BostonGene, provides a talk on  \u201cRevolutionizing Cancer Treatment: Harnessing AI, Zarr, and AnnData for High-Speed Biomedical Imaging.\u201d \n\nZarr and AnnData, Python-based technologies, are revolutionizing the landscape of biomedical image processing, especially when paired with self-supervised learning (SSL).  Zarr, a chunked and compressed data storage format, enables the efficient handling of datasets found in biomedical applications. AnnData, a specialized framework for multi-dimensional annotated data, is crucial in managing and analyzing large-scale biomedical datasets.\n\nIn the context of SSL, these technologies boost the processing speed and reduce the computational load for handling high-resolution images and complex datasets. Zarr's ability to store multi-terabyte data in distributed and parallelized environments allows for faster processing and analysis of biomedical images. AnnData complements this by providing structured, annotated data that SSL models can efficiently learn from without extensive labeling. This combination reduces memory usage, making it feasible to handle biomedical images on a large scale. These advancements are pivotal for applications like cancer diagnosis, where rapid, accurate image analysis is critical.\n\nDuring the talk, our speaker explores:\n- how Zarr and AnnData facilitate scalable biomedical image processing, \n- outline their integration with SSL for cutting-edge research, \n- and discuss future developments in optimizing biomedical workflows.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "duration": 6191,
+  "language": "eng",
+  "recorded": "2024-11-07",
+  "related_urls": [
+    {
+      "label": "https://github.com/numfocus/YouTubeVideoTimestamps",
+      "url": "https://github.com/numfocus/YouTubeVideoTimestamps"
+    }
+  ],
+  "speakers": [
+    "TODO"
+  ],
+  "tags": [
+    "Education",
+    "Julia",
+    "NumFOCUS",
+    "Opensource",
+    "PyData",
+    "Python",
+    "Tutorial",
+    "coding",
+    "how to program",
+    "learn",
+    "learn to code",
+    "python 3",
+    "scientific programming",
+    "software"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi/Xik80kYLD5c/maxresdefault.jpg",
+  "title": "Aleksandr Sarachakov - Revolutionizing Cancer Treatment",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=Xik80kYLD5c"
+    }
+  ]
+}
diff --git a/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json b/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json
new file mode 100644
index 000000000..cd95f35d6
--- /dev/null
+++ b/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json
@@ -0,0 +1,39 @@
+{
+  "description": "Dr. Aleksei Gorin, a Neurobiologist and Senior Scientist at Emonomy, provides a talk on \u201cMachine Learning Approaches in Neuroscience.\u201d\n\nAlong with the growth of artificial intelligence and machine learning methodologies, neurobiologists are adopting modern machine learning techniques to tackle a broad spectrum of challenges. Those range from early disease diagnosis to the development of software capable of modeling behavior and natural neural networks.\n\nDuring the talk, Dr. Gorin explores the latest endeavors for integrating machine learning into neuroscience while:\n-discussing the achieved outcomes and their implications for the evolution of brain science methodologies,\n-examining key libraries in computational neuroscience, their role, and offering solutions in data analysis processes.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "duration": 3697,
+  "language": "eng",
+  "recorded": "2024-02-28",
+  "related_urls": [
+    {
+      "label": "https://github.com/numfocus/YouTubeVideoTimestamps",
+      "url": "https://github.com/numfocus/YouTubeVideoTimestamps"
+    }
+  ],
+  "speakers": [
+    "TODO"
+  ],
+  "tags": [
+    "Education",
+    "Julia",
+    "NumFOCUS",
+    "Opensource",
+    "PyData",
+    "Python",
+    "Tutorial",
+    "coding",
+    "how to program",
+    "learn",
+    "learn to code",
+    "python 3",
+    "scientific programming",
+    "software"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi/i-8IeS9N7wA/maxresdefault.jpg",
+  "title": "Aleksei Gorin - Machine Learning Approaches in Neuroscience",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=i-8IeS9N7wA"
+    }
+  ]
+}
diff --git a/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json b/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json
new file mode 100644
index 000000000..afc54cfdd
--- /dev/null
+++ b/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json
@@ -0,0 +1,43 @@
+{
+  "description": "Egor Romanov, Software Engineer at Supabase, provides a talk on \u201cPerformance of Vector Databases.\u201d\n\nThe talk delves into vector databases' performance, challenges, and potentialities and discovers their role in advancing AI applications like Retrieval-Augmented Generation (RAG).\n\nHigh-dimensional embeddings are integral to numerous machine learning applications, transforming raw data into compact representations for diverse algorithms. Vector databases are essential in managing and utilizing these vectors. \n\nTheir main purpose includes aiding operations, including distance computations, similarity evaluations, and nearest-neighbor searches within high-dimensional spaces. RAG leverages these embedding stores, unlocking significant potentialities in the AI domain. \n\nDuring the talk, Egor Romanov:\n \n- explores the process of creating a provider for Postgres, integrated with pgvector, within a Python performance evaluation framework,\n- conducts a similarity search test simulation showcasing the latent performance potential.\n\nAccess the talk notes at: https://shorturl.at/CGKL1\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! 
See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "duration": 2588,
+  "language": "eng",
+  "recorded": "2024-01-22",
+  "related_urls": [
+    {
+      "label": "https://github.com/numfocus/YouTubeVideoTimestamps",
+      "url": "https://github.com/numfocus/YouTubeVideoTimestamps"
+    },
+    {
+      "label": "https://shorturl.at/CGKL1",
+      "url": "https://shorturl.at/CGKL1"
+    }
+  ],
+  "speakers": [
+    "TODO"
+  ],
+  "tags": [
+    "Education",
+    "Julia",
+    "NumFOCUS",
+    "Opensource",
+    "PyData",
+    "Python",
+    "Tutorial",
+    "coding",
+    "how to program",
+    "learn",
+    "learn to code",
+    "python 3",
+    "scientific programming",
+    "software"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi/-MYYB0QjV6I/maxresdefault.jpg",
+  "title": "Egor Romanov - Performance of Vector Databases",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=-MYYB0QjV6I"
+    }
+  ]
+}
diff --git a/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json b/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json
new file mode 100644
index 000000000..3ad0856f9
--- /dev/null
+++ b/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json
@@ -0,0 +1,39 @@
+{
+  "description": "G\u00e1bor Sz\u00e1rnyas, a Developer Relations Advocate and Technical Writer at DuckDB Labs, provides a talk on \u201cDuckDB: The Power of a Data Warehouse in your Python Process.\u201d\n\nDuckDB is an in-process analytical database management system, a powerful data warehouse engine running inside the Python process without any setup or communication overhead.\n\nIt is an open-source and highly portable system available as a command line tool with R, NodeJS, and Julia clients;\n- which loads data from many formats, such as CSV and Parquet, as well as pandas data frames,\n-its speed and features allow it to tackle a remarkable number of use cases in data science \u2013 including data wrangling and running complex ad-hoc SQL queries \u2013 while running on a laptop.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "duration": 3326,
+  "language": "eng",
+  "recorded": "2023-10-23",
+  "related_urls": [
+    {
+      "label": "https://github.com/numfocus/YouTubeVideoTimestamps",
+      "url": "https://github.com/numfocus/YouTubeVideoTimestamps"
+    }
+  ],
+  "speakers": [
+    "TODO"
+  ],
+  "tags": [
+    "Education",
+    "Julia",
+    "NumFOCUS",
+    "Opensource",
+    "PyData",
+    "Python",
+    "Tutorial",
+    "coding",
+    "how to program",
+    "learn",
+    "learn to code",
+    "python 3",
+    "scientific programming",
+    "software"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi/q_SKaOeRiOI/maxresdefault.jpg",
+  "title": "G\u00e1bor Sz\u00e1rnyas - DuckDB: The Power of a Data Warehouse in your Python Process",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=q_SKaOeRiOI"
+    }
+  ]
+}
diff --git a/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json b/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json
new file mode 100644
index 000000000..8cf437081
--- /dev/null
+++ b/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json
@@ -0,0 +1,47 @@
+{
+  "description": "Gor Hayrapetyan, Lead/Senior Data Engineer at Microsoft, Estonia, and Karen Javadyan, Software Engineer at Snowflake, provide a talk on \u201cLangchain: A Framework for Building Large Language Model Apps.\u201d\n\nThe landscape of Large Language Models (LLM) and the libraries supporting them has recently had rapid evolution.\n\nDuring the talk, you will get a brief introduction to LLMs and learn about the current framework of LLM applications. Following this, you will discover Langchain features and concepts, including:\n- Integrations with different LLM models,\n- Chains,\n- Retrievers, \n- Tools,\n- Agents.\n \nTo put Langchain usage into perspective, the talk will also reflect on the RAG technique to expose LLM to your data.\n\nGitHub Repo: https://github.com/kajarenc/PyData-March-Langchain\n\nSlides: https://shorturl.at/ciovF\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "duration": 4235,
+  "language": "eng",
+  "recorded": "2024-04-12",
+  "related_urls": [
+    {
+      "label": "https://shorturl.at/ciovF",
+      "url": "https://shorturl.at/ciovF"
+    },
+    {
+      "label": "https://github.com/kajarenc/PyData-March-Langchain",
+      "url": "https://github.com/kajarenc/PyData-March-Langchain"
+    },
+    {
+      "label": "https://github.com/numfocus/YouTubeVideoTimestamps",
+      "url": "https://github.com/numfocus/YouTubeVideoTimestamps"
+    }
+  ],
+  "speakers": [
+    "TODO"
+  ],
+  "tags": [
+    "Education",
+    "Julia",
+    "NumFOCUS",
+    "Opensource",
+    "PyData",
+    "Python",
+    "Tutorial",
+    "coding",
+    "how to program",
+    "learn",
+    "learn to code",
+    "python 3",
+    "scientific programming",
+    "software"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi/YNixBsPt7Ds/maxresdefault.jpg",
+  "title": "Gor Hayrapetyan & Karen Javadyan - Langchain: A Framework for Building Large Language Model Apps",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=YNixBsPt7Ds"
+    }
+  ]
+}
diff --git a/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json b/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json
new file mode 100644
index 000000000..5520b6746
--- /dev/null
+++ b/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json
@@ -0,0 +1,43 @@
+{
+  "description": "Ivan Moshkov, Deep Learning Engineer at NVIDIA, and Daria Gitman, Conversational AI Research Intern at NVIDIA provide a talk on \"How to Build an LLM for Math Reasoning without Proprietary Data?\"\n\nRecent research has shown the value of synthetically generated datasets in training LLMs to acquire targeted skills. Current large-scale math instruction tuning datasets such as MetaMathQA and MAmmoTH rely on outputs from closed-source LLMs that have commercially restrictive licenses. One key reason limiting the use of open-source LLMs in data generation pipelines is the gap in the mathematical skills between the best closed-source LLMs, such as GPT-4, and the best open-source LLMs. \n\nIn their research, Ivan and Daria constructed OpenMathInstruct-1, a math instruction tuning dataset with 1.8M problem-solution pairs using recent progress in open-source LLMs, proposed prompting novelty, and brute-force scaling. Their best model, OpenMath-CodeLlama-70B, trained on a subset of OpenMathInstruct-1, achieves a competitive score of 84.6% on GSM8K and 50.7% on MATH, comparable to top GPT-distilled models. \n\nDuring the talk, Ivan introduces the challenge of math reasoning in Natural Language Processing and discusses the process of creating their synthetic dataset. Following this, Daria explores the Data Explorer tool and shares key insights extracted from the data using this tool. \n\nSlides: https://shorturl.at/GRUFi \n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "duration": 3671,
+  "language": "eng",
+  "recorded": "2024-07-24",
+  "related_urls": [
+    {
+      "label": "https://github.com/numfocus/YouTubeVideoTimestamps",
+      "url": "https://github.com/numfocus/YouTubeVideoTimestamps"
+    },
+    {
+      "label": "https://shorturl.at/GRUFi",
+      "url": "https://shorturl.at/GRUFi"
+    }
+  ],
+  "speakers": [
+    "TODO"
+  ],
+  "tags": [
+    "Education",
+    "Julia",
+    "NumFOCUS",
+    "Opensource",
+    "PyData",
+    "Python",
+    "Tutorial",
+    "coding",
+    "how to program",
+    "learn",
+    "learn to code",
+    "python 3",
+    "scientific programming",
+    "software"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi/prPLAxYF1bU/maxresdefault.jpg",
+  "title": "Ivan Moshkov & Daria Gitman - How to Build an LLM for Math Reasoning without Proprietary Data?",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=prPLAxYF1bU"
+    }
+  ]
+}
diff --git a/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json b/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json
new file mode 100644
index 000000000..a25a62e6d
--- /dev/null
+++ b/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json
@@ -0,0 +1,43 @@
+{
+  "description": "Nikolay Karpov, a Senior Research Scientist at NVIDIA NeMo, provides a talk on \u201cHow to prepare a speech dataset and minimize the amount of boilerplate code required?\u201d \n\nProcessing a lot of data for training neural models requires more effort than neural network engineering and training. Nvidia NeMo team has made a Speech Data Processor tool to simplify the process: https://lnkd.in/eHE-KjNC\n\nDuring the talk, you will explore the steps for speech dataset preparation, including:\n\n-Video-to-audio conversion,\n-Metadata parsing,\n-Audio and text language identification,\n-Speech recognition,\n-Text normalization,\n-Filtration by metrics and regular expression.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "duration": 2296,
+  "language": "eng",
+  "recorded": "2023-11-29",
+  "related_urls": [
+    {
+      "label": "https://github.com/numfocus/YouTubeVideoTimestamps",
+      "url": "https://github.com/numfocus/YouTubeVideoTimestamps"
+    },
+    {
+      "label": "https://lnkd.in/eHE-KjNC",
+      "url": "https://lnkd.in/eHE-KjNC"
+    }
+  ],
+  "speakers": [
+    "TODO"
+  ],
+  "tags": [
+    "Education",
+    "Julia",
+    "NumFOCUS",
+    "Opensource",
+    "PyData",
+    "Python",
+    "Tutorial",
+    "coding",
+    "how to program",
+    "learn",
+    "learn to code",
+    "python 3",
+    "scientific programming",
+    "software"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi/3L80rxyofFU/maxresdefault.jpg",
+  "title": "Nikolay Karpov - How to Prepare a Speech Dataset & Minimize the Amount of Boilerplate Code Required?",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=3L80rxyofFU"
+    }
+  ]
+}
diff --git a/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json b/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json
new file mode 100644
index 000000000..ec0f9efb5
--- /dev/null
+++ b/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json
@@ -0,0 +1,39 @@
+{
+  "description": "Vahan Huroyan, a Machine Learning Researcher at the YerevaNN research lab, provides a talk on \u201cRecent Developments in Self-Supervised Learning for Computer Vision.\u201d\n\nDuring the talk, you will discover the latest developments in self-supervised learning (SSL) for computer vision and delve into its challenges and future directions in this field.\n\nSelf-supervised learning (SSL) is a powerful tool for training computer vision models without the need for extensive labeled data. SSL methods are designed around pretext tasks that force models to learn valuable representations from the training data. These learned representations can then be transferred to downstream tasks, such as:\n-image classification, \n-object detection, \n-and segmentation, with minimal fine-tuning\n\nThe two main approaches of SSL include Contrastive learning and Masked image modeling:\n\n1. Contrastive learning is a general SSL framework that trains models to distinguish between positive and negative pairs of images.\n2. Masked image modeling is a specific type of SSL that involves masking random patches of an image and training the model to predict the masked pixels.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "duration": 2185,
+  "language": "eng",
+  "recorded": "2023-12-16",
+  "related_urls": [
+    {
+      "label": "https://github.com/numfocus/YouTubeVideoTimestamps",
+      "url": "https://github.com/numfocus/YouTubeVideoTimestamps"
+    }
+  ],
+  "speakers": [
+    "TODO"
+  ],
+  "tags": [
+    "Education",
+    "Julia",
+    "NumFOCUS",
+    "Opensource",
+    "PyData",
+    "Python",
+    "Tutorial",
+    "coding",
+    "how to program",
+    "learn",
+    "learn to code",
+    "python 3",
+    "scientific programming",
+    "software"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi/vICrdSUS9vg/maxresdefault.jpg",
+  "title": "Vahan Huroyan - Recent Developments in Self-Supervised Learning for Computer Vision",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=vICrdSUS9vg"
+    }
+  ]
+}
diff --git a/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json b/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json
new file mode 100644
index 000000000..2a6e51587
--- /dev/null
+++ b/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json
@@ -0,0 +1,51 @@
+{
+  "description": "Yuri Orshulevich, Senior NLP Engineer at SuperAnnotate, provides a talk on \"LLM-Generated Text Detection.\" \n\nIn the evolving AI and NLP landscape, the rise of Large Language Models (LLMs) has led to a new era of text generation. Identifying machine-generated content presents a significant challenge with the growing complexity of these models.\n\nThe talk emphasizes the importance of distinguishing machine-generated vs human-written text by exploring possible problems and ways to avoid them. \n\nDuring the talk, you will:\n-Explore the significance of text detection in various domains and ongoing efforts, including competitions. \n-Evaluate the current progress in this area, highlighting the apparent simplicity of a task, which in reality is complex. \n-Examine existing open-source solutions, their limitations, and areas for improvement.\n-Be introduced to a high-quality benchmark that provides a clear way to measure performance.\n\nSlides: https://shorturl.at/fhmpv\n\nGitHub repo: https://github.com/superannotateai/generated_text_detector\n\nHuggingFace model: https://huggingface.co/SuperAnnotate/roberta-large-llm-content-detector\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "duration": 3116,
+  "language": "eng",
+  "recorded": "2024-04-27",
+  "related_urls": [
+    {
+      "label": "https://huggingface.co/SuperAnnotate/roberta-large-llm-content-detector",
+      "url": "https://huggingface.co/SuperAnnotate/roberta-large-llm-content-detector"
+    },
+    {
+      "label": "https://shorturl.at/fhmpv",
+      "url": "https://shorturl.at/fhmpv"
+    },
+    {
+      "label": "https://github.com/numfocus/YouTubeVideoTimestamps",
+      "url": "https://github.com/numfocus/YouTubeVideoTimestamps"
+    },
+    {
+      "label": "https://github.com/superannotateai/generated_text_detector",
+      "url": "https://github.com/superannotateai/generated_text_detector"
+    }
+  ],
+  "speakers": [
+    "TODO"
+  ],
+  "tags": [
+    "Education",
+    "Julia",
+    "NumFOCUS",
+    "Opensource",
+    "PyData",
+    "Python",
+    "Tutorial",
+    "coding",
+    "how to program",
+    "learn",
+    "learn to code",
+    "python 3",
+    "scientific programming",
+    "software"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi/hJ4NjViBF98/maxresdefault.jpg",
+  "title": "Yuri Orshulevich - LLM-Generated Text Detection",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=hJ4NjViBF98"
+    }
+  ]
+}

From 61bb515bc56f4cde65593e4df99cab4a1d08b75d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ezequiel=20Leonardo=20Casta=C3=B1o?=
 <14986783+ELC@users.noreply.github.com>
Date: Tue, 24 Jun 2025 18:13:36 +0000
Subject: [PATCH 2/3] Add speaker information

---
 ...impactful-dashboards-for-your-clients.json | 21 +++---------------
 ...akov-revolutionizing-cancer-treatment.json | 21 +++---------------
 ...e-learning-approaches-in-neuroscience.json | 21 +++---------------
 ...manov-performance-of-vector-databases.json | 21 +++---------------
 ...data-warehouse-in-your-python-process.json | 21 +++---------------
 ...or-building-large-language-model-apps.json | 22 ++++---------------
 ...th-reasoning-without-proprietary-data.json | 22 ++++---------------
 ...e-amount-of-boilerplate-code-required.json | 21 +++---------------
 ...pervised-learning-for-computer-vision.json | 21 +++---------------
 ...hulevich-llm-generated-text-detection.json | 21 +++---------------
 10 files changed, 32 insertions(+), 180 deletions(-)

diff --git a/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json b/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json
index 54f6a67b9..2245535a4 100644
--- a/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json
+++ b/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json
@@ -10,26 +10,11 @@
     }
   ],
   "speakers": [
-    "TODO"
-  ],
-  "tags": [
-    "Education",
-    "Julia",
-    "NumFOCUS",
-    "Opensource",
-    "PyData",
-    "Python",
-    "Tutorial",
-    "coding",
-    "how to program",
-    "learn",
-    "learn to code",
-    "python 3",
-    "scientific programming",
-    "software"
+    "Adam Kulidjian"
   ],
+  "tags": [],
   "thumbnail_url": "https://i.ytimg.com/vi/DZjCrLJ1xlk/maxresdefault.jpg",
-  "title": "Adam Kulidjian - Crafting Impactful Dashboards for Your Clients",
+  "title": "Crafting Impactful Dashboards for Your Clients",
   "videos": [
     {
       "type": "youtube",
diff --git a/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json b/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json
index 6695989b3..b4143e244 100644
--- a/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json
+++ b/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json
@@ -10,26 +10,11 @@
     }
   ],
   "speakers": [
-    "TODO"
-  ],
-  "tags": [
-    "Education",
-    "Julia",
-    "NumFOCUS",
-    "Opensource",
-    "PyData",
-    "Python",
-    "Tutorial",
-    "coding",
-    "how to program",
-    "learn",
-    "learn to code",
-    "python 3",
-    "scientific programming",
-    "software"
+    "Aleksandr Sarachakov"
   ],
+  "tags": [],
   "thumbnail_url": "https://i.ytimg.com/vi/Xik80kYLD5c/maxresdefault.jpg",
-  "title": "Aleksandr Sarachakov - Revolutionizing Cancer Treatment",
+  "title": "Revolutionizing Cancer Treatment: Harnessing AI, Zarr, and AnnData for High-Speed Biomedical Imaging",
   "videos": [
     {
       "type": "youtube",
diff --git a/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json b/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json
index cd95f35d6..56894d3c3 100644
--- a/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json
+++ b/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json
@@ -10,26 +10,11 @@
     }
   ],
   "speakers": [
-    "TODO"
-  ],
-  "tags": [
-    "Education",
-    "Julia",
-    "NumFOCUS",
-    "Opensource",
-    "PyData",
-    "Python",
-    "Tutorial",
-    "coding",
-    "how to program",
-    "learn",
-    "learn to code",
-    "python 3",
-    "scientific programming",
-    "software"
+    "Aleksei Gorin"
   ],
+  "tags": [],
   "thumbnail_url": "https://i.ytimg.com/vi/i-8IeS9N7wA/maxresdefault.jpg",
-  "title": "Aleksei Gorin - Machine Learning Approaches in Neuroscience",
+  "title": "Machine Learning Approaches in Neuroscience",
   "videos": [
     {
       "type": "youtube",
diff --git a/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json b/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json
index afc54cfdd..714d7b741 100644
--- a/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json
+++ b/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json
@@ -14,26 +14,11 @@
     }
   ],
   "speakers": [
-    "TODO"
-  ],
-  "tags": [
-    "Education",
-    "Julia",
-    "NumFOCUS",
-    "Opensource",
-    "PyData",
-    "Python",
-    "Tutorial",
-    "coding",
-    "how to program",
-    "learn",
-    "learn to code",
-    "python 3",
-    "scientific programming",
-    "software"
+    "Egor Romanov"
   ],
+  "tags": [],
   "thumbnail_url": "https://i.ytimg.com/vi/-MYYB0QjV6I/maxresdefault.jpg",
-  "title": "Egor Romanov - Performance of Vector Databases",
+  "title": "Performance of Vector Databases",
   "videos": [
     {
       "type": "youtube",
diff --git a/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json b/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json
index 3ad0856f9..c0f1a813e 100644
--- a/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json
+++ b/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json
@@ -10,26 +10,11 @@
     }
   ],
   "speakers": [
-    "TODO"
-  ],
-  "tags": [
-    "Education",
-    "Julia",
-    "NumFOCUS",
-    "Opensource",
-    "PyData",
-    "Python",
-    "Tutorial",
-    "coding",
-    "how to program",
-    "learn",
-    "learn to code",
-    "python 3",
-    "scientific programming",
-    "software"
+    "Gábor Szárnyas"
   ],
+  "tags": [],
   "thumbnail_url": "https://i.ytimg.com/vi/q_SKaOeRiOI/maxresdefault.jpg",
-  "title": "G\u00e1bor Sz\u00e1rnyas - DuckDB: The Power of a Data Warehouse in your Python Process",
+  "title": "DuckDB: The Power of a Data Warehouse in your Python Process",
   "videos": [
     {
       "type": "youtube",
diff --git a/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json b/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json
index 8cf437081..da9c0ec6c 100644
--- a/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json
+++ b/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json
@@ -18,26 +18,12 @@
     }
   ],
   "speakers": [
-    "TODO"
-  ],
-  "tags": [
-    "Education",
-    "Julia",
-    "NumFOCUS",
-    "Opensource",
-    "PyData",
-    "Python",
-    "Tutorial",
-    "coding",
-    "how to program",
-    "learn",
-    "learn to code",
-    "python 3",
-    "scientific programming",
-    "software"
+    "Gor Hayrapetyan",
+    "Karen Javadyan"
   ],
+  "tags": [],
   "thumbnail_url": "https://i.ytimg.com/vi/YNixBsPt7Ds/maxresdefault.jpg",
-  "title": "Gor Hayrapetyan & Karen Javadyan - Langchain: A Framework for Building Large Language Model Apps",
+  "title": "Langchain: A Framework for Building Large Language Model Apps",
   "videos": [
     {
       "type": "youtube",
diff --git a/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json b/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json
index 5520b6746..8caac471b 100644
--- a/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json
+++ b/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json
@@ -14,26 +14,12 @@
     }
   ],
   "speakers": [
-    "TODO"
-  ],
-  "tags": [
-    "Education",
-    "Julia",
-    "NumFOCUS",
-    "Opensource",
-    "PyData",
-    "Python",
-    "Tutorial",
-    "coding",
-    "how to program",
-    "learn",
-    "learn to code",
-    "python 3",
-    "scientific programming",
-    "software"
+    "Ivan Moshkov",
+    "Daria Gitman"
   ],
+  "tags": [],
   "thumbnail_url": "https://i.ytimg.com/vi/prPLAxYF1bU/maxresdefault.jpg",
-  "title": "Ivan Moshkov & Daria Gitman - How to Build an LLM for Math Reasoning without Proprietary Data?",
+  "title": "How to Build an LLM for Math Reasoning without Proprietary Data?",
   "videos": [
     {
       "type": "youtube",
diff --git a/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json b/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json
index a25a62e6d..cb935ac1d 100644
--- a/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json
+++ b/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json
@@ -14,26 +14,11 @@
     }
   ],
   "speakers": [
-    "TODO"
-  ],
-  "tags": [
-    "Education",
-    "Julia",
-    "NumFOCUS",
-    "Opensource",
-    "PyData",
-    "Python",
-    "Tutorial",
-    "coding",
-    "how to program",
-    "learn",
-    "learn to code",
-    "python 3",
-    "scientific programming",
-    "software"
+    "Nikolay Karpov"
   ],
+  "tags": [],
   "thumbnail_url": "https://i.ytimg.com/vi/3L80rxyofFU/maxresdefault.jpg",
-  "title": "Nikolay Karpov - How to Prepare a Speech Dataset & Minimize the Amount of Boilerplate Code Required?",
+  "title": "How to Prepare a Speech Dataset & Minimize the Amount of Boilerplate Code Required?",
   "videos": [
     {
       "type": "youtube",
diff --git a/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json b/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json
index ec0f9efb5..0b55798c5 100644
--- a/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json
+++ b/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json
@@ -10,26 +10,11 @@
     }
   ],
   "speakers": [
-    "TODO"
-  ],
-  "tags": [
-    "Education",
-    "Julia",
-    "NumFOCUS",
-    "Opensource",
-    "PyData",
-    "Python",
-    "Tutorial",
-    "coding",
-    "how to program",
-    "learn",
-    "learn to code",
-    "python 3",
-    "scientific programming",
-    "software"
+    "Vahan Huroyan"
   ],
+  "tags": [],
   "thumbnail_url": "https://i.ytimg.com/vi/vICrdSUS9vg/maxresdefault.jpg",
-  "title": "Vahan Huroyan - Recent Developments in Self-Supervised Learning for Computer Vision",
+  "title": "Recent Developments in Self-Supervised Learning for Computer Vision",
   "videos": [
     {
       "type": "youtube",
diff --git a/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json b/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json
index 2a6e51587..3a8209e1f 100644
--- a/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json
+++ b/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json
@@ -22,26 +22,11 @@
     }
   ],
   "speakers": [
-    "TODO"
-  ],
-  "tags": [
-    "Education",
-    "Julia",
-    "NumFOCUS",
-    "Opensource",
-    "PyData",
-    "Python",
-    "Tutorial",
-    "coding",
-    "how to program",
-    "learn",
-    "learn to code",
-    "python 3",
-    "scientific programming",
-    "software"
+    "Yuri Orshulevich"
   ],
+  "tags": [],
   "thumbnail_url": "https://i.ytimg.com/vi/hJ4NjViBF98/maxresdefault.jpg",
-  "title": "Yuri Orshulevich - LLM-Generated Text Detection",
+  "title": "LLM-Generated Text Detection",
   "videos": [
     {
       "type": "youtube",

From 8f1921959052f4e8933a79f88e052ed85d843fba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ezequiel=20Leonardo=20Casta=C3=B1o?=
 <14986783+ELC@users.noreply.github.com>
Date: Tue, 24 Jun 2025 18:17:48 +0000
Subject: [PATCH 3/3] Fix conflicting characters

---
 ...ulidjian-crafting-impactful-dashboards-for-your-clients.json | 2 +-
 .../aleksandr-sarachakov-revolutionizing-cancer-treatment.json  | 2 +-
 ...eksei-gorin-machine-learning-approaches-in-neuroscience.json | 2 +-
 .../videos/egor-romanov-performance-of-vector-databases.json    | 2 +-
 ...db-the-power-of-a-data-warehouse-in-your-python-process.json | 2 +-
 ...hain-a-framework-for-building-large-language-model-apps.json | 2 +-
 ...uild-an-llm-for-math-reasoning-without-proprietary-data.json | 2 +-
 ...ataset-minimize-the-amount-of-boilerplate-code-required.json | 2 +-
 ...opments-in-self-supervised-learning-for-computer-vision.json | 2 +-
 .../videos/yuri-orshulevich-llm-generated-text-detection.json   | 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json b/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json
index 2245535a4..1673221e5 100644
--- a/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json
+++ b/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json
@@ -1,5 +1,5 @@
 {
-  "description": "Adam Kulidjian, Chief Technology Officer at Zyphr Solutions Inc., provides a talk on \u201cCrafting Impactful Dashboards for Your Clients.\u201d\n\nCommunicating trends, patterns, and insights through data is integral to understanding the world quantitatively. This phenomenon is used in data science, business intelligence, data analytics, and generally across all scientific disciplines.\n\nA dashboard, particularly one that houses data visualization, is the most common way to do it. With the increased accessibility of dashboard creation tools to people using data, there is a need to effectively communicate data, tell compelling stories, and create affordances that allow others to explore the data themselves.\n\nThe talk offers a handful of heuristics and pragmatic questions that will help you build a better dashboard, regardless of your clients, industry, or use case.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "description": "Adam Kulidjian, Chief Technology Officer at Zyphr Solutions Inc., provides a talk on \u201cCrafting Impactful Dashboards for Your Clients.\u201d\n\nCommunicating trends, patterns, and insights through data is integral to understanding the world quantitatively. This phenomenon is used in data science, business intelligence, data analytics, and generally across all scientific disciplines.\n\nA dashboard, particularly one that houses data visualization, is the most common way to do it. With the increased accessibility of dashboard creation tools to people using data, there is a need to effectively communicate data, tell compelling stories, and create affordances that allow others to explore the data themselves.\n\nThe talk offers a handful of heuristics and pragmatic questions that will help you build a better dashboard, regardless of your clients, industry, or use case.\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
   "duration": 3089,
   "language": "eng",
   "recorded": "2024-05-22",
diff --git a/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json b/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json
index b4143e244..d71ed03a5 100644
--- a/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json
+++ b/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json
@@ -1,5 +1,5 @@
 {
-  "description": "Aleksandr Sarachakov, Biomedical Imaging Team Lead at BostonGene, provides a talk on  \u201cRevolutionizing Cancer Treatment: Harnessing AI, Zarr, and AnnData for High-Speed Biomedical Imaging.\u201d \n\nZarr and AnnData, Python-based technologies, are revolutionizing the landscape of biomedical image processing, especially when paired with self-supervised learning (SSL).  Zarr, a chunked and compressed data storage format, enables the efficient handling of datasets found in biomedical applications. AnnData, a specialized framework for multi-dimensional annotated data, is crucial in managing and analyzing large-scale biomedical datasets.\n\nIn the context of SSL, these technologies boost the processing speed and reduce the computational load for handling high-resolution images and complex datasets. Zarr's ability to store multi-terabyte data in distributed and parallelized environments allows for faster processing and analysis of biomedical images. AnnData complements this by providing structured, annotated data that SSL models can efficiently learn from without extensive labeling. This combination reduces memory usage, making it feasible to handle biomedical images on a large scale. These advancements are pivotal for applications like cancer diagnosis, where rapid, accurate image analysis is critical.\n\nDuring the talk, our speaker explores:\n- how Zarr and AnnData facilitate scalable biomedical image processing, \n- outline their integration with SSL for cutting-edge research, \n- and discuss future developments in optimizing biomedical workflows.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "description": "Aleksandr Sarachakov, Biomedical Imaging Team Lead at BostonGene, provides a talk on  \u201cRevolutionizing Cancer Treatment: Harnessing AI, Zarr, and AnnData for High-Speed Biomedical Imaging.\u201d \n\nZarr and AnnData, Python-based technologies, are revolutionizing the landscape of biomedical image processing, especially when paired with self-supervised learning (SSL).  Zarr, a chunked and compressed data storage format, enables the efficient handling of datasets found in biomedical applications. AnnData, a specialized framework for multi-dimensional annotated data, is crucial in managing and analyzing large-scale biomedical datasets.\n\nIn the context of SSL, these technologies boost the processing speed and reduce the computational load for handling high-resolution images and complex datasets. Zarr's ability to store multi-terabyte data in distributed and parallelized environments allows for faster processing and analysis of biomedical images. AnnData complements this by providing structured, annotated data that SSL models can efficiently learn from without extensive labeling. This combination reduces memory usage, making it feasible to handle biomedical images on a large scale. These advancements are pivotal for applications like cancer diagnosis, where rapid, accurate image analysis is critical.\n\nDuring the talk, our speaker explores:\n- how Zarr and AnnData facilitate scalable biomedical image processing, \n- outline their integration with SSL for cutting-edge research, \n- and discuss future developments in optimizing biomedical workflows.\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
   "duration": 6191,
   "language": "eng",
   "recorded": "2024-11-07",
diff --git a/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json b/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json
index 56894d3c3..e35f90a2a 100644
--- a/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json
+++ b/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json
@@ -1,5 +1,5 @@
 {
-  "description": "Dr. Aleksei Gorin, a Neurobiologist and Senior Scientist at Emonomy, provides a talk on \u201cMachine Learning Approaches in Neuroscience.\u201d\n\nAlong with the growth of artificial intelligence and machine learning methodologies, neurobiologists are adopting modern machine learning techniques to tackle a broad spectrum of challenges. Those range from early disease diagnosis to the development of software capable of modeling behavior and natural neural networks.\n\nDuring the talk, Dr. Gorin explores the latest endeavors for integrating machine learning into neuroscience while:\n-discussing the achieved outcomes and their implications for the evolution of brain science methodologies,\n-examining key libraries in computational neuroscience, their role, and offering solutions in data analysis processes.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "description": "Dr. Aleksei Gorin, a Neurobiologist and Senior Scientist at Emonomy, provides a talk on \u201cMachine Learning Approaches in Neuroscience.\u201d\n\nAlong with the growth of artificial intelligence and machine learning methodologies, neurobiologists are adopting modern machine learning techniques to tackle a broad spectrum of challenges. Those range from early disease diagnosis to the development of software capable of modeling behavior and natural neural networks.\n\nDuring the talk, Dr. Gorin explores the latest endeavors for integrating machine learning into neuroscience while:\n-discussing the achieved outcomes and their implications for the evolution of brain science methodologies,\n-examining key libraries in computational neuroscience, their role, and offering solutions in data analysis processes.\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
   "duration": 3697,
   "language": "eng",
   "recorded": "2024-02-28",
diff --git a/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json b/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json
index 714d7b741..b68f35e44 100644
--- a/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json
+++ b/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json
@@ -1,5 +1,5 @@
 {
-  "description": "Egor Romanov, Software Engineer at Supabase, provides a talk on \u201cPerformance of Vector Databases.\u201d\n\nThe talk delves into vector databases' performance, challenges, and potentialities and discovers their role in advancing AI applications like Retrieval-Augmented Generation (RAG).\n\nHigh-dimensional embeddings are integral to numerous machine learning applications, transforming raw data into compact representations for diverse algorithms. Vector databases are essential in managing and utilizing these vectors. \n\nTheir main purpose includes aiding operations, including distance computations, similarity evaluations, and nearest-neighbor searches within high-dimensional spaces. RAG leverages these embedding stores, unlocking significant potentialities in the AI domain. \n\nDuring the talk, Egor Romanov:\n \n- explores the process of creating a provider for Postgres, integrated with pgvector, within a Python performance evaluation framework,\n- conducts a similarity search test simulation showcasing the latent performance potential.\n\nAccess the talk notes at: https://shorturl.at/CGKL1\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! 
See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "description": "Egor Romanov, Software Engineer at Supabase, provides a talk on \u201cPerformance of Vector Databases.\u201d\n\nThe talk delves into vector databases' performance, challenges, and potentialities and discovers their role in advancing AI applications like Retrieval-Augmented Generation (RAG).\n\nHigh-dimensional embeddings are integral to numerous machine learning applications, transforming raw data into compact representations for diverse algorithms. Vector databases are essential in managing and utilizing these vectors. \n\nTheir main purpose includes aiding operations, including distance computations, similarity evaluations, and nearest-neighbor searches within high-dimensional spaces. RAG leverages these embedding stores, unlocking significant potentialities in the AI domain. \n\nDuring the talk, Egor Romanov:\n\n- explores the process of creating a provider for Postgres, integrated with pgvector, within a Python performance evaluation framework,\n- conducts a similarity search test simulation showcasing the latent performance potential.\n\nAccess the talk notes at: https://shorturl.at/CGKL1\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! 
See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
   "duration": 2588,
   "language": "eng",
   "recorded": "2024-01-22",
diff --git a/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json b/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json
index c0f1a813e..4ada29d98 100644
--- a/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json
+++ b/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json
@@ -1,5 +1,5 @@
 {
-  "description": "G\u00e1bor Sz\u00e1rnyas, a Developer Relations Advocate and Technical Writer at DuckDB Labs, provides a talk on \u201cDuckDB: The Power of a Data Warehouse in your Python Process.\u201d\n\nDuckDB is an in-process analytical database management system, a powerful data warehouse engine running inside the Python process without any setup or communication overhead.\n\nIt is an open-source and highly portable system available as a command line tool with R, NodeJS, and Julia clients;\n- which loads data from many formats, such as CSV and Parquet, as well as pandas data frames,\n-its speed and features allow it to tackle a remarkable number of use cases in data science \u2013 including data wrangling and running complex ad-hoc SQL queries \u2013 while running on a laptop.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "description": "G\u00e1bor Sz\u00e1rnyas, a Developer Relations Advocate and Technical Writer at DuckDB Labs, provides a talk on \u201cDuckDB: The Power of a Data Warehouse in your Python Process.\u201d\n\nDuckDB is an in-process analytical database management system, a powerful data warehouse engine running inside the Python process without any setup or communication overhead.\n\nIt is an open-source and highly portable system available as a command line tool with R, NodeJS, and Julia clients;\n- which loads data from many formats, such as CSV and Parquet, as well as pandas data frames,\n-its speed and features allow it to tackle a remarkable number of use cases in data science \u2013 including data wrangling and running complex ad-hoc SQL queries \u2013 while running on a laptop.\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
   "duration": 3326,
   "language": "eng",
   "recorded": "2023-10-23",
diff --git a/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json b/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json
index da9c0ec6c..4541617ac 100644
--- a/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json
+++ b/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json
@@ -1,5 +1,5 @@
 {
-  "description": "Gor Hayrapetyan, Lead/Senior Data Engineer at Microsoft, Estonia, and Karen Javadyan, Software Engineer at Snowflake, provide a talk on \u201cLangchain: A Framework for Building Large Language Model Apps.\u201d\n\nThe landscape of Large Language Models (LLM) and the libraries supporting them has recently had rapid evolution.\n\nDuring the talk, you will get a brief introduction to LLMs and learn about the current framework of LLM applications. Following this, you will discover Langchain features and concepts, including:\n- Integrations with different LLM models,\n- Chains,\n- Retrievers, \n- Tools,\n- Agents.\n \nTo put Langchain usage into perspective, the talk will also reflect on the RAG technique to expose LLM to your data.\n\nGitHub Repo: https://github.com/kajarenc/PyData-March-Langchain\n\nSlides: https://shorturl.at/ciovF\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "description": "Gor Hayrapetyan, Lead/Senior Data Engineer at Microsoft, Estonia, and Karen Javadyan, Software Engineer at Snowflake, provide a talk on \u201cLangchain: A Framework for Building Large Language Model Apps.\u201d\n\nThe landscape of Large Language Models (LLM) and the libraries supporting them has recently had rapid evolution.\n\nDuring the talk, you will get a brief introduction to LLMs and learn about the current framework of LLM applications. Following this, you will discover Langchain features and concepts, including:\n- Integrations with different LLM models,\n- Chains,\n- Retrievers, \n- Tools,\n- Agents.\n\nTo put Langchain usage into perspective, the talk will also reflect on the RAG technique to expose LLM to your data.\n\nGitHub Repo: https://github.com/kajarenc/PyData-March-Langchain\n\nSlides: https://shorturl.at/ciovF\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
   "duration": 4235,
   "language": "eng",
   "recorded": "2024-04-12",
diff --git a/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json b/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json
index 8caac471b..66465a45f 100644
--- a/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json
+++ b/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json
@@ -1,5 +1,5 @@
 {
-  "description": "Ivan Moshkov, Deep Learning Engineer at NVIDIA, and Daria Gitman, Conversational AI Research Intern at NVIDIA provide a talk on \"How to Build an LLM for Math Reasoning without Proprietary Data?\"\n\nRecent research has shown the value of synthetically generated datasets in training LLMs to acquire targeted skills. Current large-scale math instruction tuning datasets such as MetaMathQA and MAmmoTH rely on outputs from closed-source LLMs that have commercially restrictive licenses. One key reason limiting the use of open-source LLMs in data generation pipelines is the gap in the mathematical skills between the best closed-source LLMs, such as GPT-4, and the best open-source LLMs. \n\nIn their research, Ivan and Daria constructed OpenMathInstruct-1, a math instruction tuning dataset with 1.8M problem-solution pairs using recent progress in open-source LLMs, proposed prompting novelty, and brute-force scaling. Their best model, OpenMath-CodeLlama-70B, trained on a subset of OpenMathInstruct-1, achieves a competitive score of 84.6% on GSM8K and 50.7% on MATH, comparable to top GPT-distilled models. \n\nDuring the talk, Ivan introduces the challenge of math reasoning in Natural Language Processing and discusses the process of creating their synthetic dataset. Following this, Daria explores the Data Explorer tool and shares key insights extracted from the data using this tool. \n\nSlides: https://shorturl.at/GRUFi \n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "description": "Ivan Moshkov, Deep Learning Engineer at NVIDIA, and Daria Gitman, Conversational AI Research Intern at NVIDIA provide a talk on \"How to Build an LLM for Math Reasoning without Proprietary Data?\"\n\nRecent research has shown the value of synthetically generated datasets in training LLMs to acquire targeted skills. Current large-scale math instruction tuning datasets such as MetaMathQA and MAmmoTH rely on outputs from closed-source LLMs that have commercially restrictive licenses. One key reason limiting the use of open-source LLMs in data generation pipelines is the gap in the mathematical skills between the best closed-source LLMs, such as GPT-4, and the best open-source LLMs. \n\nIn their research, Ivan and Daria constructed OpenMathInstruct-1, a math instruction tuning dataset with 1.8M problem-solution pairs using recent progress in open-source LLMs, proposed prompting novelty, and brute-force scaling. Their best model, OpenMath-CodeLlama-70B, trained on a subset of OpenMathInstruct-1, achieves a competitive score of 84.6% on GSM8K and 50.7% on MATH, comparable to top GPT-distilled models. \n\nDuring the talk, Ivan introduces the challenge of math reasoning in Natural Language Processing and discusses the process of creating their synthetic dataset. Following this, Daria explores the Data Explorer tool and shares key insights extracted from the data using this tool. \n\nSlides: https://shorturl.at/GRUFi \n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
   "duration": 3671,
   "language": "eng",
   "recorded": "2024-07-24",
diff --git a/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json b/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json
index cb935ac1d..6b1252eca 100644
--- a/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json
+++ b/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json
@@ -1,5 +1,5 @@
 {
-  "description": "Nikolay Karpov, a Senior Research Scientist at NVIDIA NeMo, provides a talk on \u201cHow to prepare a speech dataset and minimize the amount of boilerplate code required?\u201d \n\nProcessing a lot of data for training neural models requires more effort than neural network engineering and training. Nvidia NeMo team has made a Speech Data Processor tool to simplify the process: https://lnkd.in/eHE-KjNC\n\nDuring the talk, you will explore the steps for speech dataset preparation, including:\n\n-Video-to-audio conversion,\n-Metadata parsing,\n-Audio and text language identification,\n-Speech recognition,\n-Text normalization,\n-Filtration by metrics and regular expression.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "description": "Nikolay Karpov, a Senior Research Scientist at NVIDIA NeMo, provides a talk on \u201cHow to prepare a speech dataset and minimize the amount of boilerplate code required?\u201d \n\nProcessing a lot of data for training neural models requires more effort than neural network engineering and training. Nvidia NeMo team has made a Speech Data Processor tool to simplify the process: https://lnkd.in/eHE-KjNC\n\nDuring the talk, you will explore the steps for speech dataset preparation, including:\n\n-Video-to-audio conversion,\n-Metadata parsing,\n-Audio and text language identification,\n-Speech recognition,\n-Text normalization,\n-Filtration by metrics and regular expression.\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
   "duration": 2296,
   "language": "eng",
   "recorded": "2023-11-29",
diff --git a/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json b/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json
index 0b55798c5..55d5d2f29 100644
--- a/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json
+++ b/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json
@@ -1,5 +1,5 @@
 {
-  "description": "Vahan Huroyan, a Machine Learning Researcher at the YerevaNN research lab, provides a talk on \u201cRecent Developments in Self-Supervised Learning for Computer Vision.\u201d\n\nDuring the talk, you will discover the latest developments in self-supervised learning (SSL) for computer vision and delve into its challenges and future directions in this field.\n\nSelf-supervised learning (SSL) is a powerful tool for training computer vision models without the need for extensive labeled data. SSL methods are designed around pretext tasks that force models to learn valuable representations from the training data. These learned representations can then be transferred to downstream tasks, such as:\n-image classification, \n-object detection, \n-and segmentation, with minimal fine-tuning\n\nThe two main approaches of SSL include Contrastive learning and Masked image modeling:\n\n1. Contrastive learning is a general SSL framework that trains models to distinguish between positive and negative pairs of images.\n2. Masked image modeling is a specific type of SSL that involves masking random patches of an image and training the model to predict the masked pixels.\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "description": "Vahan Huroyan, a Machine Learning Researcher at the YerevaNN research lab, provides a talk on \u201cRecent Developments in Self-Supervised Learning for Computer Vision.\u201d\n\nDuring the talk, you will discover the latest developments in self-supervised learning (SSL) for computer vision and delve into its challenges and future directions in this field.\n\nSelf-supervised learning (SSL) is a powerful tool for training computer vision models without the need for extensive labeled data. SSL methods are designed around pretext tasks that force models to learn valuable representations from the training data. These learned representations can then be transferred to downstream tasks, such as:\n-image classification, \n-object detection, \n-and segmentation, with minimal fine-tuning\n\nThe two main approaches of SSL include Contrastive learning and Masked image modeling:\n\n1. Contrastive learning is a general SSL framework that trains models to distinguish between positive and negative pairs of images.\n2. Masked image modeling is a specific type of SSL that involves masking random patches of an image and training the model to predict the masked pixels.\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
   "duration": 2185,
   "language": "eng",
   "recorded": "2023-12-16",
diff --git a/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json b/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json
index 3a8209e1f..57ce97b4d 100644
--- a/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json
+++ b/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json
@@ -1,5 +1,5 @@
 {
-  "description": "Yuri Orshulevich, Senior NLP Engineer at SuperAnnotate, provides a talk on \"LLM-Generated Text Detection.\" \n\nIn the evolving AI and NLP landscape, the rise of Large Language Models (LLMs) has led to a new era of text generation. Identifying machine-generated content presents a significant challenge with the growing complexity of these models.\n\nThe talk emphasizes the importance of distinguishing machine-generated vs human-written text by exploring possible problems and ways to avoid them. \n\nDuring the talk, you will:\n-Explore the significance of text detection in various domains and ongoing efforts, including competitions. \n-Evaluate the current progress in this area, highlighting the apparent simplicity of a task, which in reality is complex. \n-Examine existing open-source solutions, their limitations, and areas for improvement.\n-Be introduced to a high-quality benchmark that provides a clear way to measure performance.\n\nSlides: https://shorturl.at/fhmpv\n\nGitHub repo: https://github.com/superannotateai/generated_text_detector\n\nHuggingFace model: https://huggingface.co/SuperAnnotate/roberta-large-llm-content-detector\n--\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
+  "description": "Yuri Orshulevich, Senior NLP Engineer at SuperAnnotate, provides a talk on \"LLM-Generated Text Detection.\" \n\nIn the evolving AI and NLP landscape, the rise of Large Language Models (LLMs) has led to a new era of text generation. Identifying machine-generated content presents a significant challenge with the growing complexity of these models.\n\nThe talk emphasizes the importance of distinguishing machine-generated vs human-written text by exploring possible problems and ways to avoid them. \n\nDuring the talk, you will:\n-Explore the significance of text detection in various domains and ongoing efforts, including competitions. \n-Evaluate the current progress in this area, highlighting the apparent simplicity of a task, which in reality is complex. \n-Examine existing open-source solutions, their limitations, and areas for improvement.\n-Be introduced to a high-quality benchmark that provides a clear way to measure performance.\n\nSlides: https://shorturl.at/fhmpv\n\nGitHub repo: https://github.com/superannotateai/generated_text_detector\n\nHuggingFace model: https://huggingface.co/SuperAnnotate/roberta-large-llm-content-detector\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps",
   "duration": 3116,
   "language": "eng",
   "recorded": "2024-04-27",