diff --git a/pydata-yerevan-2023/category.json b/pydata-yerevan-2023/category.json new file mode 100644 index 000000000..0038b9eb3 --- /dev/null +++ b/pydata-yerevan-2023/category.json @@ -0,0 +1,3 @@ +{ + "title": "PyData Yerevan 2023" +} diff --git a/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json b/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json new file mode 100644 index 000000000..1673221e5 --- /dev/null +++ b/pydata-yerevan-2023/videos/adam-kulidjian-crafting-impactful-dashboards-for-your-clients.json @@ -0,0 +1,24 @@ +{ + "description": "Adam Kulidjian, Chief Technology Officer at Zyphr Solutions Inc., provides a talk on \u201cCrafting Impactful Dashboards for Your Clients.\u201d\n\nCommunicating trends, patterns, and insights through data is integral to understanding the world quantitatively. This phenomenon is used in data science, business intelligence, data analytics, and generally across all scientific disciplines.\n\nA dashboard, particularly one that houses data visualization, is the most common way to do it. With the increased accessibility of dashboard creation tools to people using data, there is a need to effectively communicate data, tell compelling stories, and create affordances that allow others to explore the data themselves.\n\nThe talk offers a handful of heuristics and pragmatic questions that will help you build a better dashboard, regardless of your clients, industry, or use case.\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 3089, + "language": "eng", + "recorded": "2024-05-22", + "related_urls": [ + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Adam Kulidjian" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/DZjCrLJ1xlk/maxresdefault.jpg", + "title": "Crafting Impactful Dashboards for Your Clients", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=DZjCrLJ1xlk" + } + ] +} diff --git a/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json b/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json new file mode 100644 index 000000000..d71ed03a5 --- /dev/null +++ b/pydata-yerevan-2023/videos/aleksandr-sarachakov-revolutionizing-cancer-treatment.json @@ -0,0 +1,24 @@ +{ + "description": "Aleksandr Sarachakov, Biomedical Imaging Team Lead at BostonGene, provides a talk on \u201cRevolutionizing Cancer Treatment: Harnessing AI, Zarr, and AnnData for High-Speed Biomedical Imaging.\u201d \n\nZarr and AnnData, Python-based technologies, are revolutionizing the landscape of biomedical image processing, especially when paired with self-supervised learning (SSL). Zarr, a chunked and compressed data storage format, enables the efficient handling of datasets found in biomedical applications. AnnData, a specialized framework for multi-dimensional annotated data, is crucial in managing and analyzing large-scale biomedical datasets.\n\nIn the context of SSL, these technologies boost the processing speed and reduce the computational load for handling high-resolution images and complex datasets. Zarr's ability to store multi-terabyte data in distributed and parallelized environments allows for faster processing and analysis of biomedical images. AnnData complements this by providing structured, annotated data that SSL models can efficiently learn from without extensive labeling. This combination reduces memory usage, making it feasible to handle biomedical images on a large scale. These advancements are pivotal for applications like cancer diagnosis, where rapid, accurate image analysis is critical.\n\nDuring the talk, our speaker explores:\n- how Zarr and AnnData facilitate scalable biomedical image processing, \n- outline their integration with SSL for cutting-edge research, \n- and discuss future developments in optimizing biomedical workflows.\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 6191, + "language": "eng", + "recorded": "2024-11-07", + "related_urls": [ + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Aleksandr Sarachakov" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/Xik80kYLD5c/maxresdefault.jpg", + "title": "Revolutionizing Cancer Treatment: Harnessing AI, Zarr, and AnnData for High-Speed Biomedical Imaging", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Xik80kYLD5c" + } + ] +} diff --git a/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json b/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json new file mode 100644 index 000000000..e35f90a2a --- /dev/null +++ b/pydata-yerevan-2023/videos/aleksei-gorin-machine-learning-approaches-in-neuroscience.json @@ -0,0 +1,24 @@ +{ + "description": "Dr. Aleksei Gorin, a Neurobiologist and Senior Scientist at Emonomy, provides a talk on \u201cMachine Learning Approaches in Neuroscience.\u201d\n\nAlong with the growth of artificial intelligence and machine learning methodologies, neurobiologists are adopting modern machine learning techniques to tackle a broad spectrum of challenges. Those range from early disease diagnosis to the development of software capable of modeling behavior and natural neural networks.\n\nDuring the talk, Dr. Gorin explores the latest endeavors for integrating machine learning into neuroscience while:\n-discussing the achieved outcomes and their implications for the evolution of brain science methodologies,\n-examining key libraries in computational neuroscience, their role, and offering solutions in data analysis processes.\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 3697, + "language": "eng", + "recorded": "2024-02-28", + "related_urls": [ + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Aleksei Gorin" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/i-8IeS9N7wA/maxresdefault.jpg", + "title": "Machine Learning Approaches in Neuroscience", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=i-8IeS9N7wA" + } + ] +} diff --git a/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json b/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json new file mode 100644 index 000000000..b68f35e44 --- /dev/null +++ b/pydata-yerevan-2023/videos/egor-romanov-performance-of-vector-databases.json @@ -0,0 +1,28 @@ +{ + "description": "Egor Romanov, Software Engineer at Supabase, provides a talk on \u201cPerformance of Vector Databases.\u201d\n\nThe talk delves into vector databases' performance, challenges, and potentialities and discovers their role in advancing AI applications like Retrieval-Augmented Generation (RAG).\n\nHigh-dimensional embeddings are integral to numerous machine learning applications, transforming raw data into compact representations for diverse algorithms. Vector databases are essential in managing and utilizing these vectors. \n\nTheir main purpose includes aiding operations, including distance computations, similarity evaluations, and nearest-neighbor searches within high-dimensional spaces. RAG leverages these embedding stores, unlocking significant potentialities in the AI domain. \n\nDuring the talk, Egor Romanov:\n\n- explores the process of creating a provider for Postgres, integrated with pgvector, within a Python performance evaluation framework,\n- conducts a similarity search test simulation showcasing the latent performance potential.\n\nAccess the talk notes at: https://shorturl.at/CGKL1\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 2588, + "language": "eng", + "recorded": "2024-01-22", + "related_urls": [ + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://shorturl.at/CGKL1", + "url": "https://shorturl.at/CGKL1" + } + ], + "speakers": [ + "Egor Romanov" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/-MYYB0QjV6I/maxresdefault.jpg", + "title": "Performance of Vector Databases", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=-MYYB0QjV6I" + } + ] +} diff --git a/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json b/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json new file mode 100644 index 000000000..4ada29d98 --- /dev/null +++ b/pydata-yerevan-2023/videos/gabor-szarnyas-duckdb-the-power-of-a-data-warehouse-in-your-python-process.json @@ -0,0 +1,24 @@ +{ + "description": "G\u00e1bor Sz\u00e1rnyas, a Developer Relations Advocate and Technical Writer at DuckDB Labs, provides a talk on \u201cDuckDB: The Power of a Data Warehouse in your Python Process.\u201d\n\nDuckDB is an in-process analytical database management system, a powerful data warehouse engine running inside the Python process without any setup or communication overhead.\n\nIt is an open-source and highly portable system available as a command line tool with R, NodeJS, and Julia clients;\n- which loads data from many formats, such as CSV and Parquet, as well as pandas data frames,\n-its speed and features allow it to tackle a remarkable number of use cases in data science \u2013 including data wrangling and running complex ad-hoc SQL queries \u2013 while running on a laptop.\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 3326, + "language": "eng", + "recorded": "2023-10-23", + "related_urls": [ + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Gábor Szárnyas" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/q_SKaOeRiOI/maxresdefault.jpg", + "title": "DuckDB: The Power of a Data Warehouse in your Python Process", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=q_SKaOeRiOI" + } + ] +} diff --git a/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json b/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json new file mode 100644 index 000000000..4541617ac --- /dev/null +++ b/pydata-yerevan-2023/videos/gor-hayrapetyan-karen-javadyan-langchain-a-framework-for-building-large-language-model-apps.json @@ -0,0 +1,33 @@ +{ + "description": "Gor Hayrapetyan, Lead/Senior Data Engineer at Microsoft, Estonia, and Karen Javadyan, Software Engineer at Snowflake, provide a talk on \u201cLangchain: A Framework for Building Large Language Model Apps.\u201d\n\nThe landscape of Large Language Models (LLM) and the libraries supporting them has recently had rapid evolution.\n\nDuring the talk, you will get a brief introduction to LLMs and learn about the current framework of LLM applications. Following this, you will discover Langchain features and concepts, including:\n- Integrations with different LLM models,\n- Chains,\n- Retrievers, \n- Tools,\n- Agents.\n\nTo put Langchain usage into perspective, the talk will also reflect on the RAG technique to expose LLM to your data.\n\nGitHub Repo: https://github.com/kajarenc/PyData-March-Langchain\n\nSlides: https://shorturl.at/ciovF\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 4235, + "language": "eng", + "recorded": "2024-04-12", + "related_urls": [ + { + "label": "https://shorturl.at/ciovF", + "url": "https://shorturl.at/ciovF" + }, + { + "label": "https://github.com/kajarenc/PyData-March-Langchain", + "url": "https://github.com/kajarenc/PyData-March-Langchain" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Gor Hayrapetyan", + "Karen Javadyan" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/YNixBsPt7Ds/maxresdefault.jpg", + "title": "Langchain: A Framework for Building Large Language Model Apps", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=YNixBsPt7Ds" + } + ] +} diff --git a/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json b/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json new file mode 100644 index 000000000..66465a45f --- /dev/null +++ b/pydata-yerevan-2023/videos/ivan-moshkov-daria-gitman-how-to-build-an-llm-for-math-reasoning-without-proprietary-data.json @@ -0,0 +1,29 @@ +{ + "description": "Ivan Moshkov, Deep Learning Engineer at NVIDIA, and Daria Gitman, Conversational AI Research Intern at NVIDIA provide a talk on \"How to Build an LLM for Math Reasoning without Proprietary Data?\"\n\nRecent research has shown the value of synthetically generated datasets in training LLMs to acquire targeted skills. Current large-scale math instruction tuning datasets such as MetaMathQA and MAmmoTH rely on outputs from closed-source LLMs that have commercially restrictive licenses. One key reason limiting the use of open-source LLMs in data generation pipelines is the gap in the mathematical skills between the best closed-source LLMs, such as GPT-4, and the best open-source LLMs. \n\nIn their research, Ivan and Daria constructed OpenMathInstruct-1, a math instruction tuning dataset with 1.8M problem-solution pairs using recent progress in open-source LLMs, proposed prompting novelty, and brute-force scaling. Their best model, OpenMath-CodeLlama-70B, trained on a subset of OpenMathInstruct-1, achieves a competitive score of 84.6% on GSM8K and 50.7% on MATH, comparable to top GPT-distilled models. \n\nDuring the talk, Ivan introduces the challenge of math reasoning in Natural Language Processing and discusses the process of creating their synthetic dataset. Following this, Daria explores the Data Explorer tool and shares key insights extracted from the data using this tool. \n\nSlides: https://shorturl.at/GRUFi \n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 3671, + "language": "eng", + "recorded": "2024-07-24", + "related_urls": [ + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://shorturl.at/GRUFi", + "url": "https://shorturl.at/GRUFi" + } + ], + "speakers": [ + "Ivan Moshkov", + "Daria Gitman" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/prPLAxYF1bU/maxresdefault.jpg", + "title": "How to Build an LLM for Math Reasoning without Proprietary Data?", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=prPLAxYF1bU" + } + ] +} diff --git a/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json b/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json new file mode 100644 index 000000000..6b1252eca --- /dev/null +++ b/pydata-yerevan-2023/videos/nikolay-karpov-how-to-prepare-a-speech-dataset-minimize-the-amount-of-boilerplate-code-required.json @@ -0,0 +1,28 @@ +{ + "description": "Nikolay Karpov, a Senior Research Scientist at NVIDIA NeMo, provides a talk on \u201cHow to prepare a speech dataset and minimize the amount of boilerplate code required?\u201d \n\nProcessing a lot of data for training neural models requires more effort than neural network engineering and training. Nvidia NeMo team has made a Speech Data Processor tool to simplify the process: https://lnkd.in/eHE-KjNC\n\nDuring the talk, you will explore the steps for speech dataset preparation, including:\n\n-Video-to-audio conversion,\n-Metadata parsing,\n-Audio and text language identification,\n-Speech recognition,\n-Text normalization,\n-Filtration by metrics and regular expression.\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 2296, + "language": "eng", + "recorded": "2023-11-29", + "related_urls": [ + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://lnkd.in/eHE-KjNC", + "url": "https://lnkd.in/eHE-KjNC" + } + ], + "speakers": [ + "Nikolay Karpov" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/3L80rxyofFU/maxresdefault.jpg", + "title": "How to Prepare a Speech Dataset & Minimize the Amount of Boilerplate Code Required?", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=3L80rxyofFU" + } + ] +} diff --git a/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json b/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json new file mode 100644 index 000000000..55d5d2f29 --- /dev/null +++ b/pydata-yerevan-2023/videos/vahan-huroyan-recent-developments-in-self-supervised-learning-for-computer-vision.json @@ -0,0 +1,24 @@ +{ + "description": "Vahan Huroyan, a Machine Learning Researcher at the YerevaNN research lab, provides a talk on \u201cRecent Developments in Self-Supervised Learning for Computer Vision.\u201d\n\nDuring the talk, you will discover the latest developments in self-supervised learning (SSL) for computer vision and delve into its challenges and future directions in this field.\n\nSelf-supervised learning (SSL) is a powerful tool for training computer vision models without the need for extensive labeled data. SSL methods are designed around pretext tasks that force models to learn valuable representations from the training data. These learned representations can then be transferred to downstream tasks, such as:\n-image classification, \n-object detection, \n-and segmentation, with minimal fine-tuning\n\nThe two main approaches of SSL include Contrastive learning and Masked image modeling:\n\n1. Contrastive learning is a general SSL framework that trains models to distinguish between positive and negative pairs of images.\n2. Masked image modeling is a specific type of SSL that involves masking random patches of an image and training the model to predict the masked pixels.\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 2185, + "language": "eng", + "recorded": "2023-12-16", + "related_urls": [ + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Vahan Huroyan" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/vICrdSUS9vg/maxresdefault.jpg", + "title": "Recent Developments in Self-Supervised Learning for Computer Vision", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=vICrdSUS9vg" + } + ] +} diff --git a/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json b/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json new file mode 100644 index 000000000..57ce97b4d --- /dev/null +++ b/pydata-yerevan-2023/videos/yuri-orshulevich-llm-generated-text-detection.json @@ -0,0 +1,36 @@ +{ + "description": "Yuri Orshulevich, Senior NLP Engineer at SuperAnnotate, provides a talk on \"LLM-Generated Text Detection.\" \n\nIn the evolving AI and NLP landscape, the rise of Large Language Models (LLMs) has led to a new era of text generation. Identifying machine-generated content presents a significant challenge with the growing complexity of these models.\n\nThe talk emphasizes the importance of distinguishing machine-generated vs human-written text by exploring possible problems and ways to avoid them. \n\nDuring the talk, you will:\n-Explore the significance of text detection in various domains and ongoing efforts, including competitions. \n-Evaluate the current progress in this area, highlighting the apparent simplicity of a task, which in reality is complex. \n-Examine existing open-source solutions, their limitations, and areas for improvement.\n-Be introduced to a high-quality benchmark that provides a clear way to measure performance.\n\nSlides: https://shorturl.at/fhmpv\n\nGitHub repo: https://github.com/superannotateai/generated_text_detector\n\nHuggingFace model: https://huggingface.co/SuperAnnotate/roberta-large-llm-content-detector\n-\nwww.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 3116, + "language": "eng", + "recorded": "2024-04-27", + "related_urls": [ + { + "label": "https://huggingface.co/SuperAnnotate/roberta-large-llm-content-detector", + "url": "https://huggingface.co/SuperAnnotate/roberta-large-llm-content-detector" + }, + { + "label": "https://shorturl.at/fhmpv", + "url": "https://shorturl.at/fhmpv" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://github.com/superannotateai/generated_text_detector", + "url": "https://github.com/superannotateai/generated_text_detector" + } + ], + "speakers": [ + "Yuri Orshulevich" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/hJ4NjViBF98/maxresdefault.jpg", + "title": "LLM-Generated Text Detection", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=hJ4NjViBF98" + } + ] +}