Skip to content

Commit ebdb87a

Browse files
committed
Clase 10 - Reto
1 parent ea52a88 commit ebdb87a

File tree

2 files changed

+75
-1
lines changed

2 files changed

+75
-1
lines changed

src/platzi_news/analysis/analyzer.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22

33
from __future__ import annotations
44

5+
import itertools
56
import json
67
import logging
8+
from collections import defaultdict
79

810
from openai import OpenAI
911

@@ -79,7 +81,7 @@ def analyze(self, articles: list[Article], question: str) -> str:
7981
content = response.choices[0].message.content
8082
if content is None:
8183
raise AnalysisError("OpenAI returned empty response")
82-
answer = content.strip()
84+
answer: str = content.strip()
8385
logger.info("Successfully received analysis from OpenAI")
8486
return answer
8587
except Exception as e:
@@ -130,3 +132,22 @@ def find_duplicate_titles(articles: list[Article]) -> list[tuple[Article, Articl
130132
if articles[i].title == articles[j].title:
131133
duplicates.append((articles[i], articles[j]))
132134
return duplicates
135+
136+
137+
def find_duplicate_titles_improved(
    articles: list[Article],
) -> list[tuple[Article, Article]]:
    """Return every pair of articles that share the same title.

    Articles are first bucketed by ``title`` in a single pass; each bucket
    then contributes all of its unique two-element combinations. Pairs are
    emitted bucket by bucket, in the order in which each title first appears
    in ``articles``, and within a bucket in input order.

    Args:
        articles: Articles to inspect; only their ``title`` attribute is read.

    Returns:
        A list of ``(first, second)`` tuples, one per pair of articles with
        an identical title. Empty when no title repeats.
    """
    buckets = {}
    for item in articles:
        buckets.setdefault(item.title, []).append(item)

    pairs = []
    for same_title in buckets.values():
        # combinations() yields nothing for a single-element bucket, so
        # titles that occur once contribute no pairs.
        pairs.extend(itertools.combinations(same_title, 2))
    return pairs

src/timeit_platzi_news.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import timeit
2+
3+
from platzi_news.analysis.analyzer import (
4+
find_duplicate_titles,
5+
find_duplicate_titles_improved,
6+
)
7+
from platzi_news.core.models import Article
8+
9+
10+
def create_test_articles(n: int) -> list[Article]:
    """Build ``n`` sample articles for benchmarking duplicate detection.

    When ``n > 10`` the title number is ``i % (n // 10)``, so titles repeat;
    otherwise each article gets its own index as the title number and no
    title repeats. Descriptions and URLs always use the article index.

    Args:
        n: Number of articles to create.

    Returns:
        A list of ``n`` ``Article`` objects.
    """
    distinct_titles = n // 10
    samples = []
    for idx in range(n):
        # The modulus is only applied when n > 10, where n // 10 is >= 1.
        title_number = idx % distinct_titles if n > 10 else idx
        samples.append(
            Article(
                title=f"Title {title_number}",
                description=f"Description for article {idx}",
                url=f"https://example.com/article/{idx}",
            )
        )
    return samples
20+
21+
22+
def test_performance() -> None:
    """Benchmark find_duplicate_titles against find_duplicate_titles_improved.

    For each input size this builds test articles, times a single run of each
    implementation with ``timeit``, and prints one tab-separated row holding
    both timings, the speedup factor and the number of duplicate pairs.

    The result of every timed run is kept, so the pair count comes from the
    timed call itself rather than from a further run of the original
    implementation, and the two implementations are checked to agree.

    Raises:
        RuntimeError: If the two implementations report a different number
            of duplicate pairs for the same input.
    """
    sizes = [100, 200, 2000, 20000]

    print("Comparison: find_duplicate_titles vs find_duplicate_titles_improved")
    print("Size\tOriginal (O(n^2))\tImproved (O(n))\tSpeedup\tDuplicates")
    print("-" * 80)

    for size in sizes:
        articles = create_test_articles(size)
        # Filled in by the timed callables so each result can be reused
        # after timing instead of being recomputed.
        results: dict[str, list[tuple[Article, Article]]] = {}

        time_original = timeit.timeit(
            lambda: results.update(original=find_duplicate_titles(articles)),
            number=1,
        )
        time_improved = timeit.timeit(
            lambda: results.update(
                improved=find_duplicate_titles_improved(articles)
            ),
            number=1,
        )

        duplicates = results["original"]
        improved_count = len(results["improved"])
        # A speed comparison is only meaningful if both implementations
        # find the same number of duplicate pairs.
        if improved_count != len(duplicates):
            raise RuntimeError(
                f"Implementations disagree for size {size}: "
                f"{len(duplicates)} vs {improved_count} duplicate pairs"
            )
        speedup = time_original / time_improved if time_improved > 0 else float("inf")

        print(
            f"{size}\t{time_original:.6f}\t\t{time_improved:.6f}\t{speedup:.1f}x\t{len(duplicates)}"
        )
50+
51+
52+
# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_performance()

0 commit comments

Comments
 (0)