From a446a6f82f78fc70702cef6b5c10bcf33a15d5fc Mon Sep 17 00:00:00 2001
From: Mike7R <mice7r@gmail.com>
Date: Fri, 29 Jan 2016 01:55:28 +0100
Subject: [PATCH 1/2] Fixed regular expressions

---
 regex.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/regex.py b/regex.py
index e8eb2bb..d4c7337 100644
--- a/regex.py
+++ b/regex.py
@@ -27,7 +27,7 @@
 PROXY_HTML = re.compile(r'<tr class=".*?</tr>', flags=re.DOTALL)
 
 # This regex corresponds to the HTML code containing the IP:port of a proxy
-IP_PORT_HTML = re.compile(r'<td><span><style>.*?</td>\s*<td>.*?</td>',
+IP_PORT_HTML = re.compile(r'<td>\s*?<span>\s*?<style>.*?</td>\s*<td>.*?</td>',
 		flags=re.DOTALL)
 
 # This regex is used to find the class which won't be displayed in the IP:port
@@ -48,7 +48,7 @@
 
 # This regex is used to recover the HTML code containing the country in the
 # proxy HTML code
-COUNTRY_HTML = re.compile(r'<span class="country".*?>.*?</span>',
+COUNTRY_HTML = re.compile(r'class="country".*?\/>.*?</span>',
         re.DOTALL)
 
 # This regex is used to recover the country
@@ -70,5 +70,5 @@
 
 # This regex is used to recover the type and anonymity level in the proxy
 # HTML code
-TYPE_ANONYMITY = re.compile(r'<td>(.*?)</td>\s*<td.*?>(.*)</td>')
+TYPE_ANONYMITY = re.compile(r'<td>\s*(.*)\s*</td>\s*<td.*?>\s*(.*)</td>\s*</tr>')
 

From aeaab8ceace8fc2a0211c6b99b8e112c4f805578 Mon Sep 17 00:00:00 2001
From: Mike7R <mice7r@gmail.com>
Date: Fri, 29 Jan 2016 02:15:37 +0100
Subject: [PATCH 2/2] Strip whitespace from parsed values

---
 parser.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/parser.py b/parser.py
index d88ca6b..530d5d5 100644
--- a/parser.py
+++ b/parser.py
@@ -60,7 +60,7 @@ def parse_proxy(proxy_html):
     # We get the chunk of code corresponding to the country...
     country_html = regex.COUNTRY_HTML.search(proxy_html).group(0)
     # ...and we parse it
-    country = regex.COUNTRY.search(country_html).group(1)
+    country = regex.COUNTRY.search(country_html).group(1).strip()
 
     # We get the chunk of code corresponding to the speed...
     speed_html = regex.SPEED_HTML.search(proxy_html).group(1)
@@ -75,8 +75,8 @@ def parse_proxy(proxy_html):
     # We get the chunk of code corresponding to the type and anonymity...
     match = regex.TYPE_ANONYMITY.search(proxy_html)
     # ...and we parse it
-    type = match.group(1)
-    anonymity = match.group(2)
+    type = match.group(1).strip()
+    anonymity = match.group(2).strip()
 
     # We return a tuple
     return ip, int(port), type, country, anonymity, speed, connection_time