From d44ef0475675cf03ced41910ff501f8b8ba6bd9f Mon Sep 17 00:00:00 2001 From: Lev Givon Date: Sat, 29 Nov 2014 18:33:45 -0500 Subject: [PATCH 1/7] Organize in Python package, add Python 2 compatibility, add setuptools installer, fix utf-8 file encoding. --- README.md | 2 +- ez_setup.py | 364 +++++++++++++++++++++++++++++ hide_my_python.py | 10 +- hmp/__init__.py | 7 + arguments.py => hmp/arguments.py | 9 +- connect.py => hmp/connect.py | 2 +- countries_all => hmp/countries_all | 0 database.py => hmp/database.py | 2 +- parser.py => hmp/parser.py | 4 +- regex.py => hmp/regex.py | 0 setup.py | 72 ++++++ 11 files changed, 462 insertions(+), 10 deletions(-) create mode 100644 ez_setup.py create mode 100644 hmp/__init__.py rename arguments.py => hmp/arguments.py (97%) rename connect.py => hmp/connect.py (99%) rename countries_all => hmp/countries_all (100%) rename database.py => hmp/database.py (98%) rename parser.py => hmp/parser.py (98%) rename regex.py => hmp/regex.py (100%) create mode 100644 setup.py diff --git a/README.md b/README.md index 472c8d5..fba73d6 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ using these proxies. If someone gets pwned, don't look at me. ## REQUIREMENTS -All you need is Python 3, and the Python requests library. +All you need is Python 2 or 3, and the Python requests library. ## USAGE diff --git a/ez_setup.py b/ez_setup.py new file mode 100644 index 0000000..1420c11 --- /dev/null +++ b/ez_setup.py @@ -0,0 +1,364 @@ +#!/usr/bin/env python +"""Bootstrap setuptools installation + +To use setuptools in your package's setup.py, include this +file in the same directory and add this to the top of your setup.py:: + + from ez_setup import use_setuptools + use_setuptools() + +To require a specific version of setuptools, set a download +mirror, or use an alternate download directory, simply supply +the appropriate options to ``use_setuptools()``. + +This file can also be run as a script to install or upgrade setuptools. +""" +import os +import shutil +import sys +import tempfile +import tarfile +import optparse +import subprocess +import platform +import textwrap + +from distutils import log + +try: + from site import USER_SITE +except ImportError: + USER_SITE = None + +DEFAULT_VERSION = "2.2" +DEFAULT_URL = "https://pypi.python.org/packages/source/s/setuptools/" + +def _python_cmd(*args): + """ + Return True if the command succeeded. + """ + args = (sys.executable,) + args + return subprocess.call(args) == 0 + +def _install(tarball, install_args=()): + # extracting the tarball + tmpdir = tempfile.mkdtemp() + log.warn('Extracting in %s', tmpdir) + old_wd = os.getcwd() + try: + os.chdir(tmpdir) + tar = tarfile.open(tarball) + _extractall(tar) + tar.close() + + # going in the directory + subdir = os.path.join(tmpdir, os.listdir(tmpdir)[0]) + os.chdir(subdir) + log.warn('Now working in %s', subdir) + + # installing + log.warn('Installing Setuptools') + if not _python_cmd('setup.py', 'install', *install_args): + log.warn('Something went wrong during the installation.') + log.warn('See the error message above.') + # exitcode will be 2 + return 2 + finally: + os.chdir(old_wd) + shutil.rmtree(tmpdir) + + +def _build_egg(egg, tarball, to_dir): + # extracting the tarball + tmpdir = tempfile.mkdtemp() + log.warn('Extracting in %s', tmpdir) + old_wd = os.getcwd() + try: + os.chdir(tmpdir) + tar = tarfile.open(tarball) + _extractall(tar) + tar.close() + + # going in the directory + subdir = os.path.join(tmpdir, os.listdir(tmpdir)[0]) + os.chdir(subdir) + log.warn('Now working in %s', subdir) + + # building an egg + log.warn('Building a Setuptools egg in %s', to_dir) + _python_cmd('setup.py', '-q', 'bdist_egg', '--dist-dir', to_dir) + + finally: + os.chdir(old_wd) + shutil.rmtree(tmpdir) + # returning the result + log.warn(egg) + if not os.path.exists(egg): + raise IOError('Could not build the egg.') + + +def _do_download(version, download_base, to_dir, download_delay): + egg = os.path.join(to_dir, 'setuptools-%s-py%d.%d.egg' + % (version, sys.version_info[0], sys.version_info[1])) + if not os.path.exists(egg): + tarball = download_setuptools(version, download_base, + to_dir, download_delay) + _build_egg(egg, tarball, to_dir) + sys.path.insert(0, egg) + + # Remove previously-imported pkg_resources if present (see + # https://bitbucket.org/pypa/setuptools/pull-request/7/ for details). + if 'pkg_resources' in sys.modules: + del sys.modules['pkg_resources'] + + import setuptools + setuptools.bootstrap_install_from = egg + + +def use_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL, + to_dir=os.curdir, download_delay=15): + to_dir = os.path.abspath(to_dir) + rep_modules = 'pkg_resources', 'setuptools' + imported = set(sys.modules).intersection(rep_modules) + try: + import pkg_resources + except ImportError: + return _do_download(version, download_base, to_dir, download_delay) + try: + pkg_resources.require("setuptools>=" + version) + return + except pkg_resources.DistributionNotFound: + return _do_download(version, download_base, to_dir, download_delay) + except pkg_resources.VersionConflict as VC_err: + if imported: + msg = textwrap.dedent(""" + The required version of setuptools (>={version}) is not available, + and can't be installed while this script is running. Please + install a more recent version first, using + 'easy_install -U setuptools'. + + (Currently using {VC_err.args[0]!r}) + """).format(VC_err=VC_err, version=version) + sys.stderr.write(msg) + sys.exit(2) + + # otherwise, reload ok + del pkg_resources, sys.modules['pkg_resources'] + return _do_download(version, download_base, to_dir, download_delay) + +def _clean_check(cmd, target): + """ + Run the command to download target. If the command fails, clean up before + re-raising the error. + """ + try: + subprocess.check_call(cmd) + except subprocess.CalledProcessError: + if os.access(target, os.F_OK): + os.unlink(target) + raise + +def download_file_powershell(url, target): + """ + Download the file at url to target using Powershell (which will validate + trust). Raise an exception if the command cannot complete. + """ + target = os.path.abspath(target) + cmd = [ + 'powershell', + '-Command', + "(new-object System.Net.WebClient).DownloadFile(%(url)r, %(target)r)" % vars(), + ] + _clean_check(cmd, target) + +def has_powershell(): + if platform.system() != 'Windows': + return False + cmd = ['powershell', '-Command', 'echo test'] + devnull = open(os.path.devnull, 'wb') + try: + try: + subprocess.check_call(cmd, stdout=devnull, stderr=devnull) + except: + return False + finally: + devnull.close() + return True + +download_file_powershell.viable = has_powershell + +def download_file_curl(url, target): + cmd = ['curl', url, '--silent', '--output', target] + _clean_check(cmd, target) + +def has_curl(): + cmd = ['curl', '--version'] + devnull = open(os.path.devnull, 'wb') + try: + try: + subprocess.check_call(cmd, stdout=devnull, stderr=devnull) + except: + return False + finally: + devnull.close() + return True + +download_file_curl.viable = has_curl + +def download_file_wget(url, target): + cmd = ['wget', url, '--quiet', '--output-document', target] + _clean_check(cmd, target) + +def has_wget(): + cmd = ['wget', '--version'] + devnull = open(os.path.devnull, 'wb') + try: + try: + subprocess.check_call(cmd, stdout=devnull, stderr=devnull) + except: + return False + finally: + devnull.close() + return True + +download_file_wget.viable = has_wget + +def download_file_insecure(url, target): + """ + Use Python to download the file, even though it cannot authenticate the + connection. + """ + try: + from urllib.request import urlopen + except ImportError: + from urllib2 import urlopen + src = dst = None + try: + src = urlopen(url) + # Read/write all in one block, so we don't create a corrupt file + # if the download is interrupted. + data = src.read() + dst = open(target, "wb") + dst.write(data) + finally: + if src: + src.close() + if dst: + dst.close() + +download_file_insecure.viable = lambda: True + +def get_best_downloader(): + downloaders = [ + download_file_powershell, + download_file_curl, + download_file_wget, + download_file_insecure, + ] + + for dl in downloaders: + if dl.viable(): + return dl + +def download_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL, + to_dir=os.curdir, delay=15, + downloader_factory=get_best_downloader): + """Download setuptools from a specified location and return its filename + + `version` should be a valid setuptools version number that is available + as an egg for download under the `download_base` URL (which should end + with a '/'). `to_dir` is the directory where the egg will be downloaded. + `delay` is the number of seconds to pause before an actual download + attempt. + + ``downloader_factory`` should be a function taking no arguments and + returning a function for downloading a URL to a target. + """ + # making sure we use the absolute path + to_dir = os.path.abspath(to_dir) + tgz_name = "setuptools-%s.tar.gz" % version + url = download_base + tgz_name + saveto = os.path.join(to_dir, tgz_name) + if not os.path.exists(saveto): # Avoid repeated downloads + log.warn("Downloading %s", url) + downloader = downloader_factory() + downloader(url, saveto) + return os.path.realpath(saveto) + + +def _extractall(self, path=".", members=None): + """Extract all members from the archive to the current working + directory and set owner, modification time and permissions on + directories afterwards. `path' specifies a different directory + to extract to. `members' is optional and must be a subset of the + list returned by getmembers(). + """ + import copy + import operator + from tarfile import ExtractError + directories = [] + + if members is None: + members = self + + for tarinfo in members: + if tarinfo.isdir(): + # Extract directories with a safe mode. + directories.append(tarinfo) + tarinfo = copy.copy(tarinfo) + tarinfo.mode = 448 # decimal for oct 0700 + self.extract(tarinfo, path) + + # Reverse sort directories. + directories.sort(key=operator.attrgetter('name'), reverse=True) + + # Set correct owner, mtime and filemode on directories. + for tarinfo in directories: + dirpath = os.path.join(path, tarinfo.name) + try: + self.chown(tarinfo, dirpath) + self.utime(tarinfo, dirpath) + self.chmod(tarinfo, dirpath) + except ExtractError as e: + if self.errorlevel > 1: + raise + else: + self._dbg(1, "tarfile: %s" % e) + + +def _build_install_args(options): + """ + Build the arguments to 'python setup.py install' on the setuptools package + """ + return ['--user'] if options.user_install else [] + +def _parse_args(): + """ + Parse the command line for options + """ + parser = optparse.OptionParser() + parser.add_option( + '--user', dest='user_install', action='store_true', default=False, + help='install in user site package (requires Python 2.6 or later)') + parser.add_option( + '--download-base', dest='download_base', metavar="URL", + default=DEFAULT_URL, + help='alternative URL from where to download the setuptools package') + parser.add_option( + '--insecure', dest='downloader_factory', action='store_const', + const=lambda: download_file_insecure, default=get_best_downloader, + help='Use internal, non-validating downloader' + ) + options, args = parser.parse_args() + # positional arguments are ignored + return options + +def main(version=DEFAULT_VERSION): + """Install or upgrade setuptools and EasyInstall""" + options = _parse_args() + tarball = download_setuptools(download_base=options.download_base, + downloader_factory=options.downloader_factory) + return _install(tarball, _build_install_args(options)) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/hide_my_python.py b/hide_my_python.py index 2bdbf15..4ab0540 100755 --- a/hide_my_python.py +++ b/hide_my_python.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# -*- coding: utf8 -*- +# -*- coding: utf-8 -*- # # HideMyPython! - A parser for the free proxy list on HideMyAss! # @@ -21,10 +21,12 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +from __future__ import print_function + import sys -import arguments -import parser -import database +import hmp.arguments as arguments +import hmp.parser as parser +import hmp.database as database def main(): # We create an argument parser diff --git a/hmp/__init__.py b/hmp/__init__.py new file mode 100644 index 0000000..3d84fc6 --- /dev/null +++ b/hmp/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- + +try: + __import__('pkg_resources').declare_namespace(__name__) +except ImportError: + from pkgutil import extend_path + __path__ = extend_path(__path__, __name__) diff --git a/arguments.py b/hmp/arguments.py similarity index 97% rename from arguments.py rename to hmp/arguments.py index 7b247e2..e80e0a8 100644 --- a/arguments.py +++ b/hmp/arguments.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# -*- coding: utf8 -*- +# -*- coding: utf-8 -*- # # HideMyPython! - A parser for the free proxy list on HideMyAss! # @@ -23,10 +23,15 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +from __future__ import print_function + import os import sys import argparse +from pkg_resources import resource_filename +countries_all = resource_filename(__name__, 'countries_all') + def create_argument_parser(): arg_parser = argparse.ArgumentParser( prog='hide_my_python', @@ -46,7 +51,7 @@ def create_argument_parser(): # The user can specify a list of countries arg_parser.add_argument('-ct', - default='{0}/countries_all'.format(os.path.dirname(sys.argv[0])), + default=countries_all, dest='countries_file', type=argparse.FileType('r'), help='file containing the countries where the ' 'proxies can be based (default: %(default)s)') diff --git a/connect.py b/hmp/connect.py similarity index 99% rename from connect.py rename to hmp/connect.py index 2852b44..edb8fc9 100644 --- a/connect.py +++ b/hmp/connect.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# -*- coding: utf8 -*- +# -*- coding: utf-8 -*- # # HideMyPython! - A parser for the free proxy list on HideMyAss! # diff --git a/countries_all b/hmp/countries_all similarity index 100% rename from countries_all rename to hmp/countries_all diff --git a/database.py b/hmp/database.py similarity index 98% rename from database.py rename to hmp/database.py index 24e0961..80e6238 100644 --- a/database.py +++ b/hmp/database.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# -*- coding: utf8 -*- +# -*- coding: utf-8 -*- # # HideMyPython! - A parser for the free proxy list on HideMyAss! # diff --git a/parser.py b/hmp/parser.py similarity index 98% rename from parser.py rename to hmp/parser.py index d88ca6b..d5f2601 100644 --- a/parser.py +++ b/hmp/parser.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# -*- coding: utf8 -*- +# -*- coding: utf-8 -*- # # HideMyPython! - A parser for the free proxy list on HideMyAss! # @@ -22,6 +22,8 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +from __future__ import print_function + import re import regex import connect diff --git a/regex.py b/hmp/regex.py similarity index 100% rename from regex.py rename to hmp/regex.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..8dbafd7 --- /dev/null +++ b/setup.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys, os +from glob import glob + +# Install setuptools if it isn't available: +try: + import setuptools +except ImportError: + from ez_setup import use_setuptools + use_setuptools() + +from distutils.command.install import INSTALL_SCHEMES +from distutils.command.install_headers import install_headers +from setuptools import find_packages +from setuptools import setup + +NAME = 'hide_my_python' +VERSION = '0.1' +AUTHOR = 'Yannick Méheut' +AUTHOR_EMAIL = 'useless@utouch.fr' +URL = 'https://github.com/the-useless-one/hide_my_python' +MAINTAINER = AUTHOR +MAINTAINER_EMAIL = AUTHOR_EMAIL +DESCRIPTION = 'A parser for the free proxy list on HideMyAss!' +LONG_DESCRIPTION = DESCRIPTION +DOWNLOAD_URL = URL +LICENSE = 'GPLv3' +CLASSIFIERS = [ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Topic :: Software Development'] +NAMESPACE_PACKAGES = ['hmp'] +PACKAGES = find_packages() + +if __name__ == "__main__": + if os.path.exists('MANIFEST'): + os.remove('MANIFEST') + + # This enables the installation of hmp/__init__.py as a data + # file: + for scheme in INSTALL_SCHEMES.values(): + scheme['data'] = scheme['purelib'] + + setup( + name = NAME, + version = VERSION, + author = AUTHOR, + author_email = AUTHOR_EMAIL, + license = LICENSE, + classifiers = CLASSIFIERS, + description = DESCRIPTION, + long_description = LONG_DESCRIPTION, + url = URL, + maintainer = MAINTAINER, + maintainer_email = MAINTAINER_EMAIL, + namespace_packages = NAMESPACE_PACKAGES, + packages = PACKAGES, + package_data = {'hmp': ['countries_all']}, + scripts = ['hide_my_python.py'], + + # Force installation of __init__.py in namespace package: + data_files = [('hmp', ['hmp/__init__.py'])], + include_package_data = True, + install_requires = [ + 'requests', + ], + ) From 97bf8d85eb15a47226d5504a5f24f78d5104379e Mon Sep 17 00:00:00 2001 From: Lev Givon Date: Sat, 29 Nov 2014 18:36:53 -0500 Subject: [PATCH 2/7] Add MANIFEST.in. --- MANIFEST.in | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..aca75b2 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,4 @@ +include *.py +include hmp/countries_all +include LICENSE README.md +exclude MANIFEST.in From a78c41821ff6e86148f72f7561b2dee2096f953a Mon Sep 17 00:00:00 2001 From: Lev Givon Date: Sat, 29 Nov 2014 18:46:37 -0500 Subject: [PATCH 3/7] Update ez_setup to 7.0. --- ez_setup.py | 216 ++++++++++++++++++++++------------------------------ 1 file changed, 92 insertions(+), 124 deletions(-) diff --git a/ez_setup.py b/ez_setup.py index 1420c11..a523401 100644 --- a/ez_setup.py +++ b/ez_setup.py @@ -17,20 +17,26 @@ import shutil import sys import tempfile -import tarfile +import zipfile import optparse import subprocess import platform import textwrap +import contextlib from distutils import log +try: + from urllib.request import urlopen +except ImportError: + from urllib2 import urlopen + try: from site import USER_SITE except ImportError: USER_SITE = None -DEFAULT_VERSION = "2.2" +DEFAULT_VERSION = "7.0" DEFAULT_URL = "https://pypi.python.org/packages/source/s/setuptools/" def _python_cmd(*args): @@ -40,22 +46,9 @@ def _python_cmd(*args): args = (sys.executable,) + args return subprocess.call(args) == 0 -def _install(tarball, install_args=()): - # extracting the tarball - tmpdir = tempfile.mkdtemp() - log.warn('Extracting in %s', tmpdir) - old_wd = os.getcwd() - try: - os.chdir(tmpdir) - tar = tarfile.open(tarball) - _extractall(tar) - tar.close() - - # going in the directory - subdir = os.path.join(tmpdir, os.listdir(tmpdir)[0]) - os.chdir(subdir) - log.warn('Now working in %s', subdir) +def _install(archive_filename, install_args=()): + with archive_context(archive_filename): # installing log.warn('Installing Setuptools') if not _python_cmd('setup.py', 'install', *install_args): @@ -63,47 +56,68 @@ def _install(tarball, install_args=()): log.warn('See the error message above.') # exitcode will be 2 return 2 - finally: - os.chdir(old_wd) - shutil.rmtree(tmpdir) -def _build_egg(egg, tarball, to_dir): - # extracting the tarball +def _build_egg(egg, archive_filename, to_dir): + with archive_context(archive_filename): + # building an egg + log.warn('Building a Setuptools egg in %s', to_dir) + _python_cmd('setup.py', '-q', 'bdist_egg', '--dist-dir', to_dir) + # returning the result + log.warn(egg) + if not os.path.exists(egg): + raise IOError('Could not build the egg.') + + +class ContextualZipFile(zipfile.ZipFile): + """ + Supplement ZipFile class to support context manager for Python 2.6 + """ + + def __enter__(self): + return self + + def __exit__(self, type, value, traceback): + self.close() + + def __new__(cls, *args, **kwargs): + """ + Construct a ZipFile or ContextualZipFile as appropriate + """ + if hasattr(zipfile.ZipFile, '__exit__'): + return zipfile.ZipFile(*args, **kwargs) + return super(ContextualZipFile, cls).__new__(cls) + + +@contextlib.contextmanager +def archive_context(filename): + # extracting the archive tmpdir = tempfile.mkdtemp() log.warn('Extracting in %s', tmpdir) old_wd = os.getcwd() try: os.chdir(tmpdir) - tar = tarfile.open(tarball) - _extractall(tar) - tar.close() + with ContextualZipFile(filename) as archive: + archive.extractall() # going in the directory subdir = os.path.join(tmpdir, os.listdir(tmpdir)[0]) os.chdir(subdir) log.warn('Now working in %s', subdir) - - # building an egg - log.warn('Building a Setuptools egg in %s', to_dir) - _python_cmd('setup.py', '-q', 'bdist_egg', '--dist-dir', to_dir) + yield finally: os.chdir(old_wd) shutil.rmtree(tmpdir) - # returning the result - log.warn(egg) - if not os.path.exists(egg): - raise IOError('Could not build the egg.') def _do_download(version, download_base, to_dir, download_delay): egg = os.path.join(to_dir, 'setuptools-%s-py%d.%d.egg' % (version, sys.version_info[0], sys.version_info[1])) if not os.path.exists(egg): - tarball = download_setuptools(version, download_base, + archive = download_setuptools(version, download_base, to_dir, download_delay) - _build_egg(egg, tarball, to_dir) + _build_egg(egg, archive, to_dir) sys.path.insert(0, egg) # Remove previously-imported pkg_resources if present (see @@ -116,7 +130,7 @@ def _do_download(version, download_base, to_dir, download_delay): def use_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL, - to_dir=os.curdir, download_delay=15): + to_dir=os.curdir, download_delay=15): to_dir = os.path.abspath(to_dir) rep_modules = 'pkg_resources', 'setuptools' imported = set(sys.modules).intersection(rep_modules) @@ -164,10 +178,16 @@ def download_file_powershell(url, target): trust). Raise an exception if the command cannot complete. """ target = os.path.abspath(target) + ps_cmd = ( + "[System.Net.WebRequest]::DefaultWebProxy.Credentials = " + "[System.Net.CredentialCache]::DefaultCredentials; " + "(new-object System.Net.WebClient).DownloadFile(%(url)r, %(target)r)" + % vars() + ) cmd = [ 'powershell', '-Command', - "(new-object System.Net.WebClient).DownloadFile(%(url)r, %(target)r)" % vars(), + ps_cmd, ] _clean_check(cmd, target) @@ -175,14 +195,11 @@ def has_powershell(): if platform.system() != 'Windows': return False cmd = ['powershell', '-Command', 'echo test'] - devnull = open(os.path.devnull, 'wb') - try: + with open(os.path.devnull, 'wb') as devnull: try: subprocess.check_call(cmd, stdout=devnull, stderr=devnull) - except: + except Exception: return False - finally: - devnull.close() return True download_file_powershell.viable = has_powershell @@ -193,14 +210,11 @@ def download_file_curl(url, target): def has_curl(): cmd = ['curl', '--version'] - devnull = open(os.path.devnull, 'wb') - try: + with open(os.path.devnull, 'wb') as devnull: try: subprocess.check_call(cmd, stdout=devnull, stderr=devnull) - except: + except Exception: return False - finally: - devnull.close() return True download_file_curl.viable = has_curl @@ -211,14 +225,11 @@ def download_file_wget(url, target): def has_wget(): cmd = ['wget', '--version'] - devnull = open(os.path.devnull, 'wb') - try: + with open(os.path.devnull, 'wb') as devnull: try: subprocess.check_call(cmd, stdout=devnull, stderr=devnull) - except: + except Exception: return False - finally: - devnull.close() return True download_file_wget.viable = has_wget @@ -228,45 +239,36 @@ def download_file_insecure(url, target): Use Python to download the file, even though it cannot authenticate the connection. """ + src = urlopen(url) try: - from urllib.request import urlopen - except ImportError: - from urllib2 import urlopen - src = dst = None - try: - src = urlopen(url) - # Read/write all in one block, so we don't create a corrupt file - # if the download is interrupted. + # Read all the data in one block. data = src.read() - dst = open(target, "wb") - dst.write(data) finally: - if src: - src.close() - if dst: - dst.close() + src.close() + + # Write all the data in one block to avoid creating a partial file. + with open(target, "wb") as dst: + dst.write(data) download_file_insecure.viable = lambda: True def get_best_downloader(): - downloaders = [ + downloaders = ( download_file_powershell, download_file_curl, download_file_wget, download_file_insecure, - ] - - for dl in downloaders: - if dl.viable(): - return dl + ) + viable_downloaders = (dl for dl in downloaders if dl.viable()) + return next(viable_downloaders, None) def download_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL, - to_dir=os.curdir, delay=15, - downloader_factory=get_best_downloader): - """Download setuptools from a specified location and return its filename + to_dir=os.curdir, delay=15, downloader_factory=get_best_downloader): + """ + Download setuptools from a specified location and return its filename `version` should be a valid setuptools version number that is available - as an egg for download under the `download_base` URL (which should end + as an sdist for download under the `download_base` URL (which should end with a '/'). `to_dir` is the directory where the egg will be downloaded. `delay` is the number of seconds to pause before an actual download attempt. @@ -276,56 +278,15 @@ def download_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL, """ # making sure we use the absolute path to_dir = os.path.abspath(to_dir) - tgz_name = "setuptools-%s.tar.gz" % version - url = download_base + tgz_name - saveto = os.path.join(to_dir, tgz_name) + zip_name = "setuptools-%s.zip" % version + url = download_base + zip_name + saveto = os.path.join(to_dir, zip_name) if not os.path.exists(saveto): # Avoid repeated downloads log.warn("Downloading %s", url) downloader = downloader_factory() downloader(url, saveto) return os.path.realpath(saveto) - -def _extractall(self, path=".", members=None): - """Extract all members from the archive to the current working - directory and set owner, modification time and permissions on - directories afterwards. `path' specifies a different directory - to extract to. `members' is optional and must be a subset of the - list returned by getmembers(). - """ - import copy - import operator - from tarfile import ExtractError - directories = [] - - if members is None: - members = self - - for tarinfo in members: - if tarinfo.isdir(): - # Extract directories with a safe mode. - directories.append(tarinfo) - tarinfo = copy.copy(tarinfo) - tarinfo.mode = 448 # decimal for oct 0700 - self.extract(tarinfo, path) - - # Reverse sort directories. - directories.sort(key=operator.attrgetter('name'), reverse=True) - - # Set correct owner, mtime and filemode on directories. - for tarinfo in directories: - dirpath = os.path.join(path, tarinfo.name) - try: - self.chown(tarinfo, dirpath) - self.utime(tarinfo, dirpath) - self.chmod(tarinfo, dirpath) - except ExtractError as e: - if self.errorlevel > 1: - raise - else: - self._dbg(1, "tarfile: %s" % e) - - def _build_install_args(options): """ Build the arguments to 'python setup.py install' on the setuptools package @@ -349,16 +310,23 @@ def _parse_args(): const=lambda: download_file_insecure, default=get_best_downloader, help='Use internal, non-validating downloader' ) + parser.add_option( + '--version', help="Specify which version to download", + default=DEFAULT_VERSION, + ) options, args = parser.parse_args() # positional arguments are ignored return options -def main(version=DEFAULT_VERSION): +def main(): """Install or upgrade setuptools and EasyInstall""" options = _parse_args() - tarball = download_setuptools(download_base=options.download_base, - downloader_factory=options.downloader_factory) - return _install(tarball, _build_install_args(options)) + archive = download_setuptools( + version=options.version, + download_base=options.download_base, + downloader_factory=options.downloader_factory, + ) + return _install(archive, _build_install_args(options)) if __name__ == '__main__': sys.exit(main()) From 04169feccdd1e73013365f1663470318c9c58660 Mon Sep 17 00:00:00 2001 From: Lev Givon Date: Sat, 29 Nov 2014 22:23:50 -0500 Subject: [PATCH 4/7] Add support for saving proxies in CSV file. --- README.md | 9 ++++++++- hide_my_python.py | 42 ++++++++++++++++++++++++++---------------- hmp/arguments.py | 5 +++++ hmp/database.py | 8 ++++++++ 4 files changed, 47 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index fba73d6..7bfb935 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,8 @@ Don't forget to make the script executable with: To see a list of the options, just issue: ./hide_my_python.py -h - usage: hide_my_python [-h] -o DATABASE_FILE [-n NUMBER_OF_PROXIES] + usage: hide_my_python [-h] -o DATABASE_FILE [-f {csv,sqlite3}] + [-n NUMBER_OF_PROXIES] [-ct COUNTRIES_FILE] [-p PORTS [PORTS ...]] [-pr {http,https,socks} [{http,https,socks} ...]] [-a] [-ka] [-s] [-c] [-v] @@ -58,6 +59,7 @@ To see a list of the options, just issue: optional arguments: -h, --help show this help message and exit -o DATABASE_FILE database file where the proxies will be saved + -f {csv,sqlite3} format of output file (default: sqlite3) -n NUMBER_OF_PROXIES maximum number of proxies to retrieve (default: all) -ct COUNTRIES_FILE file containing the countries where the proxies can be based (default: countries_all) @@ -101,6 +103,11 @@ following structure: * `speed`: the speed level of the proxy (type: `TEXT`) * `connection_time`: the connection time of the proxy (type: `TEXT`) +### Database file format + +This option can be used to choose whether to save the proxies +in a sqlite3 file or a CSV file. + ### Number of proxies If this argument is defined, the script will only return the first `n` diff --git a/hide_my_python.py b/hide_my_python.py index 4ab0540..931ea36 100755 --- a/hide_my_python.py +++ b/hide_my_python.py @@ -29,7 +29,7 @@ import hmp.database as database def main(): - # We create an argument parser + # We create an argument parser arg_parser = arguments.create_argument_parser() # We parse the arguments @@ -40,23 +40,33 @@ def main(): if args.verbose: arguments.print_arguments(args) - # We open the database file where the proxies will be stored - connection, cursor = database.initialize_database(args.database_file) + if args.output_format == 'sqlite3': + # We open the database file where the proxies will be stored + connection, cursor = database.initialize_database(args.database_file) - try: - # We generate the proxies - for proxy in parser.generate_proxy(args): - # And we store them in the database - database.insert_in_database(cursor, proxy) - except KeyboardInterrupt: - if args.verbose: - print('') - print('[warn] received interruption signal') - - # We save the changes made to the database, and close the file - connection.commit() - connection.close() + try: + # We generate the proxies + for proxy in parser.generate_proxy(args): + # And we store them in the database + database.insert_in_database(cursor, proxy) + except KeyboardInterrupt: + if args.verbose: + print('') + print('[warn] received interruption signal') + # We save the changes made to the database, and close the file + connection.commit() + connection.close() + else: + f, writer = database.initialize_csv(args.database_file) + try: + for proxy in parser.generate_proxy(args): + database.write_to_csv(writer, proxy) + except KeyboardInterrupt: + if args.verbose: + print('') + print('[warn] received interruption signal') + f.close() return 0 if __name__ == '__main__': diff --git a/hmp/arguments.py b/hmp/arguments.py index e80e0a8..42c9fae 100644 --- a/hmp/arguments.py +++ b/hmp/arguments.py @@ -44,6 +44,11 @@ def create_argument_parser(): required=True, help='database file where the proxies will be saved') + # Output file format: + arg_parser.add_argument('-f', dest='output_format', type=str, + choices=['csv', 'sqlite3'], default='sqlite', + help='format of output file (default: sqlite3)') + # The user can specify a maximum number of proxies to retrieve arg_parser.add_argument('-n', dest='number_of_proxies', type=int, default=0, diff --git a/hmp/database.py b/hmp/database.py index 80e6238..ff7c32e 100644 --- a/hmp/database.py +++ b/hmp/database.py @@ -20,6 +20,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +import csv import sys import sqlite3 @@ -56,3 +57,10 @@ def initialize_database(database_file): # We return the connection to the database return connection, cursor +def write_to_csv(writer, proxy): + writer.writerow(proxy) + +def initialize_csv(csv_file): + # Create CSV writer: + f = open(csv_file, 'w') + return f, csv.writer(f) From 10b79f682676b8be896090ab8c432c08442af606 Mon Sep 17 00:00:00 2001 From: Lev Givon Date: Sat, 29 Nov 2014 22:37:00 -0500 Subject: [PATCH 5/7] Add support for writing CSV output to screen. --- README.md | 5 +++-- hmp/arguments.py | 10 ++++++---- hmp/database.py | 5 ++++- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 7bfb935..6e5aeaf 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,8 @@ To see a list of the options, just issue: ### Database file -The proxies will be saved in this file. If the file doesn't exist, it will be +The proxies will be saved in this file. If no file is specified, the proxies are +written to the screen in CSV format. If the file doesn't exist, it will be created. If it exists, the proxies will be appended to it (the file won't be overwritten). The database contains only one table, named `proxies`, with the following structure: @@ -106,7 +107,7 @@ following structure: ### Database file format This option can be used to choose whether to save the proxies -in a sqlite3 file or a CSV file. +in a sqlite3 file or a CSV file. Only CSV data may be written to the screen. ### Number of proxies diff --git a/hmp/arguments.py b/hmp/arguments.py index 42c9fae..409ee8e 100644 --- a/hmp/arguments.py +++ b/hmp/arguments.py @@ -41,13 +41,12 @@ def create_argument_parser(): # The user has to specify an output file arg_parser.add_argument('-o', dest='database_file', type=str, - required=True, - help='database file where the proxies will be saved') + help='database file where the proxies will be saved (default: stdout)') # Output file format: arg_parser.add_argument('-f', dest='output_format', type=str, - choices=['csv', 'sqlite3'], default='sqlite', - help='format of output file (default: sqlite3)') + choices=['csv', 'sqlite3'], default='csv', + help='format of output file (default: csv)') # The user can specify a maximum number of proxies to retrieve arg_parser.add_argument('-n', dest='number_of_proxies', type=int, @@ -113,6 +112,9 @@ def process_arguments(args, arg_parser): error_msg = error_msg.format('-n', args.number_of_proxies) arg_parser.error(error_msg) + if not args.database_file and args.output_format == 'sqlite3': + arg_parser.error('cannot write sqlite3 file to stdout') + # We retrieve the countries from the given file args.countries_list = [] for country in args.countries_file.readlines(): diff --git a/hmp/database.py b/hmp/database.py index ff7c32e..c509547 100644 --- a/hmp/database.py +++ b/hmp/database.py @@ -62,5 +62,8 @@ def write_to_csv(writer, proxy): def initialize_csv(csv_file): # Create CSV writer: - f = open(csv_file, 'w') + if csv_file: + f = open(csv_file, 'w') + else: + f = sys.stdout return f, csv.writer(f) From 15c780202167d5e6958a6001f4f36fcf39f8cb1d Mon Sep 17 00:00:00 2001 From: Lev Givon Date: Sat, 29 Nov 2014 22:56:51 -0500 Subject: [PATCH 6/7] Add API. --- README.md | 7 ++++++ hmp/api.py | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 hmp/api.py diff --git a/README.md b/README.md index 6e5aeaf..45fddc4 100644 --- a/README.md +++ b/README.md @@ -246,11 +246,18 @@ parsing. [info] speed: ['Medium', 'High'] [info] retrieved 100/100 proxies +### API + +Other Python programs can access HideMyPython's functionality via the +`hmp.api.get_proxies()` function. Proxies are returned as a list of tuples, each +of which contains the proxy ip, port, type, country, anonymity, and speed. + ## COPYRIGHT HideMyPython! - A parser for the free proxy list on HideMyAss! Yannick Méheut [useless (at) utouch (dot) fr] - Copyright © 2013 +Lev Givon [lev (at) columbia (dot) edu] - Copyright © 2014 This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the diff --git a/hmp/api.py b/hmp/api.py new file mode 100644 index 0000000..d4e212a --- /dev/null +++ b/hmp/api.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# HideMyPython! - A parser for the free proxy list on HideMyAss! +# +# This file provides a programming interface to HideMyPython's functionality. +# +# Copyright (C) 2014 Lev Givon +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import arguments +import parser + +from pkg_resources import resource_filename +countries_all_default = resource_filename(__name__, 'countries_all') + +def get_proxies(number_of_proxies=None, countries_all=None, + ports=None, protocols=['http', 'https', 'socks'], + anonymity=0, keep_alive=False, speed=0, + connection_time=1, verbose=False): + arg_parser = arguments.create_argument_parser() + arg_list = [] + if number_of_proxies is not None: + if not (isinstance(number_of_proxies, int) and number_of_proxies >= 0): + raise ValueError('number of proxies must be an integer >= 0') + arg_list.extend(['-n', str(number_of_proxies)]) + if countries_all is None: + arg_list.extend(['-ct', countries_all_default]) + else: + arg_list.extend(['-ct', countries_all]) + if ports is not None: + try: + iter(ports) + except: + raise ValueError('ports must be an iterable') + arg_list.extend(['-p']+list(map(str, ports))) + try: + iter(protocols) + except: + raise ValueError('protocols must be an iterable') + if not set(protocols).intersection(set(['http', 'https', 'socks'])): + raise ValueError('invalid protocol') + arg_list.extend(['-pr']+list(protocols)) + if anonymity >= 1: + arg_list.append('-'+'a'*anonymity) + if keep_alive: + arg_list.append('-ka') + if speed >= 1: + arg_list.append('-'+'s'*speed) + if verbose: + arg_list.append('-v') + args = arg_parser.parse_args(arg_list) + arguments.process_arguments(args, arg_parser) + return [proxy for proxy in parser.generate_proxy(args)] From 16d5e2370fad16f86f16827c0ba24f70fa553841 Mon Sep 17 00:00:00 2001 From: Lev Givon Date: Sat, 29 Nov 2014 22:58:09 -0500 Subject: [PATCH 7/7] Tweak. --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 45fddc4..6d227d0 100644 --- a/README.md +++ b/README.md @@ -257,6 +257,7 @@ of which contains the proxy ip, port, type, country, anonymity, and speed. HideMyPython! - A parser for the free proxy list on HideMyAss! Yannick Méheut [useless (at) utouch (dot) fr] - Copyright © 2013 + Lev Givon [lev (at) columbia (dot) edu] - Copyright © 2014 This program is free software: you can redistribute it and/or modify it