1616
1717import requests
1818
19+ from shutil import copyfileobj
20+
1921
# Output format identifiers accepted by the conversion methods.
FORMAT_CSV = 'csv'
FORMAT_XLSX_MULTIPLE = 'xlsx-multiple'
3739 '.xlsx' : FORMAT_XLSX ,
3840 '.xml' : FORMAT_XML ,
3941}
40-
# Formats whose converted output `Client.convert` returns as text
# (`response.text`) rather than bytes when no output path is given.
_STRING_FORMATS = {FORMAT_CSV, FORMAT_XML}
4143
4244class Client (object ):
def __init__(self, api_key, api_url=_API_URL, timeout=_DEFAULT_TIMEOUT):
    """
    Store the connection settings on the client.

    `api_key` is checked by `request` (an empty string is rejected there).
    `api_url` and `timeout` are kept as-is for later use; presumably they
    are the endpoint and the default request timeout -- confirm against
    `request`.
    """
    self.api_key = api_key
    self.api_url = api_url
    self.timeout = timeout
4749
def xlsx(self, pdf_path, xlsx_path=None):
    """
    Convenience method to convert PDF to XLSX multiple sheets.

    Delegates to `xlsx_multiple`. If xlsx_path is None, returns the
    output as a byte string.
    """
    return self.xlsx_multiple(pdf_path, xlsx_path)
5357
def xlsx_single(self, pdf_path, xlsx_path=None):
    """
    Convenience method to convert PDF to XLSX single sheet.

    Delegates to `convert` with `FORMAT_XLSX_SINGLE`. If xlsx_path is
    None, returns the output as a byte string.
    """
    return self.convert(pdf_path, xlsx_path, out_format=FORMAT_XLSX_SINGLE)
5965
def xlsx_multiple(self, pdf_path, xlsx_path=None):
    """
    Convenience method to convert PDF to XLSX multiple sheets.

    Delegates to `convert` with `FORMAT_XLSX_MULTIPLE`. If xlsx_path is
    None, returns the output as a byte string.
    """
    return self.convert(pdf_path, xlsx_path, out_format=FORMAT_XLSX_MULTIPLE)
6573
def xml(self, pdf_path, xml_path=None):
    """
    Convenience method to convert PDF to XML.

    Delegates to `convert` with `FORMAT_XML`. If xml_path is None,
    returns the output as a string.
    """
    return self.convert(pdf_path, xml_path, out_format=FORMAT_XML)
7181
def csv(self, pdf_path, csv_path=None):
    """
    Convenience method to convert PDF to CSV.

    Delegates to `convert` with `FORMAT_CSV`. If csv_path is None,
    returns the output as a string.
    """
    return self.convert(pdf_path, csv_path, out_format=FORMAT_CSV)
7789
def convert(self, pdf_path, out_path=None, out_format=None, query_params=None, **requests_params):
    """
    Convert PDF given by `pdf_path` into `out_format` at `out_path`.

    If `out_path` is None, returns a string containing the contents, or
    bytes for binary output types (e.g. XLSX). Otherwise the converted
    output is written to `out_path` and None is returned.

    `query_params` and `requests_params` are passed through to `request`.
    """
    (out_path, out_format) = Client.ensure_format_ext(out_path, out_format)
    with open(pdf_path, 'rb') as pdf_fo:
        response = self.request(pdf_fo, out_format, query_params,
                                **requests_params)

    try:
        if out_path is None:
            use_text = out_format in _STRING_FORMATS
            return response.text if use_text else response.content

        with open(out_path, 'wb') as out_fo:
            converted_fo = response.raw
            # Ensure that gzip content is decoded.
            converted_fo.decode_content = True
            copyfileobj(converted_fo, out_fo)
    finally:
        # Release the underlying connection on every exit path; the
        # response body has been fully consumed (or abandoned) by now.
        response.close()
111+
def dump(self, pdf_fo, out_format=None, query_params=None,
         **requests_params):
    """
    Convert PDF file object given by `pdf_fo` into an output stream iterator.

    Sends the file via `request` and returns the response's
    `iter_content` iterator, using 4096-byte chunks.
    """
    response = self.request(pdf_fo, out_format, query_params,
                            **requests_params)

    return response.iter_content(chunk_size=4096)
121+
122+ def request (self , pdf_fo , out_format = None , query_params = None ,
123+ ** requests_params ):
91124 """
92- Convert PDF given by `pdf_path` into an output stream iterator .
125+ Convert PDF given by `pdf_path`, returning requests.Response object .
93126 """
94127 if self .api_key == "" :
95128 raise APIException ("Invalid API key" )
@@ -119,7 +152,7 @@ def dump(self, pdf_fo, out_format=None, query_params=None, **requests_params):
119152 raise APIException ("Unknown format requested" )
120153 response .raise_for_status ()
121154
122- return response . iter_content ( chunk_size = 4096 )
155+ return response
123156
124157 def remaining (self , query_params = None , ** requests_params ):
125158 """
@@ -140,7 +173,6 @@ def remaining(self, query_params=None, **requests_params):
140173
141174 return int (response .content )
142175
143-
144176 @staticmethod
145177 def ensure_format_ext (out_path , out_format ):
146178 """
0 commit comments