|
7 | 7 | import simplejson as json |
8 | 8 | import logging |
9 | 9 | import shlex |
| 10 | +import codecs |
10 | 11 | from datetime import datetime |
11 | 12 | from lxml.etree import iterparse |
12 | 13 | from functools import reduce |
@@ -49,7 +50,6 @@ def cleanup(*args): |
49 | 50 |
|
50 | 51 | DUMP_CMD = 'mysqldump -h {host} -P {port} -u {user} --password={password} {db} {table} ' \ |
51 | 52 | '--default-character-set=utf8 -X'.format(**config['mysql']) |
52 | | -# REMOVE_INVALID_CHAR_CMD = 'iconv -f utf-8 -t utf-8 -c' |
53 | 53 |
|
54 | 54 | BINLOG_CFG = {key: config['mysql'][key] for key in ['host', 'port', 'user', 'password', 'db']} |
55 | 55 | BULK_SIZE = config.get('elastic').get('bulk_size') |
@@ -305,15 +305,9 @@ def xml_dump_loader(): |
305 | 305 | stdout=subprocess.PIPE, |
306 | 306 | stderr=subprocess.DEVNULL, |
307 | 307 | close_fds=True) |
308 | | - |
309 | | - # removed_invalid_char = subprocess.Popen( |
310 | | - # shlex.split(REMOVE_INVALID_CHAR_CMD), |
311 | | - # stdin=mysqldump.stdout, |
312 | | - # stdout=subprocess.PIPE, |
313 | | - # stderr=subprocess.DEVNULL, |
314 | | - # close_fds=True) |
315 | | - # return removed_invalid_char.stdout # can be used as file object. (stream io) |
316 | | - return mysqldump.stdout |
| 308 | + stream = codecs.EncodedFile(mysqldump.stdout, data_encoding='utf-8', |
| 309 | + file_encoding='utf-8', errors='xmlcharrefreplace') |
| 310 | + return stream |
317 | 311 |
|
318 | 312 |
|
319 | 313 | def xml_file_loader(filename): |
|
0 commit comments