Skip to content

Commit 70e9413

Browse files
committed
Try to fix invalid characters in the mysqldump XML stream
1 parent 6a5fa1c commit 70e9413

File tree

1 file changed

+4
-10
lines changed

1 file changed

+4
-10
lines changed

main.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import simplejson as json
88
import logging
99
import shlex
10+
import codecs
1011
from datetime import datetime
1112
from lxml.etree import iterparse
1213
from functools import reduce
@@ -49,7 +50,6 @@ def cleanup(*args):
4950

5051
DUMP_CMD = 'mysqldump -h {host} -P {port} -u {user} --password={password} {db} {table} ' \
5152
'--default-character-set=utf8 -X'.format(**config['mysql'])
52-
# REMOVE_INVALID_CHAR_CMD = 'iconv -f utf-8 -t utf-8 -c'
5353

5454
BINLOG_CFG = {key: config['mysql'][key] for key in ['host', 'port', 'user', 'password', 'db']}
5555
BULK_SIZE = config.get('elastic').get('bulk_size')
@@ -305,15 +305,9 @@ def xml_dump_loader():
305305
stdout=subprocess.PIPE,
306306
stderr=subprocess.DEVNULL,
307307
close_fds=True)
308-
309-
# removed_invalid_char = subprocess.Popen(
310-
# shlex.split(REMOVE_INVALID_CHAR_CMD),
311-
# stdin=mysqldump.stdout,
312-
# stdout=subprocess.PIPE,
313-
# stderr=subprocess.DEVNULL,
314-
# close_fds=True)
315-
# return removed_invalid_char.stdout # can be used as file object. (stream io)
316-
return mysqldump.stdout
308+
stream = codecs.EncodedFile(mysqldump.stdout, data_encoding='utf-8',
309+
file_encoding='utf-8', errors='xmlcharrefreplace')
310+
return stream
317311

318312

319313
def xml_file_loader(filename):

0 commit comments

Comments
 (0)