Skip to content

Commit

Permalink
Handle unicode differently when running on Python 2
Browse files (browse the repository at this point in the history)
  • Loading branch information
bgr committed Nov 30, 2017
1 parent b48f5db commit 98ead88
Showing 1 changed file with 12 additions and 5 deletions.
17 changes: 12 additions & 5 deletions omdbtool.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,10 +8,17 @@
import json

try:
+    # Python 3
     from urllib.request import urlopen
     from urllib.parse import urlencode
+    to_unicode = lambda s: s
+    mk_trans = str.maketrans
 except ImportError:
-    from urllib import urlopen, urlencode
+    # Python 2
+    from urllib2 import urlopen
+    from urllib import urlencode
+    to_unicode = lambda s: unicode(s) # noqa
+    mk_trans = lambda a, b: {ord(ca): ord(cb) for ca, cb in zip(a, b)}


parser = argparse.ArgumentParser(description='Get OMDb data for a movie')
Expand Down Expand Up @@ -135,17 +142,17 @@


# known problematic characters to replace
-char_map = str.maketrans(
-    '–',
-    '-'
+char_map = mk_trans(
+    u'–',
+    u'-'
)


def fmt(s):
# get rid of weird characters in output, which also cause errors on Windows
# first use the preferred character mapping for known characters, then fall
# back to encode + decode for unexpected ones
-    return (s
+    return (to_unicode(s)
.translate(char_map)
.encode('ascii', errors='replace')
.decode('utf-8'))
Expand Down

0 comments on commit 98ead88

Please sign in to comment.