# User:Abacn/detectinvisible.py
# Inclusive (first, last) code point bounds of the three Unicode Private Use
# Areas: the BMP block plus Supplementary Private Use Area-A and Area-B.
pua_ranges = (
    (0xE000, 0xF8FF),
    (0xF0000, 0xFFFFD),
    (0x100000, 0x10FFFD),
)


def is_pua_codepoint(c):
    """Return True if the integer code point *c* is in a Private Use Area.

    *c* is compared against every (first, last) pair in ``pua_ranges``;
    both bounds are inclusive.
    """
    return any(first <= c <= last for (first, last) in pua_ranges)
# Copy in.txt to out.txt one character at a time, replacing every Private Use
# Area character with an HTML comment marker and logging each replacement to
# stdout.
import io

# io.open behaves the same on Python 2.6+ and Python 3. Decoding happens once
# at the read boundary; newline="" turns off newline translation so line
# endings reach the output unchanged.
with io.open("in.txt", "r", encoding="utf-8", newline="") as fin:
    text = fin.read()

# The output is written as bytes: markers are ASCII and every other character
# is encoded to UTF-8 explicitly, so an encoding failure can be handled per
# character. The context manager guarantees the file is flushed and closed.
with io.open("out.txt", "wb") as fout:
    for ch in text:
        if is_pua_codepoint(ord(ch)):
            fout.write(b'<!--Private Area Character-->')
            print("HeiHei")
            continue
        # Only the encode step can raise UnicodeEncodeError, so the try
        # covers nothing else.
        try:
            encoded = ch.encode('utf-8')
        except UnicodeEncodeError:
            # Best-effort fallback: mark the character that could not be
            # written instead of aborting the whole run.
            fout.write(b'<!--Invisible Character-->')
            print("Haha")
        else:
            fout.write(encoded)