def shave_marks_latin(txt):
"""Remove all diacritic marks from Latin base characters"""
norm_txt = unicodedata.normalize('NFD', txt)
latin_base = False
keepers = []
for c in norm_txt:
if unicodedata.combining(c) and latin_base:
continue # ignore diacritic on Latin base char
keepers.append(c)
# if it isn't combining char, it's a new base char
if not unicodedata.combining(c):
latin_base = c in string.ascii_letters
shaved = ''.join(keepers)
return unicodedata.normalize('NFC', shaved)
Function to remove combining marks from Latin characters. import statements are omitted as this is part of the sanitize.py module.
Be the first to comment
You can use [html][/html], [css][/css], [php][/php] and more to embed the code. Urls are automatically hyperlinked. Line breaks and paragraphs are automatically generated.