changeset 1051:d3ac6fb10fd5

pages (common/blog): transform special characters to their ASCII equivalent
author Goffi <goffi@goffi.org>
date Thu, 25 Jan 2018 08:34:27 +0100
parents 6c98c0baa038
children cdf0ebed9db7
files src/pages/common/blog/page_meta.py
diffstat 1 files changed, 4 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/src/pages/common/blog/page_meta.py	Thu Jan 25 08:17:29 2018 +0100
+++ b/src/pages/common/blog/page_meta.py	Thu Jan 25 08:34:27 2018 +0100
@@ -9,6 +9,7 @@
 from sat.core.log import getLogger
 from sat.tools.common.template import safe
 from libervia.server import utils
+import unicodedata
 import re
 import cgi
 log = getLogger('pages/common/blog')
@@ -18,7 +19,7 @@
 template = u"blog/articles.html"
 uri_handlers = {(u'pubsub', u'microblog'): 'microblog_uri'}
 
-RE_TEXT_URL = re.compile(ur'[^a-zA-Zéèêôà,_]+')
+RE_TEXT_URL = re.compile(ur'[^a-zA-Z,_]+')
 TEXT_MAX_LEN = 60
 TEXT_WORD_MIN_LENGHT = 4
 URL_LIMIT_MARK = 90  # if canonical URL is longer than that, text will not be appended
@@ -221,6 +222,8 @@
             # we add text from title or body at the end of URL
             # to make it more readable
             text = item.title or item.content
+            # we convert special chars to their ASCII equivalents, trick found at https://stackoverflow.com/a/3194567
+            text = unicodedata.normalize('NFD', text).encode('ascii', 'ignore')
             text = RE_TEXT_URL.sub(u' ', text).lower()
             text = u'-'.join([t for t in text.split() if t and len(t)>=TEXT_WORD_MIN_LENGHT])
             while len(text) > TEXT_MAX_LEN: