libervia-web: src/pages/common/blog/page

pages (common/blog): transform special characters to their ascii equivalent

comparison

equal deleted inserted replaced

-:6c98c0baa038
+:d3ac6fb10fd5
 from libervia.server import session_iface
 from sat.core.i18n import _
 from sat.core.log import getLogger
 from sat.tools.common.template import safe
 from libervia.server import utils
+import unicodedata
 import re
 import cgi
 log = getLogger('pages/common/blog')
 """generic blog (with service/node provided)"""
 name = u'blog'
 template = u"blog/articles.html"
 uri_handlers = {(u'pubsub', u'microblog'): 'microblog_uri'}
-RE_TEXT_URL = re.compile(ur'[^a-zA-Zéèêôà,_]+')
+RE_TEXT_URL = re.compile(ur'[^a-zA-Z,_]+')
 TEXT_MAX_LEN = 60
 TEXT_WORD_MIN_LENGHT = 4
 URL_LIMIT_MARK = 90  # if canonical URL is longer than that, text will not be appended
 def microblog_uri(self, uri_data):
 blog_url = blog_canonical_url
 else:
 # we add text from title or body at the end of URL
 # to make it more readable
 text = item.title or item.content
+# we convert special chars to their ASCII equivalents, trick found at https://stackoverflow.com/a/3194567
+text = unicodedata.normalize('NFD', text).encode('ascii', 'ignore')
 text = RE_TEXT_URL.sub(u' ', text).lower()
 text = u'-'.join([t for t in text.split() if t and len(t)>=TEXT_WORD_MIN_LENGHT])
 while len(text) > TEXT_MAX_LEN:
 if u'-' in text:
 text = text.rsplit(u'-', 1)[0]

Mercurial > libervia-web