Mercurial > libervia-web
changeset 1051:d3ac6fb10fd5
pages (common/blog): transform special characters to their ascii equivalent
author | Goffi <goffi@goffi.org> |
---|---|
date | Thu, 25 Jan 2018 08:34:27 +0100 |
parents | 6c98c0baa038 |
children | cdf0ebed9db7 |
files | src/pages/common/blog/page_meta.py |
diffstat | 1 files changed, 4 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/src/pages/common/blog/page_meta.py Thu Jan 25 08:17:29 2018 +0100 +++ b/src/pages/common/blog/page_meta.py Thu Jan 25 08:34:27 2018 +0100 @@ -9,6 +9,7 @@ from sat.core.log import getLogger from sat.tools.common.template import safe from libervia.server import utils +import unicodedata import re import cgi log = getLogger('pages/common/blog') @@ -18,7 +19,7 @@ template = u"blog/articles.html" uri_handlers = {(u'pubsub', u'microblog'): 'microblog_uri'} -RE_TEXT_URL = re.compile(ur'[^a-zA-Zéèêôà,_]+') +RE_TEXT_URL = re.compile(ur'[^a-zA-Z,_]+') TEXT_MAX_LEN = 60 TEXT_WORD_MIN_LENGHT = 4 URL_LIMIT_MARK = 90 # if canonical URL is longer than that, text will not be appended @@ -221,6 +222,8 @@ # we add text from title or body at the end of URL # to make it more readable text = item.title or item.content + # we change special chars to ascii one, trick found at https://stackoverflow.com/a/3194567 + text = unicodedata.normalize('NFD', text).encode('ascii', 'ignore') text = RE_TEXT_URL.sub(u' ', text).lower() text = u'-'.join([t for t in text.split() if t and len(t)>=TEXT_WORD_MIN_LENGHT]) while len(text) > TEXT_MAX_LEN: