# HG changeset patch # User Goffi # Date 1618591489 -7200 # Node ID cffa3ae4d0aa3981b626a75ff313eb14ff5c33d2 # Parent 3dff555fe6915abadf72a3b87ba73b1ccdfa9df2 pages (blog/view): move URL friendly code to backend tools: - the code to render URL-friendly text is now in `sat.tools.common.regex` - user friendly extra text is now only displayed when no `-` is found in ID. This is a temporary transition behaviour because new blog item IDs are now user friendly by default, and thus extra text is not wanted anymore. For older IDs it is still needed though, and the presence of `-` is used to guess when an ID is user friendly or not. diff -r 3dff555fe691 -r cffa3ae4d0aa libervia/pages/blog/view/page_meta.py --- a/libervia/pages/blog/view/page_meta.py Fri Apr 16 18:40:17 2021 +0200 +++ b/libervia/pages/blog/view/page_meta.py Fri Apr 16 18:44:49 2021 +0200 @@ -1,7 +1,5 @@ #!/usr/bin/env python3 -import unicodedata -import re import html from libervia.server.constants import Const as C from twisted.words.protocols.jabber import jid @@ -9,9 +7,10 @@ from sat.tools.common.template import safe from sat.tools.common import uri from sat.tools.common import data_format +from sat.tools.common import regex +from sat.core.log import getLogger from libervia.server import utils from libervia.server.utils import SubPage -from sat.core.log import getLogger log = getLogger(__name__) @@ -20,9 +19,6 @@ template = "blog/articles.html" uri_handlers = {('pubsub', 'microblog'): 'microblog_uri'} -RE_TEXT_URL = re.compile(r'[^a-zA-Z,_]+') -TEXT_MAX_LEN = 60 -TEXT_WORD_MIN_LENGHT = 4 URL_LIMIT_MARK = 90 # if canonical URL is longer than that, text will not be appended @@ -247,23 +243,20 @@ blog_canonical_url = '/'.join([blog_base_url_item, utils.quote(item['id'])]) if len(blog_canonical_url) > URL_LIMIT_MARK: blog_url = blog_canonical_url - else: + elif '-' not in item['id']: # we add text from title or body at the end of URL # to make it more human readable - text = item.get('title', item['content']) 
- # we change special chars to ascii one, trick found at https://stackoverflow.com/a/3194567 - text = unicodedata.normalize('NFD', text).encode('ascii', 'ignore').decode('utf-8') - text = RE_TEXT_URL.sub(' ', text).lower() - text = '-'.join([t for t in text.split() if t and len(t)>=TEXT_WORD_MIN_LENGHT]) - while len(text) > TEXT_MAX_LEN: - if '-' in text: - text = text.rsplit('-', 1)[0] - else: - text = text[:TEXT_MAX_LEN] + # we do it only if there is no "-", as a "-" probably means that + # item's id is already user friendly. + # TODO: to be removed, this is only kept for a transition period until + # user friendly item IDs are more common. + text = regex.urlFriendlyText(item.get('title', item['content'])) if text: blog_url = blog_canonical_url + '/' + text else: blog_url = blog_canonical_url + else: + blog_url = blog_canonical_url items_http_uri[item['id']] = self.host.getExtBaseURL(request, blog_url) for tag in item['tags']: