# HG changeset patch
# User Goffi <goffi@goffi.org>
# Date 1618591489 -7200
# Node ID cffa3ae4d0aa3981b626a75ff313eb14ff5c33d2
# Parent  3dff555fe6915abadf72a3b87ba73b1ccdfa9df2
pages (blog/view): move URL friendly code to backend tools:

- the code to make a text URL friendly is now in `sat.tools.common.regex`
- user friendly extra text is now only appended to the URL when no `-` is found in the
  item ID. This is a temporary transition behaviour: new blog item IDs are now user
  friendly by default, so the extra text is not wanted for them anymore.
  Older IDs still need it though, and the presence of `-` is used to guess whether an
  ID is already user friendly.

diff -r 3dff555fe691 -r cffa3ae4d0aa libervia/pages/blog/view/page_meta.py
--- a/libervia/pages/blog/view/page_meta.py	Fri Apr 16 18:40:17 2021 +0200
+++ b/libervia/pages/blog/view/page_meta.py	Fri Apr 16 18:44:49 2021 +0200
@@ -1,7 +1,5 @@
 #!/usr/bin/env python3
 
-import unicodedata
-import re
 import html
 from libervia.server.constants import Const as C
 from twisted.words.protocols.jabber import jid
@@ -9,9 +7,10 @@
 from sat.tools.common.template import safe
 from sat.tools.common import uri
 from sat.tools.common import data_format
+from sat.tools.common import regex
+from sat.core.log import getLogger
 from libervia.server import utils
 from libervia.server.utils import SubPage
-from sat.core.log import getLogger
 
 log = getLogger(__name__)
 
@@ -20,9 +19,6 @@
 template = "blog/articles.html"
 uri_handlers = {('pubsub', 'microblog'): 'microblog_uri'}
 
-RE_TEXT_URL = re.compile(r'[^a-zA-Z,_]+')
-TEXT_MAX_LEN = 60
-TEXT_WORD_MIN_LENGHT = 4
 URL_LIMIT_MARK = 90  # if canonical URL is longer than that, text will not be appended
 
 
@@ -247,23 +243,20 @@
         blog_canonical_url = '/'.join([blog_base_url_item, utils.quote(item['id'])])
         if len(blog_canonical_url) > URL_LIMIT_MARK:
             blog_url = blog_canonical_url
-        else:
+        elif '-' not in item['id']:
             # we add text from title or body at the end of URL
             # to make it more human readable
-            text = item.get('title', item['content'])
-            # we change special chars to ascii one, trick found at https://stackoverflow.com/a/3194567
-            text = unicodedata.normalize('NFD', text).encode('ascii', 'ignore').decode('utf-8')
-            text = RE_TEXT_URL.sub(' ', text).lower()
-            text = '-'.join([t for t in text.split() if t and len(t)>=TEXT_WORD_MIN_LENGHT])
-            while len(text) > TEXT_MAX_LEN:
-                if '-' in text:
-                    text = text.rsplit('-', 1)[0]
-                else:
-                    text = text[:TEXT_MAX_LEN]
+            # we do it only if there is no "-", as a "-" probably means that
+            # item's id is already user friendly.
+            # TODO: to be removed, this is only kept for a transition period until
+            #   user friendly item IDs are more common.
+            text = regex.urlFriendlyText(item.get('title', item['content']))
             if text:
                 blog_url = blog_canonical_url + '/' + text
             else:
                 blog_url = blog_canonical_url
+        else:
+            blog_url = blog_canonical_url
 
         items_http_uri[item['id']] = self.host.getExtBaseURL(request, blog_url)
         for tag in item['tags']: