Mercurial > libervia-backend
comparison sat/plugins/plugin_blog_import_dokuwiki.py @ 3028:ab2696e34d29
Python 3 port:
/!\ this is a huge commit
/!\ starting from this commit, SàT needs Python 3.6+
/!\ SàT may be unstable or some features may not work anymore; this will improve with time
This patch ports the backend, bridge and frontends to Python 3.
Roughly this has been done this way:
- the 2to3 tool has been applied (with Python 3.7)
- all references to python2 have been replaced with python3 (notably shebangs)
- fixed files not handled by 2to3 (notably the shell script)
- several manual fixes
- fixed issues reported by Python 3 that were not handled in Python 2
- replaced "async" with "async_" when needed (it's a reserved word from Python 3.7)
- replaced zope's "implements" with @implementer decorator
- temporary hack to handle data pickled in database, as str or bytes may be returned,
to be checked later
- fixed hash comparison for password
- removed some code which is not needed anymore with Python 3
- deactivated some code which needs to be checked (notably certificate validation)
- tested with jp, fixed reported issues until some basic commands worked
- ported Primitivus (after porting dependencies like urwid satext)
- more manual fixes
author | Goffi <goffi@goffi.org> |
---|---|
date | Tue, 13 Aug 2019 19:08:41 +0200 |
parents | 003b8b4b56a7 |
children | 9d0df638c8b4 |
comparison
equal
deleted
inserted
replaced
3027:ff5bcb12ae60 | 3028:ab2696e34d29 |
---|---|
1 #!/usr/bin/env python2 | 1 #!/usr/bin/env python3 |
2 # -*- coding: utf-8 -*- | 2 # -*- coding: utf-8 -*- |
3 | 3 |
4 # SàT plugin to import dokuwiki blogs | 4 # SàT plugin to import dokuwiki blogs |
5 # Copyright (C) 2009-2019 Jérôme Poisson (goffi@goffi.org) | 5 # Copyright (C) 2009-2019 Jérôme Poisson (goffi@goffi.org) |
6 # Copyright (C) 2013-2016 Adrien Cossa (souliane@mailoo.org) | 6 # Copyright (C) 2013-2016 Adrien Cossa (souliane@mailoo.org) |
26 from sat.core import exceptions | 26 from sat.core import exceptions |
27 from sat.tools import xml_tools | 27 from sat.tools import xml_tools |
28 from twisted.internet import threads | 28 from twisted.internet import threads |
29 from collections import OrderedDict | 29 from collections import OrderedDict |
30 import calendar | 30 import calendar |
31 import urllib | 31 import urllib.request, urllib.parse, urllib.error |
32 import urlparse | 32 import urllib.parse |
33 import tempfile | 33 import tempfile |
34 import re | 34 import re |
35 import time | 35 import time |
36 import os.path | 36 import os.path |
37 | 37 |
38 try: | 38 try: |
39 from dokuwiki import DokuWiki, DokuWikiError # this is a new dependency | 39 from dokuwiki import DokuWiki, DokuWikiError # this is a new dependency |
40 except ImportError: | 40 except ImportError: |
41 raise exceptions.MissingModule( | 41 raise exceptions.MissingModule( |
42 u'Missing module dokuwiki, please install it with "pip install dokuwiki"' | 42 'Missing module dokuwiki, please install it with "pip install dokuwiki"' |
43 ) | 43 ) |
44 try: | 44 try: |
45 from PIL import Image # this is already needed by plugin XEP-0054 | 45 from PIL import Image # this is already needed by plugin XEP-0054 |
46 except: | 46 except: |
47 raise exceptions.MissingModule( | 47 raise exceptions.MissingModule( |
48 u"Missing module pillow, please download/install it from https://python-pillow.github.io" | 48 "Missing module pillow, please download/install it from https://python-pillow.github.io" |
49 ) | 49 ) |
50 | 50 |
51 PLUGIN_INFO = { | 51 PLUGIN_INFO = { |
52 C.PI_NAME: "Dokuwiki import", | 52 C.PI_NAME: "Dokuwiki import", |
53 C.PI_IMPORT_NAME: "IMPORT_DOKUWIKI", | 53 C.PI_IMPORT_NAME: "IMPORT_DOKUWIKI", |
56 C.PI_MAIN: "DokuwikiImport", | 56 C.PI_MAIN: "DokuwikiImport", |
57 C.PI_HANDLER: "no", | 57 C.PI_HANDLER: "no", |
58 C.PI_DESCRIPTION: _("""Blog importer for Dokuwiki blog engine."""), | 58 C.PI_DESCRIPTION: _("""Blog importer for Dokuwiki blog engine."""), |
59 } | 59 } |
60 | 60 |
61 SHORT_DESC = D_(u"import posts from Dokuwiki blog engine") | 61 SHORT_DESC = D_("import posts from Dokuwiki blog engine") |
62 | 62 |
63 LONG_DESC = D_( | 63 LONG_DESC = D_( |
64 u"""This importer handle Dokuwiki blog engine. | 64 """This importer handle Dokuwiki blog engine. |
65 | 65 |
66 To use it, you need an admin access to a running Dokuwiki website | 66 To use it, you need an admin access to a running Dokuwiki website |
67 (local or on the Internet). The importer retrieves the data using | 67 (local or on the Internet). The importer retrieves the data using |
68 the XMLRPC Dokuwiki API. | 68 the XMLRPC Dokuwiki API. |
69 | 69 |
127 """Return a unique and constant post id | 127 """Return a unique and constant post id |
128 | 128 |
129 @param post(dict): parsed post data | 129 @param post(dict): parsed post data |
130 @return (unicode): post unique item id | 130 @return (unicode): post unique item id |
131 """ | 131 """ |
132 return unicode(post["id"]) | 132 return str(post["id"]) |
133 | 133 |
134 def getPostUpdated(self, post): | 134 def getPostUpdated(self, post): |
135 """Return the update date. | 135 """Return the update date. |
136 | 136 |
137 @param post(dict): parsed post data | 137 @param post(dict): parsed post data |
138 @return (unicode): update date | 138 @return (unicode): update date |
139 """ | 139 """ |
140 return unicode(post["mtime"]) | 140 return str(post["mtime"]) |
141 | 141 |
142 def getPostPublished(self, post): | 142 def getPostPublished(self, post): |
143 """Try to parse the date from the message ID, else use "mtime". | 143 """Try to parse the date from the message ID, else use "mtime". |
144 | 144 |
145 The date can be extracted if the message ID looks like one of: | 145 The date can be extracted if the message ID looks like one of: |
146 - namespace:YYMMDD_short_title | 146 - namespace:YYMMDD_short_title |
147 - namespace:YYYYMMDD_short_title | 147 - namespace:YYYYMMDD_short_title |
148 @param post (dict): parsed post data | 148 @param post (dict): parsed post data |
149 @return (unicode): publication date | 149 @return (unicode): publication date |
150 """ | 150 """ |
151 id_, default = unicode(post["id"]), unicode(post["mtime"]) | 151 id_, default = str(post["id"]), str(post["mtime"]) |
152 try: | 152 try: |
153 date = id_.split(":")[-1].split("_")[0] | 153 date = id_.split(":")[-1].split("_")[0] |
154 except KeyError: | 154 except KeyError: |
155 return default | 155 return default |
156 try: | 156 try: |
158 except ValueError: | 158 except ValueError: |
159 try: | 159 try: |
160 time_struct = time.strptime(date, "%Y%m%d") | 160 time_struct = time.strptime(date, "%Y%m%d") |
161 except ValueError: | 161 except ValueError: |
162 return default | 162 return default |
163 return unicode(calendar.timegm(time_struct)) | 163 return str(calendar.timegm(time_struct)) |
164 | 164 |
165 def processPost(self, post, profile_jid): | 165 def processPost(self, post, profile_jid): |
166 """Process a single page. | 166 """Process a single page. |
167 | 167 |
168 @param post (dict): parsed post data | 168 @param post (dict): parsed post data |
233 self.processPost(page, profile_jid) | 233 self.processPost(page, profile_jid) |
234 count += 1 | 234 count += 1 |
235 if count >= self.limit: | 235 if count >= self.limit: |
236 break | 236 break |
237 | 237 |
238 return (self.posts_data.itervalues(), len(self.posts_data)) | 238 return (iter(self.posts_data.values()), len(self.posts_data)) |
239 | 239 |
240 def processContent(self, text, backlinks, profile_jid): | 240 def processContent(self, text, backlinks, profile_jid): |
241 """Do text substitutions and file copy. | 241 """Do text substitutions and file copy. |
242 | 242 |
243 @param text (unicode): message content | 243 @param text (unicode): message content |
244 @param backlinks (list[unicode]): list of backlinks | 244 @param backlinks (list[unicode]): list of backlinks |
245 """ | 245 """ |
246 text = text.strip(u"\ufeff") # this is at the beginning of the file (BOM) | 246 text = text.strip("\ufeff") # this is at the beginning of the file (BOM) |
247 | 247 |
248 for backlink in backlinks: | 248 for backlink in backlinks: |
249 src = '/doku.php?id=%s"' % backlink | 249 src = '/doku.php?id=%s"' % backlink |
250 tgt = '/blog/%s/%s" target="#"' % (profile_jid.user, backlink) | 250 tgt = '/blog/%s/%s" target="#"' % (profile_jid.user, backlink) |
251 text = text.replace(src, tgt) | 251 text = text.replace(src, tgt) |
259 if re.match(r"^\w*://", link): # absolute URL to link directly | 259 if re.match(r"^\w*://", link): # absolute URL to link directly |
260 continue | 260 continue |
261 if self.media_repo: | 261 if self.media_repo: |
262 self.moveMedia(link, subs) | 262 self.moveMedia(link, subs) |
263 elif link not in subs: | 263 elif link not in subs: |
264 subs[link] = urlparse.urljoin(self.url, link) | 264 subs[link] = urllib.parse.urljoin(self.url, link) |
265 | 265 |
266 for url, new_url in subs.iteritems(): | 266 for url, new_url in subs.items(): |
267 text = text.replace(url, new_url) | 267 text = text.replace(url, new_url) |
268 return text | 268 return text |
269 | 269 |
270 def moveMedia(self, link, subs): | 270 def moveMedia(self, link, subs): |
271 """Move a media from the DokuWiki host to the new repository. | 271 """Move a media from the DokuWiki host to the new repository. |
272 | 272 |
273 This also updates the hyperlinks to internal media files. | 273 This also updates the hyperlinks to internal media files. |
274 @param link (unicode): media link | 274 @param link (unicode): media link |
275 @param subs (dict): substitutions data | 275 @param subs (dict): substitutions data |
276 """ | 276 """ |
277 url = urlparse.urljoin(self.url, link) | 277 url = urllib.parse.urljoin(self.url, link) |
278 user_media = re.match(r"(/lib/exe/\w+.php\?)(.*)", link) | 278 user_media = re.match(r"(/lib/exe/\w+.php\?)(.*)", link) |
279 thumb_width = None | 279 thumb_width = None |
280 | 280 |
281 if user_media: # media that has been added by the user | 281 if user_media: # media that has been added by the user |
282 params = urlparse.parse_qs(urlparse.urlparse(url).query) | 282 params = urllib.parse.parse_qs(urllib.parse.urlparse(url).query) |
283 try: | 283 try: |
284 media = params["media"][0] | 284 media = params["media"][0] |
285 except KeyError: | 285 except KeyError: |
286 log.warning("No media found in fetch URL: %s" % user_media.group(2)) | 286 log.warning("No media found in fetch URL: %s" % user_media.group(2)) |
287 return | 287 return |
293 except KeyError: | 293 except KeyError: |
294 pass | 294 pass |
295 | 295 |
296 filename = media.replace(":", "/") | 296 filename = media.replace(":", "/") |
297 # XXX: avoid "precondition failed" error (only keep the media parameter) | 297 # XXX: avoid "precondition failed" error (only keep the media parameter) |
298 url = urlparse.urljoin(self.url, "/lib/exe/fetch.php?media=%s" % media) | 298 url = urllib.parse.urljoin(self.url, "/lib/exe/fetch.php?media=%s" % media) |
299 | 299 |
300 elif link.startswith("/lib/plugins/"): | 300 elif link.startswith("/lib/plugins/"): |
301 # other link added by a plugin or something else | 301 # other link added by a plugin or something else |
302 filename = link[13:] | 302 filename = link[13:] |
303 else: # fake alert... there's no media (or we don't handle it yet) | 303 else: # fake alert... there's no media (or we don't handle it yet) |
322 """ | 322 """ |
323 dirname = os.path.dirname(dest) | 323 dirname = os.path.dirname(dest) |
324 if not os.path.exists(dest): | 324 if not os.path.exists(dest): |
325 if not os.path.exists(dirname): | 325 if not os.path.exists(dirname): |
326 os.makedirs(dirname) | 326 os.makedirs(dirname) |
327 urllib.urlretrieve(source, dest) | 327 urllib.request.urlretrieve(source, dest) |
328 log.debug("DokuWiki media file copied to %s" % dest) | 328 log.debug("DokuWiki media file copied to %s" % dest) |
329 | 329 |
330 def createThumbnail(self, source, dest, width): | 330 def createThumbnail(self, source, dest, width): |
331 """Create a thumbnail. | 331 """Create a thumbnail. |
332 | 332 |