Mercurial > libervia-backend
comparison src/plugins/plugin_blog_import_dokuwiki.py @ 1842:9fd517248dc8
plugin blog_import_dokuwiki: refactor to make it look more similar to blog_import_dotclear
author | souliane <souliane@mailoo.org> |
---|---|
date | Thu, 04 Feb 2016 17:36:22 +0100 |
parents | 7717975b3ec3 |
children | a51355982f11 |
comparison
equal
deleted
inserted
replaced
1841:7717975b3ec3 | 1842:9fd517248dc8 |
---|---|
117 self.temp_dir = None | 117 self.temp_dir = None |
118 if self.media_repo: | 118 if self.media_repo: |
119 self.temp_dir = tempfile.mkdtemp() | 119 self.temp_dir = tempfile.mkdtemp() |
120 self.info_msg = _("DokuWiki media files will be downloaded to %s - to finish the import you will need to upload them to %s" % (self.temp_dir, self.media_repo)) | 120 self.info_msg = _("DokuWiki media files will be downloaded to %s - to finish the import you will need to upload them to %s" % (self.temp_dir, self.media_repo)) |
121 else: | 121 else: |
122 self.info_msg = _("DokuWiki media files will stay on %s - some of them may be protected by DokuWiki ACL and will not be accessible from XMPP." % url) | 122 self.info_msg = _("DokuWiki media files will stay on %s - some of them may be protected by DokuWiki ACL and will not be accessible." % url) |
123 self.limit = limit | 123 self.limit = limit |
124 self.posts_data = OrderedDict() | 124 self.posts_data = OrderedDict() |
125 | 125 |
126 def process(self, client, namespace=DEFAULT_NAMESPACE): | 126 def getPostId(self, post): |
127 """Process a namespace or a single page. | 127 """Return a unique and constant post id |
128 | 128 |
129 @param namespace (unicode): DokuWiki namespace (or page) to import | 129 @param post(dict): parsed post data |
130 """ | 130 @return (unicode): post unique item id |
131 profile_jid = client.jid | 131 """ |
132 log.info("Importing data from DokuWiki %s" % self.version) | 132 return unicode(post['id']) |
133 try: | 133 |
134 pages_list = self.pages.list(namespace) | 134 def getPostUpdated(self, post): |
135 except DokuWikiError: | 135 """Return the update date. |
136 log.warning('Could not list Dokuwiki pages: please turn the "display_errors" setting to "Off" in the php.ini of the webserver hosting DokuWiki.') | 136 |
137 return | 137 @param post(dict): parsed post data |
138 | 138 @return (unicode): update date |
139 if not pages_list: # namespace is actually a page? | 139 """ |
140 names = namespace.split(":") | 140 return unicode(post['mtime']) |
141 real_namespace = ":".join(names[0:-1]) | 141 |
142 pages_list = self.pages.list(real_namespace) | 142 def getPostPublished(self, post): |
143 pages_list = [page for page in pages_list if page["id"] == namespace] | |
144 namespace = real_namespace | |
145 | |
146 count = 0 | |
147 for page in pages_list: | |
148 | |
149 # get main information | |
150 id_, updated, published = unicode(page['id']), unicode(page['mtime']), self.getOriginalDate(page) | |
151 | |
152 # manage links | |
153 backlinks = self.pages.backlinks(id_) | |
154 for link in self.pages.links(id_): | |
155 if link["type"] != "extern": | |
156 assert link["type"] == "local" | |
157 page = link["page"] | |
158 backlinks.append(page[1:] if page.startswith(":") else page) | |
159 | |
160 content = self.pages.get(id_) | |
161 content_xhtml = self.processContent(self.pages.html(id_), backlinks, profile_jid) | |
162 | |
163 # XXX: title is already in content_xhtml and difficult to remove, so leave it | |
164 # title = content.split("\n")[0].strip(u"\ufeff= ") | |
165 | |
166 # build the extra data dictionary | |
167 mb_data = {"id": id_, | |
168 "published": published, | |
169 "updated": updated, | |
170 "author": profile_jid.user, | |
171 # "content": content, # when passed, it is displayed in Libervia instead of content_xhtml | |
172 "content_xhtml": content_xhtml, | |
173 # "title": title, | |
174 "allow_comments": "true", | |
175 } | |
176 | |
177 # find out if the message access is public or restricted | |
178 namespace = id_.split(":")[0] | |
179 if namespace and namespace.lower() not in ("public", "/"): | |
180 mb_data["group"] = namespace # roster group must exist | |
181 | |
182 self.posts_data[id_] = {'blog': mb_data, 'comments':[[]]} | |
183 | |
184 count += 1 | |
185 if count >= self.limit : | |
186 break | |
187 | |
188 return (self.posts_data.itervalues(), len(self.posts_data)) | |
189 | |
190 def getOriginalDate(self, page): | |
191 """Try to parse the date from the message ID, else use "mtime". | 143 """Try to parse the date from the message ID, else use "mtime". |
192 | 144 |
193 The date can be extracted if the message ID looks like one of: | 145 The date can be extracted if the message ID looks like one of: |
194 - namespace:YYMMDD_short_title | 146 - namespace:YYMMDD_short_title |
195 - namespace:YYYYMMDD_short_title | 147 - namespace:YYYYMMDD_short_title |
196 @param page (dict): message page | 148 @param post (dict): parsed post data |
197 @return unicode | 149 @return (unicode): publication date |
198 """ | 150 """ |
199 id_, default = unicode(page["id"]), unicode(page["mtime"]) | 151 id_, default = unicode(post["id"]), unicode(post["mtime"]) |
200 try: | 152 try: |
201 date = id_.split(":")[-1].split("_")[0] | 153 date = id_.split(":")[-1].split("_")[0] |
202 except KeyError: | 154 except KeyError: |
203 return default | 155 return default |
204 try: | 156 try: |
208 time_struct = time.strptime(date, "%Y%m%d") | 160 time_struct = time.strptime(date, "%Y%m%d") |
209 except ValueError: | 161 except ValueError: |
210 return default | 162 return default |
211 return unicode(calendar.timegm(time_struct)) | 163 return unicode(calendar.timegm(time_struct)) |
212 | 164 |
165 def processPost(self, post, profile_jid): | |
166 """Process a single page. | |
167 | |
168 @param post (dict): parsed post data | |
169 @param profile_jid | |
170 """ | |
171 # get main information | |
172 id_ = self.getPostId(post) | |
173 updated = self.getPostUpdated(post) | |
174 published = self.getPostPublished(post) | |
175 | |
176 # manage links | |
177 backlinks = self.pages.backlinks(id_) | |
178 for link in self.pages.links(id_): | |
179 if link["type"] != "extern": | |
180 assert link["type"] == "local" | |
181 page = link["page"] | |
182 backlinks.append(page[1:] if page.startswith(":") else page) | |
183 | |
184 content = self.pages.get(id_) | |
185 content_xhtml = self.processContent(self.pages.html(id_), backlinks, profile_jid) | |
186 | |
187 # XXX: title is already in content_xhtml and difficult to remove, so leave it | |
188 # title = content.split("\n")[0].strip(u"\ufeff= ") | |
189 | |
190 # build the extra data dictionary | |
191 mb_data = {"id": id_, | |
192 "published": published, | |
193 "updated": updated, | |
194 "author": profile_jid.user, | |
195 # "content": content, # when passed, it is displayed in Libervia instead of content_xhtml | |
196 "content_xhtml": content_xhtml, | |
197 # "title": title, | |
198 "allow_comments": "true", | |
199 } | |
200 | |
201 # find out if the message access is public or restricted | |
202 namespace = id_.split(":")[0] | |
203 if namespace and namespace.lower() not in ("public", "/"): | |
204 mb_data["group"] = namespace # roster group must exist | |
205 | |
206 self.posts_data[id_] = {'blog': mb_data, 'comments':[[]]} | |
207 | |
208 def process(self, client, namespace=DEFAULT_NAMESPACE): | |
209 """Process a namespace or a single page. | |
210 | |
211 @param namespace (unicode): DokuWiki namespace (or page) to import | |
212 """ | |
213 profile_jid = client.jid | |
214 log.info("Importing data from DokuWiki %s" % self.version) | |
215 try: | |
216 pages_list = self.pages.list(namespace) | |
217 except DokuWikiError: | |
218 log.warning('Could not list Dokuwiki pages: please turn the "display_errors" setting to "Off" in the php.ini of the webserver hosting DokuWiki.') | |
219 return | |
220 | |
221 if not pages_list: # namespace is actually a page? | |
222 names = namespace.split(":") | |
223 real_namespace = ":".join(names[0:-1]) | |
224 pages_list = self.pages.list(real_namespace) | |
225 pages_list = [page for page in pages_list if page["id"] == namespace] | |
226 namespace = real_namespace | |
227 | |
228 count = 0 | |
229 for page in pages_list: | |
230 self.processPost(page, profile_jid) | |
231 count += 1 | |
232 if count >= self.limit : | |
233 break | |
234 | |
235 return (self.posts_data.itervalues(), len(self.posts_data)) | |
213 | 236 |
214 def processContent(self, text, backlinks, profile_jid): | 237 def processContent(self, text, backlinks, profile_jid): |
215 """Do text substitutions and file copy. | 238 """Do text substitutions and file copy. |
216 | 239 |
217 @param text (unicode): message content | 240 @param text (unicode): message content |