Mercurial > prosody-modules
changeset 4255:38da10e4b593
mod_ogp: Update parsing logic to catch more cases
author | Seve Ferrer <seve@delape.net> |
---|---|
date | Wed, 18 Nov 2020 13:48:07 +0100 |
parents | a4e182d7ff0a |
children | c4b9d4ba839b |
files | mod_ogp/mod_ogp.lua mod_ogp/test.lua |
diffstat | 2 files changed, 70 insertions(+), 49 deletions(-) [+] |
line wrap: on
line diff
--- a/mod_ogp/mod_ogp.lua Wed Nov 18 11:16:11 2020 +0100 +++ b/mod_ogp/mod_ogp.lua Wed Nov 18 13:48:07 2020 +0100 @@ -30,32 +30,40 @@ local fastening = st.message({to = to, from = from}):tag("apply-to", {xmlns = "urn:xmpp:fasten:0", id = origin_id}) local found_metadata = false local message_body = "" - for property, content in response_body:gmatch(ogp_pattern) do - module:log("info", property .. "\t" .. content) - fastening:tag( - "meta", - { - xmlns = "http://www.w3.org/1999/xhtml", - property = property, - content = content - } - ):up() - found_metadata = true - message_body = message_body .. property .. "\t" .. content .. "\n" + + local meta_pattern = [[<meta (.-)/?>]] + for match in response_body:gmatch(meta_pattern) do + local property = match:match([[property=%s*["']?(og:.-)["']?%s]]) + if not property then + property = match:match([[property=["']?(og:.-)["']$]]) + end + + local content = match:match([[content=%s*["'](.-)["']%s]]) + if not content then + content = match:match([[content=["']?(.-)["']$]]) + end + if not content then + content = match:match([[content=(.-) property]]) + end + if not content then + content = match:match([[content=(.-)$]]) + end + + if property and content then + module:log("info", property .. "\t" .. content) + fastening:tag( + "meta", + { + xmlns = "http://www.w3.org/1999/xhtml", + property = property, + content = content + } + ):up() + found_metadata = true + message_body = message_body .. property .. "\t" .. content .. "\n" + end end - for content, property in response_body:gmatch(ogp_pattern2) do - module:log("info", property .. "\t" .. content) - fastening:tag( - "meta", - { - xmlns = "http://www.w3.org/1999/xhtml", - property = property, - content = content - } - ):up() - found_metadata = true - message_body = message_body .. property .. "\t" .. content .. "\n" - end + if found_metadata then mod_muc.get_room_from_jid(room.jid):broadcast_message(fastening)
--- a/mod_ogp/test.lua Wed Nov 18 11:16:11 2020 +0100 +++ b/mod_ogp/test.lua Wed Nov 18 13:48:07 2020 +0100 @@ -1,30 +1,43 @@ local html = [[ -<meta property="og:title" content="Example 1"> -<meta property=og:title content="Example 2"> -<meta property="og:title" content="Example 3" > -<meta property="og:title" content="Example 4" /> -<meta property="og:title" content="Example 5"/> -<meta property=og:title content=Example 6/> -<meta property="og:title" content= "Example 7" /> -<meta property="og:title" itemprop="image primaryImageOfPage" content="Example 8" /> -<meta content="Example 9" property="og:title" > -<meta content="Example 10" property="og:title"> -<meta content="Example 11" property="og:title"/> -<meta content="Example 12" property="og:title" /> -<meta content="Example 13" property=og:title > -<meta content=Example 14 property=og:title > -<meta content= "Example 15" property="og:title" /> -<meta content="Example 16" itemprop="image primaryImageOfPage" property="og:title" /> +<meta property="og:title" content="Example 1 A"> +<meta property=og:title content="Example 2 B"> +<meta property="og:title" content="Example 3 C" > +<meta property="og:title" content="Example 4 D" /> +<meta property="og:title" content="Example 5 E"/> +<meta property=og:title content=Example 6 F/> +<meta property="og:title" content= "Example 7 G" /> +<meta property="og:title" itemprop="image primaryImageOfPage" content="Example 8 H" /> +<meta property='og:title' content='Example 9 I' /> +<meta content="Example 10 J" property="og:title" > +<meta content="Example 11 K" property="og:title"> +<meta content="Example 12 L" property="og:title"/> +<meta content="Example 13 M" property="og:title" /> +<meta content="Example 14 N" property=og:title > +<meta content=Example 15 O property=og:title > +<meta content= "Example 16 P" property="og:title" /> +<meta content="Example 17 Q" itemprop="image primaryImageOfPage" property="og:title" /> +<meta content= 'Example 18 R' property='og:title' /> ]] -local ogp_pattern = [[<meta property=["']?(og:.-)["']? content=%s*["']?(.-)["']?%s-/?>]] -local ogp_pattern2 = [[<meta content=%s*["']?(.-)["']? property=["']?(og:.-)["']?%s-/?>]] + +local meta_pattern = [[<meta (.-)/?>]] +for match in html:gmatch(meta_pattern) do + local property = match:match([[property=%s*["']?(og:.-)["']?%s]]) + if not property then + property = match:match([[property=["']?(og:.-)["']$]]) + end -for property, content in html:gmatch(ogp_pattern) do - print("Pattern 1|", property, content, "|Pattern 1") + local content = match:match([[content=%s*["'](.-)["']%s]]) + if not content then + content = match:match([[content=["']?(.-)["']$]]) + end + if not content then + content = match:match([[content=(.-) property]]) + end + if not content then + content = match:match([[content=(.-)$]]) + end + + print(property, '\t', content, '\t', match .. "|") end -print('-------------------------------------------------------------') -for content, property in html:gmatch(ogp_pattern2) do - print("Pattern 2|", property, content, "|Pattern 2") -end