# HG changeset patch # User Seve Ferrer # Date 1605703687 -3600 # Node ID 38da10e4b593abad0205aeb9c909b61328345e2f # Parent a4e182d7ff0aa610edc02fa7f487c4fbdeca94e4 mod_ogp: Update parsing logic to catch more cases diff -r a4e182d7ff0a -r 38da10e4b593 mod_ogp/mod_ogp.lua --- a/mod_ogp/mod_ogp.lua Wed Nov 18 11:16:11 2020 +0100 +++ b/mod_ogp/mod_ogp.lua Wed Nov 18 13:48:07 2020 +0100 @@ -30,32 +30,40 @@ local fastening = st.message({to = to, from = from}):tag("apply-to", {xmlns = "urn:xmpp:fasten:0", id = origin_id}) local found_metadata = false local message_body = "" - for property, content in response_body:gmatch(ogp_pattern) do - module:log("info", property .. "\t" .. content) - fastening:tag( - "meta", - { - xmlns = "http://www.w3.org/1999/xhtml", - property = property, - content = content - } - ):up() - found_metadata = true - message_body = message_body .. property .. "\t" .. content .. "\n" + + local meta_pattern = [[]] + for match in response_body:gmatch(meta_pattern) do + local property = match:match([[property=%s*["']?(og:.-)["']?%s]]) + if not property then + property = match:match([[property=["']?(og:.-)["']$]]) + end + + local content = match:match([[content=%s*["'](.-)["']%s]]) + if not content then + content = match:match([[content=["']?(.-)["']$]]) + end + if not content then + content = match:match([[content=(.-) property]]) + end + if not content then + content = match:match([[content=(.-)$]]) + end + + if property and content then + module:log("info", property .. "\t" .. content) + fastening:tag( + "meta", + { + xmlns = "http://www.w3.org/1999/xhtml", + property = property, + content = content + } + ):up() + found_metadata = true + message_body = message_body .. property .. "\t" .. content .. "\n" + end end - for content, property in response_body:gmatch(ogp_pattern2) do - module:log("info", property .. "\t" .. content) - fastening:tag( - "meta", - { - xmlns = "http://www.w3.org/1999/xhtml", - property = property, - content = content - } - ):up() - found_metadata = true - message_body = message_body .. property .. "\t" .. content .. "\n" - end + if found_metadata then mod_muc.get_room_from_jid(room.jid):broadcast_message(fastening) diff -r a4e182d7ff0a -r 38da10e4b593 mod_ogp/test.lua --- a/mod_ogp/test.lua Wed Nov 18 11:16:11 2020 +0100 +++ b/mod_ogp/test.lua Wed Nov 18 13:48:07 2020 +0100 @@ -1,30 +1,43 @@ local html = [[ - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + ]] -local ogp_pattern = [[]] -local ogp_pattern2 = [[]] + +local meta_pattern = [[]] +for match in html:gmatch(meta_pattern) do + local property = match:match([[property=%s*["']?(og:.-)["']?%s]]) + if not property then + property = match:match([[property=["']?(og:.-)["']$]]) + end -for property, content in html:gmatch(ogp_pattern) do - print("Pattern 1|", property, content, "|Pattern 1") + local content = match:match([[content=%s*["'](.-)["']%s]]) + if not content then + content = match:match([[content=["']?(.-)["']$]]) + end + if not content then + content = match:match([[content=(.-) property]]) + end + if not content then + content = match:match([[content=(.-)$]]) + end + + print(property, '\t', content, '\t', match .. "|") end -print('-------------------------------------------------------------') -for content, property in html:gmatch(ogp_pattern2) do - print("Pattern 2|", property, content, "|Pattern 2") -end