diff db/pubsub.sql @ 430:5a0ada3b61ca

Full-Text Search implementation: /!\ pgsql schema needs to be updated /!\ /!\ Minimal PostgreSQL required version is now 12 /!\ A new options is available to specify main language of a node. By default a `generic` language is used (which uses the `simple` configuration in PostgreSQL). When a node owner changes the language, the index is rebuilt accordingly. It is possible to have item specific language for multilingual nodes (but for the moment the search is done with node language, so the results won't be good). If an item language is explicitely set in `item_languages`, the FTS configuration won't be affected by node FTS language setting. Search is parsed with `websearch_to_tsquery` for now, but this parser doesn't handle prefix matching, so it may be replaced in the future. SetConfiguration now only updates the modified values, this avoid triggering the FTS re-indexing on each config change. `_checkNodeExists` is not called anymore as we can check if a row has been modified to see if the node exists, this avoid a useless query. Item storing has been slighly improved with a useless SELECT and condition removed. To avoid 2 schema updates in a row, the `sat_pubsub_update_5_6.sql` file also prepares the implementation of XEP-0346 by updating nodes with a schema and creating the suitable template nodes.
author Goffi <goffi@goffi.org>
date Fri, 11 Dec 2020 17:18:52 +0100
parents 1dc606612405
children 920440200570
line wrap: on
line diff
--- a/db/pubsub.sql	Thu Dec 10 10:46:34 2020 +0100
+++ b/db/pubsub.sql	Fri Dec 11 17:18:52 2020 +0100
@@ -6,7 +6,7 @@
 CREATE TABLE nodes (
     node_id serial PRIMARY KEY,
     node text NOT NULL,
-	pep text,
+    pep text,
     node_type text NOT NULL DEFAULT 'leaf'
         CHECK (node_type IN ('leaf', 'collection')),
     access_model text NOT NULL DEFAULT 'open'
@@ -14,9 +14,10 @@
     persist_items boolean,
     deliver_payloads boolean NOT NULL DEFAULT TRUE,
     max_items integer NOT NULL DEFAULT 0
-		CHECK (max_items >= 0),
+        CHECK (max_items >= 0),
     serial_ids boolean NOT NULL DEFAULT FALSE,
     consistent_publisher boolean NOT NULL DEFAULT FALSE,
+    fts_language text NOT NULL DEFAULT 'generic',
     send_last_published_item text NOT NULL DEFAULT 'on_sub'
         CHECK (send_last_published_item IN ('never', 'on_sub')),
     publish_model text NOT NULL DEFAULT 'publishers'
@@ -52,11 +53,11 @@
     resource text,
     node_id integer NOT NULL REFERENCES nodes ON delete CASCADE,
     state text NOT NULL DEFAULT 'subscribed'
-    	CHECK (state IN ('subscribed', 'pending', 'unconfigured')),
+        CHECK (state IN ('subscribed', 'pending', 'unconfigured')),
     subscription_type text
-    	CHECK (subscription_type IN (NULL, 'items', 'nodes')),
+        CHECK (subscription_type IN (NULL, 'items', 'nodes')),
     subscription_depth text
-    	CHECK (subscription_depth IN (NULL, '1', 'all')),
+        CHECK (subscription_depth IN (NULL, '1', 'all')),
     UNIQUE (entity_id, resource, node_id));
 
 CREATE TABLE items (
@@ -64,7 +65,9 @@
     node_id integer NOT NULL REFERENCES nodes ON DELETE CASCADE,
     item text NOT NULL,
     publisher text NOT NULL,
-    data xml,
+    data xml NOT NULL,
+    data_fts tsvector GENERATED ALWAYS AS (to_tsvector(data_fts_cfg, data::text)) STORED,
+    data_fts_cfg regconfig NOT NULL DEFAULT 'simple',
     access_model text NOT NULL DEFAULT 'open'
         CHECK (access_model IN ('open', 'publisher-roster', 'whitelist')),
     created timestamp with time zone NOT NULL DEFAULT now(),
@@ -100,9 +103,28 @@
     UNIQUE (item_id,category)
 );
 
+/* Full Text Search */
+CREATE INDEX items_data_fts ON items USING GIN (data_fts);
+
+CREATE OR REPLACE FUNCTION update_data_fts() RETURNS TRIGGER AS
+$$
+BEGIN
+    UPDATE items SET data_fts_cfg=replace(new.fts_language, 'generic', 'simple')::regconfig
+        WHERE items.node_id=new.node_id
+            AND NOT EXISTS(SELECT FROM item_languages AS lang WHERE lang.item_id=items.item_id);
+    RETURN new;
+END;
+$$
+language plpgsql;
+
+CREATE TRIGGER nodes_fts_language_update
+     AFTER UPDATE OF fts_language ON nodes
+     FOR EACH ROW
+     EXECUTE PROCEDURE update_data_fts();
+
 CREATE TABLE metadata (
-	key text PRIMARY KEY,
-	value text
+    key text PRIMARY KEY,
+    value text
 );
 
 INSERT INTO metadata VALUES ('version', '5');