diff sat/memory/sqlite.py @ 3288:780fb8dd07ef

core (memory/sqlite): new database schema (v9): this new schema brings the new `public_id` column, which will be used to serve files via HTTP, splits `mime_type` in 2 (`media_type` and `media_subtype`) to make indexing/media filtering easier, and fixes indexes for `files` table.
author Goffi <goffi@goffi.org>
date Fri, 29 May 2020 21:50:49 +0200
parents 7255286a298a
children 83f25da66bec
line wrap: on
line diff
--- a/sat/memory/sqlite.py	Fri May 29 21:07:10 2020 +0200
+++ b/sat/memory/sqlite.py	Fri May 29 21:50:49 2020 +0200
@@ -38,7 +38,7 @@
 
 log = getLogger(__name__)
 
-CURRENT_DB_VERSION = 8
+CURRENT_DB_VERSION = 9
 
 # XXX: DATABASE schemas are used in the following way:
 #      - 'current' key is for the actual database schema, for a new base
@@ -82,14 +82,16 @@
                                                    ("PRIMARY KEY (namespace, key)",))),
                               ('private_ind_bin', (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value BLOB"),
                                                    ("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))),
-                              ('files',           (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL",
+                              ('files',           (("id TEXT NOT NULL", "public_id TEXT", "version TEXT NOT NULL",
+                                                    "parent TEXT NOT NULL",
                                                     "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format(
                                                         file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY),
                                                     "file_hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER",
-                                                    "namespace TEXT", "mime_type TEXT",
+                                                    "namespace TEXT", "media_type TEXT", "media_subtype TEXT",
                                                     "created DATETIME NOT NULL", "modified DATETIME",
                                                     "owner TEXT", "access TEXT", "extra TEXT", "profile_id INTEGER"),
-                                                   ("PRIMARY KEY (id, version)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))),
+                                                   ("PRIMARY KEY (id, version)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE",
+                                                    "UNIQUE (public_id)"))),
                               )),
                     'INSERT': OrderedDict((
                               ('message_types', (("'chat'",),
@@ -105,8 +107,12 @@
                               ('message', ('history_uid',)),
                               ('subject', ('history_uid',)),
                               ('thread', ('history_uid',)),
-                              ('files', ('profile_id', 'mime_type', 'owner', 'parent'))),
+                              ('files', (('profile_id', 'owner', 'media_type', 'media_subtype'),
+                                         ('profile_id', 'owner', 'parent'))),
+                             )
                     },
+        9:         {'specific': 'update_v9'
+                   },
         8:         {'specific': 'update_v8'
                    },
         7:         {'specific': 'update_v7'
@@ -834,7 +840,7 @@
     @defer.inlineCallbacks
     def getFiles(self, client, file_id=None, version='', parent=None, type_=None,
                  file_hash=None, hash_algo=None, name=None, namespace=None, mime_type=None,
-                 owner=None, access=None, projection=None, unique=False):
+                 public_id=None, owner=None, access=None, projection=None, unique=False):
         """retrieve files with with given filters
 
         @param file_id(unicode, None): id of the file
@@ -856,12 +862,19 @@
             query_parts.append('DISTINCT')
         if projection is None:
             projection = ['id', 'version', 'parent', 'type', 'file_hash', 'hash_algo', 'name',
-                          'size', 'namespace', 'mime_type', 'created', 'modified', 'owner',
+                          'size', 'namespace', 'media_type', 'media_subtype', 'public_id', 'created', 'modified', 'owner',
                           'access', 'extra']
         query_parts.append(','.join(projection))
         query_parts.append("FROM files WHERE")
-        filters = ['profile_id=?']
-        args = [self.profiles[client.profile]]
+        if client is not None:
+            filters = ['profile_id=?']
+            args = [self.profiles[client.profile]]
+        else:
+            if public_id is None:
+                raise exceptions.InternalError(
+                    "client can only be omitted when public_id is set")
+            filters = []
+            args = []
 
         if file_id is not None:
             filters.append('id=?')
@@ -888,8 +901,15 @@
             filters.append('namespace=?')
             args.append(namespace)
         if mime_type is not None:
-            filters.append('mime_type=?')
-            args.append(mime_type)
+            if '/' in mime_type:
+                filters.extend('media_type=?', 'media_subtype=?')
+                args.extend(mime_type.split('/', 1))
+            else:
+                filters.append('media_type=?')
+                args.append(mime_type)
+        if public_id is not None:
+            filters.append('public_id=?')
+            args.append(public_id)
         if owner is not None:
             filters.append('owner=?')
             args.append(owner.full())
@@ -917,24 +937,25 @@
 
     def setFile(self, client, name, file_id, version='', parent=None, type_=C.FILE_TYPE_FILE,
                 file_hash=None, hash_algo=None, size=None, namespace=None, mime_type=None,
-                created=None, modified=None, owner=None, access=None, extra=None):
+                public_id=None, created=None, modified=None, owner=None, access=None, extra=None):
         """set a file metadata
 
         @param client(SatXMPPClient): client owning the file
-        @param name(unicode): name of the file (must not contain "/")
-        @param file_id(unicode): unique id of the file
-        @param version(unicode): version of this file
-        @param parent(unicode): id of the directory containing this file
+        @param name(str): name of the file (must not contain "/")
+        @param file_id(str): unique id of the file
+        @param version(str): version of this file
+        @param parent(str): id of the directory containing this file
             None if it is a root file/directory
-        @param type_(unicode): one of:
+        @param type_(str): one of:
             - file
             - directory
-        @param file_hash(unicode): unique hash of the payload
-        @param hash_algo(unicode): algorithm used for hashing the file (usually sha-256)
+        @param file_hash(str): unique hash of the payload
+        @param hash_algo(str): algorithm used for hashing the file (usually sha-256)
         @param size(int): size in bytes
-        @param namespace(unicode, None): identifier (human readable is better) to group files
+        @param namespace(str, None): identifier (human readable is better) to group files
             for instance, namespace could be used to group files in a specific photo album
-        @param mime_type(unicode): MIME type of the file, or None if not known/guessed
+        @param mime_type(str): media type of the file, or None if not known/guessed
+        @param public_id(str): ID used to server the file publicly via HTTP
         @param created(int): UNIX time of creation
         @param modified(int,None): UNIX time of last modification, or None to use created date
         @param owner(jid.JID, None): jid of the owner of the file (mainly useful for component)
@@ -943,13 +964,21 @@
             will be encoded to json in database
         """
         if extra is not None:
-         assert isinstance(extra, dict)
+            assert isinstance(extra, dict)
+
+        if mime_type is None:
+            media_type = media_subtype = None
+        elif '/' in mime_type:
+            media_type, media_subtype = mime_type.split('/', 1)
+        else:
+            media_type, media_subtype = mime_type, None
+
         query = ('INSERT INTO files(id, version, parent, type, file_hash, hash_algo, name, size, namespace, '
-                 'mime_type, created, modified, owner, access, extra, profile_id) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)')
+                 'media_type, media_subtype, public_id, created, modified, owner, access, extra, profile_id) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)')
         d = self.dbpool.runQuery(query, (file_id, version.strip(), parent, type_,
                                          file_hash, hash_algo,
                                          name, size, namespace,
-                                         mime_type, created, modified,
+                                         media_type, media_subtype, public_id, created, modified,
                                          owner.full() if owner is not None else None,
                                          json.dumps(access) if access else None,
                                          json.dumps(extra) if extra else None,
@@ -1357,6 +1386,51 @@
             ret.extend(cmds or [])
         defer.returnValue(ret)
 
+    def update_v9(self):
+        """Update database from v8 to v9
+
+        (public_id on file with UNIQUE constraint, files indexes fix, media_type split)
+        """
+        # we have to do a specific update because we can't set UNIQUE constraint when adding a column
+        # (see https://sqlite.org/lang_altertable.html#alter_table_add_column)
+        log.info("Database update to v9")
+
+        create = {
+            'files': (("id TEXT NOT NULL", "public_id TEXT", "version TEXT NOT NULL",
+                       "parent TEXT NOT NULL",
+                       "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format(
+                           file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY),
+                       "file_hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER",
+                       "namespace TEXT", "media_type TEXT", "media_subtype TEXT",
+                       "created DATETIME NOT NULL", "modified DATETIME",
+                       "owner TEXT", "access TEXT", "extra TEXT", "profile_id INTEGER"),
+                      ("PRIMARY KEY (id, version)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE",
+                       "UNIQUE (public_id)")),
+
+        }
+        index = tuple({'files': (('profile_id', 'owner', 'media_type', 'media_subtype'),
+                                 ('profile_id', 'owner', 'parent'))}.items())
+        # XXX: Sqlite doc recommends to do the other way around (i.e. create new table,
+        #   copy, drop old table then rename), but the RENAME would then add
+        #   "IF NOT EXISTS" which breaks the (admittely fragile) schema comparison.
+        # TODO: rework sqlite update management, don't try to automatically detect
+        #   update, the database version is now enough.
+        statements = ["ALTER TABLE files RENAME TO files_old"]
+        statements.extend(Updater.createData2Raw(create))
+        cols = ','.join([col_stmt.split()[0] for col_stmt in create['files'][0] if "public_id" not in col_stmt])
+        old_cols = cols[:]
+        # we need to split mime_type to the new media_type and media_subtype
+        old_cols = old_cols.replace(
+            'media_type,media_subtype',
+            "substr(mime_type, 0, instr(mime_type,'/')),substr(mime_type, instr(mime_type,'/')+1)"
+        )
+        statements.extend([
+            f"INSERT INTO files({cols}) SELECT {old_cols} FROM files_old",
+            f"DROP TABLE files_old",
+        ])
+        statements.extend(Updater.indexData2Raw(index))
+        return statements
+
     def update_v8(self):
         """Update database from v7 to v8 (primary keys order changes + indexes)"""
         log.info("Database update to v8")