# HG changeset patch # User Goffi # Date 1590781849 -7200 # Node ID 780fb8dd07ef6195aad700607c7d6dba49ffbd0c # Parent a4b8c9bcfb57fc2d11c3d752cd267ea709c31eb2 core (memory/sqlite): new database schema (v9): this new schema brings the new `public_id` column, which will be used to serve files via HTTP, splits `mime_type` in 2 (`media_type` and `media_subtype`) to make indexing/media filtering easier, and fixes indexes for `files` table. diff -r a4b8c9bcfb57 -r 780fb8dd07ef sat/memory/memory.py --- a/sat/memory/memory.py Fri May 29 21:07:10 2020 +0200 +++ b/sat/memory/memory.py Fri May 29 21:50:49 2020 +0200 @@ -22,6 +22,7 @@ import shortuuid import mimetypes import time +from pathlib import Path from uuid import uuid4 from collections import namedtuple from twisted.python import failure @@ -1293,8 +1294,8 @@ def getFiles( self, client, peer_jid, file_id=None, version=None, parent=None, path=None, type_=None, file_hash=None, hash_algo=None, name=None, namespace=None, - mime_type=None, owner=None, access=None, projection=None, unique=False, - perms_to_check=(C.ACCESS_PERM_READ,)): + mime_type=None, public_id=None, owner=None, access=None, projection=None, + unique=False, perms_to_check=(C.ACCESS_PERM_READ,)): """Retrieve files with with given filters @param peer_jid(jid.JID, None): jid trying to access the file @@ -1315,6 +1316,7 @@ @param name(unicode, None): name of the file to retrieve @param namespace(unicode, None): namespace of the files to retrieve @param mime_type(unicode, None): filter on this mime type + @param public_id(unicode, None): filter on this public id @param owner(jid.JID, None): if not None, only get files from this owner @param access(dict, None): get file with given access (see [setFile]) @param projection(list[unicode], None): name of columns to retrieve @@ -1369,6 +1371,7 @@ name=name, namespace=namespace, mime_type=mime_type, + public_id=public_id, owner=owner, access=access, projection=projection, @@ -1389,11 +1392,12 @@ @defer.inlineCallbacks def setFile( - self, client, name, file_id=None, version="", parent=None, path=None, - type_=C.FILE_TYPE_FILE, file_hash=None, hash_algo=None, size=None, - namespace=None, mime_type=None, created=None, modified=None, owner=None, - access=None, extra=None, peer_jid=None, perms_to_check=(C.ACCESS_PERM_WRITE,) - ): + self, client, name, file_id=None, version="", parent=None, path=None, + type_=C.FILE_TYPE_FILE, file_hash=None, hash_algo=None, size=None, + namespace=None, mime_type=None, public_id=None, created=None, modified=None, + owner=None, access=None, extra=None, peer_jid=None, + perms_to_check=(C.ACCESS_PERM_WRITE,) + ): """Set a file metadata @param name(unicode): basename of the file @@ -1411,6 +1415,7 @@ files For instance, namespace could be used to group files in a specific photo album @param mime_type(unicode): MIME type of the file, or None if not known/guessed + @param public_id(unicode): id used to share publicly the file via HTTP @param created(int): UNIX time of creation @param modified(int,None): UNIX time of last modification, or None to use created date @@ -1448,7 +1453,11 @@ ): raise ValueError("file_hash and hash_algo must be set at the same time") if mime_type is None: - mime_type, file_encoding = mimetypes.guess_type(name) + mime_type, __ = mimetypes.guess_type(name) + else: + mime_type = mime_type.lower() + if public_id is not None: + assert len(public_id)>0 if created is None: created = time.time() if namespace is not None: @@ -1477,6 +1486,7 @@ parent=parent, type_=C.FILE_TYPE_DIRECTORY, namespace=namespace, + public_id=public_id, created=time.time(), owner=owner, access=access, @@ -1498,6 +1508,7 @@ size=size, namespace=namespace, mime_type=mime_type, + public_id=public_id, created=created, modified=modified, owner=owner, @@ -1518,7 +1529,14 @@ return self.storage.fileUpdate(file_id, column, update_cb) @defer.inlineCallbacks - def _deleteFile(self, client, peer_jid, recursive, files_path, file_data): + def _deleteFile( + self, + client, + peer_jid: jid.JID, + recursive: bool, + files_path: Path, + file_data: dict + ): """Internal method to delete files/directories recursively @param peer_jid(jid.JID): entity requesting the deletion (must be owner of files diff -r a4b8c9bcfb57 -r 780fb8dd07ef sat/memory/sqlite.py --- a/sat/memory/sqlite.py Fri May 29 21:07:10 2020 +0200 +++ b/sat/memory/sqlite.py Fri May 29 21:50:49 2020 +0200 @@ -38,7 +38,7 @@ log = getLogger(__name__) -CURRENT_DB_VERSION = 8 +CURRENT_DB_VERSION = 9 # XXX: DATABASE schemas are used in the following way: # - 'current' key is for the actual database schema, for a new base @@ -82,14 +82,16 @@ ("PRIMARY KEY (namespace, key)",))), ('private_ind_bin', (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value BLOB"), ("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), - ('files', (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL", + ('files', (("id TEXT NOT NULL", "public_id TEXT", "version TEXT NOT NULL", + "parent TEXT NOT NULL", "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format( file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY), "file_hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER", - "namespace TEXT", "mime_type TEXT", + "namespace TEXT", "media_type TEXT", "media_subtype TEXT", "created DATETIME NOT NULL", "modified DATETIME", "owner TEXT", "access TEXT", "extra TEXT", "profile_id INTEGER"), - ("PRIMARY KEY (id, version)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), + ("PRIMARY KEY (id, version)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE", + "UNIQUE (public_id)"))), )), 'INSERT': OrderedDict(( ('message_types', (("'chat'",), @@ -105,8 +107,12 @@ ('message', ('history_uid',)), ('subject', ('history_uid',)), ('thread', ('history_uid',)), - ('files', ('profile_id', 'mime_type', 'owner', 'parent'))), + ('files', (('profile_id', 'owner', 'media_type', 'media_subtype'), + ('profile_id', 'owner', 'parent'))), + ) }, + 9: {'specific': 'update_v9' + }, 8: {'specific': 'update_v8' }, 7: {'specific': 'update_v7' @@ -834,7 +840,7 @@ @defer.inlineCallbacks def getFiles(self, client, file_id=None, version='', parent=None, type_=None, file_hash=None, hash_algo=None, name=None, namespace=None, mime_type=None, - owner=None, access=None, projection=None, unique=False): + public_id=None, owner=None, access=None, projection=None, unique=False): """retrieve files with with given filters @param file_id(unicode, None): id of the file @@ -856,12 +862,19 @@ query_parts.append('DISTINCT') if projection is None: projection = ['id', 'version', 'parent', 'type', 'file_hash', 'hash_algo', 'name', - 'size', 'namespace', 'mime_type', 'created', 'modified', 'owner', + 'size', 'namespace', 'media_type', 'media_subtype', 'public_id', 'created', 'modified', 'owner', 'access', 'extra'] query_parts.append(','.join(projection)) query_parts.append("FROM files WHERE") - filters = ['profile_id=?'] - args = [self.profiles[client.profile]] + if client is not None: + filters = ['profile_id=?'] + args = [self.profiles[client.profile]] + else: + if public_id is None: + raise exceptions.InternalError( + "client can only be omitted when public_id is set") + filters = [] + args = [] if file_id is not None: filters.append('id=?') @@ -888,8 +901,15 @@ filters.append('namespace=?') args.append(namespace) if mime_type is not None: - filters.append('mime_type=?') - args.append(mime_type) + if '/' in mime_type: + filters.extend('media_type=?', 'media_subtype=?') + args.extend(mime_type.split('/', 1)) + else: + filters.append('media_type=?') + args.append(mime_type) + if public_id is not None: + filters.append('public_id=?') + args.append(public_id) if owner is not None: filters.append('owner=?') args.append(owner.full()) @@ -917,24 +937,25 @@ def setFile(self, client, name, file_id, version='', parent=None, type_=C.FILE_TYPE_FILE, file_hash=None, hash_algo=None, size=None, namespace=None, mime_type=None, - created=None, modified=None, owner=None, access=None, extra=None): + public_id=None, created=None, modified=None, owner=None, access=None, extra=None): """set a file metadata @param client(SatXMPPClient): client owning the file - @param name(unicode): name of the file (must not contain "/") - @param file_id(unicode): unique id of the file - @param version(unicode): version of this file - @param parent(unicode): id of the directory containing this file + @param name(str): name of the file (must not contain "/") + @param file_id(str): unique id of the file + @param version(str): version of this file + @param parent(str): id of the directory containing this file None if it is a root file/directory - @param type_(unicode): one of: + @param type_(str): one of: - file - directory - @param file_hash(unicode): unique hash of the payload - @param hash_algo(unicode): algorithm used for hashing the file (usually sha-256) + @param file_hash(str): unique hash of the payload + @param hash_algo(str): algorithm used for hashing the file (usually sha-256) @param size(int): size in bytes - @param namespace(unicode, None): identifier (human readable is better) to group files + @param namespace(str, None): identifier (human readable is better) to group files for instance, namespace could be used to group files in a specific photo album - @param mime_type(unicode): MIME type of the file, or None if not known/guessed + @param mime_type(str): media type of the file, or None if not known/guessed + @param public_id(str): ID used to server the file publicly via HTTP @param created(int): UNIX time of creation @param modified(int,None): UNIX time of last modification, or None to use created date @param owner(jid.JID, None): jid of the owner of the file (mainly useful for component) @@ -943,13 +964,21 @@ will be encoded to json in database """ if extra is not None: - assert isinstance(extra, dict) + assert isinstance(extra, dict) + + if mime_type is None: + media_type = media_subtype = None + elif '/' in mime_type: + media_type, media_subtype = mime_type.split('/', 1) + else: + media_type, media_subtype = mime_type, None + query = ('INSERT INTO files(id, version, parent, type, file_hash, hash_algo, name, size, namespace, ' - 'mime_type, created, modified, owner, access, extra, profile_id) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)') + 'media_type, media_subtype, public_id, created, modified, owner, access, extra, profile_id) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)') d = self.dbpool.runQuery(query, (file_id, version.strip(), parent, type_, file_hash, hash_algo, name, size, namespace, - mime_type, created, modified, + media_type, media_subtype, public_id, created, modified, owner.full() if owner is not None else None, json.dumps(access) if access else None, json.dumps(extra) if extra else None, @@ -1357,6 +1386,51 @@ ret.extend(cmds or []) defer.returnValue(ret) + def update_v9(self): + """Update database from v8 to v9 + + (public_id on file with UNIQUE constraint, files indexes fix, media_type split) + """ + # we have to do a specific update because we can't set UNIQUE constraint when adding a column + # (see https://sqlite.org/lang_altertable.html#alter_table_add_column) + log.info("Database update to v9") + + create = { + 'files': (("id TEXT NOT NULL", "public_id TEXT", "version TEXT NOT NULL", + "parent TEXT NOT NULL", + "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format( + file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY), + "file_hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER", + "namespace TEXT", "media_type TEXT", "media_subtype TEXT", + "created DATETIME NOT NULL", "modified DATETIME", + "owner TEXT", "access TEXT", "extra TEXT", "profile_id INTEGER"), + ("PRIMARY KEY (id, version)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE", + "UNIQUE (public_id)")), + + } + index = tuple({'files': (('profile_id', 'owner', 'media_type', 'media_subtype'), + ('profile_id', 'owner', 'parent'))}.items()) + # XXX: Sqlite doc recommends to do the other way around (i.e. create new table, + # copy, drop old table then rename), but the RENAME would then add + # "IF NOT EXISTS" which breaks the (admittely fragile) schema comparison. + # TODO: rework sqlite update management, don't try to automatically detect + # update, the database version is now enough. + statements = ["ALTER TABLE files RENAME TO files_old"] + statements.extend(Updater.createData2Raw(create)) + cols = ','.join([col_stmt.split()[0] for col_stmt in create['files'][0] if "public_id" not in col_stmt]) + old_cols = cols[:] + # we need to split mime_type to the new media_type and media_subtype + old_cols = old_cols.replace( + 'media_type,media_subtype', + "substr(mime_type, 0, instr(mime_type,'/')),substr(mime_type, instr(mime_type,'/')+1)" + ) + statements.extend([ + f"INSERT INTO files({cols}) SELECT {old_cols} FROM files_old", + f"DROP TABLE files_old", + ]) + statements.extend(Updater.indexData2Raw(index)) + return statements + def update_v8(self): """Update database from v7 to v8 (primary keys order changes + indexes)""" log.info("Database update to v8") diff -r a4b8c9bcfb57 -r 780fb8dd07ef sat/plugins/plugin_comp_file_sharing_management.py --- a/sat/plugins/plugin_comp_file_sharing_management.py Fri May 29 21:07:10 2020 +0200 +++ b/sat/plugins/plugin_comp_file_sharing_management.py Fri May 29 21:50:49 2020 +0200 @@ -381,9 +381,9 @@ yield self._genThumbs(client, requestor, namespace, sub_file_data) elif file_data['type'] == C.FILE_TYPE_FILE: - mime_type = file_data['mime_type'] + media_type = file_data['media_type'] file_path = os.path.join(self.files_path, file_data['file_hash']) - if mime_type is not None and mime_type.startswith("image"): + if media_type == 'image': thumbnails = [] for max_thumb_size in (self._t.SIZE_SMALL, self._t.SIZE_MEDIUM):