Mercurial > libervia-backend
comparison sat/memory/sqlite.py @ 3288:780fb8dd07ef
core (memory/sqlite): new database schema (v9):
this new schema brings the new `public_id` column, which will be used to serve files via
HTTP, splits `mime_type` in 2 (`media_type` and `media_subtype`) to make indexing/media
filtering easier, and fixes indexes for `files` table.
author | Goffi <goffi@goffi.org> |
---|---|
date | Fri, 29 May 2020 21:50:49 +0200 |
parents | 7255286a298a |
children | 83f25da66bec |
comparison
equal
deleted
inserted
replaced
3287:a4b8c9bcfb57 | 3288:780fb8dd07ef |
---|---|
36 import sqlite3 | 36 import sqlite3 |
37 import json | 37 import json |
38 | 38 |
39 log = getLogger(__name__) | 39 log = getLogger(__name__) |
40 | 40 |
41 CURRENT_DB_VERSION = 8 | 41 CURRENT_DB_VERSION = 9 |
42 | 42 |
43 # XXX: DATABASE schemas are used in the following way: | 43 # XXX: DATABASE schemas are used in the following way: |
44 # - 'current' key is for the actual database schema, for a new base | 44 # - 'current' key is for the actual database schema, for a new base |
45 # - x(int) is for update needed between x-1 and x. All number are needed between y and z to do an update | 45 # - x(int) is for update needed between x-1 and x. All number are needed between y and z to do an update |
46 # e.g.: if CURRENT_DB_VERSION is 6, 'current' is the actuel DB, and to update from version 3, numbers 4, 5 and 6 are needed | 46 # e.g.: if CURRENT_DB_VERSION is 6, 'current' is the actuel DB, and to update from version 3, numbers 4, 5 and 6 are needed |
80 ("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), | 80 ("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), |
81 ('private_gen_bin', (("namespace TEXT", "key TEXT", "value BLOB"), | 81 ('private_gen_bin', (("namespace TEXT", "key TEXT", "value BLOB"), |
82 ("PRIMARY KEY (namespace, key)",))), | 82 ("PRIMARY KEY (namespace, key)",))), |
83 ('private_ind_bin', (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value BLOB"), | 83 ('private_ind_bin', (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value BLOB"), |
84 ("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), | 84 ("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), |
85 ('files', (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL", | 85 ('files', (("id TEXT NOT NULL", "public_id TEXT", "version TEXT NOT NULL", |
86 "parent TEXT NOT NULL", | |
86 "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format( | 87 "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format( |
87 file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY), | 88 file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY), |
88 "file_hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER", | 89 "file_hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER", |
89 "namespace TEXT", "mime_type TEXT", | 90 "namespace TEXT", "media_type TEXT", "media_subtype TEXT", |
90 "created DATETIME NOT NULL", "modified DATETIME", | 91 "created DATETIME NOT NULL", "modified DATETIME", |
91 "owner TEXT", "access TEXT", "extra TEXT", "profile_id INTEGER"), | 92 "owner TEXT", "access TEXT", "extra TEXT", "profile_id INTEGER"), |
92 ("PRIMARY KEY (id, version)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), | 93 ("PRIMARY KEY (id, version)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE", |
94 "UNIQUE (public_id)"))), | |
93 )), | 95 )), |
94 'INSERT': OrderedDict(( | 96 'INSERT': OrderedDict(( |
95 ('message_types', (("'chat'",), | 97 ('message_types', (("'chat'",), |
96 ("'error'",), | 98 ("'error'",), |
97 ("'groupchat'",), | 99 ("'groupchat'",), |
103 'INDEX': (('history', (('profile_id', 'timestamp'), | 105 'INDEX': (('history', (('profile_id', 'timestamp'), |
104 ('profile_id', 'received_timestamp'))), | 106 ('profile_id', 'received_timestamp'))), |
105 ('message', ('history_uid',)), | 107 ('message', ('history_uid',)), |
106 ('subject', ('history_uid',)), | 108 ('subject', ('history_uid',)), |
107 ('thread', ('history_uid',)), | 109 ('thread', ('history_uid',)), |
108 ('files', ('profile_id', 'mime_type', 'owner', 'parent'))), | 110 ('files', (('profile_id', 'owner', 'media_type', 'media_subtype'), |
111 ('profile_id', 'owner', 'parent'))), | |
112 ) | |
109 }, | 113 }, |
114 9: {'specific': 'update_v9' | |
115 }, | |
110 8: {'specific': 'update_v8' | 116 8: {'specific': 'update_v8' |
111 }, | 117 }, |
112 7: {'specific': 'update_v7' | 118 7: {'specific': 'update_v7' |
113 }, | 119 }, |
114 6: {'cols create': {'history': ('stanza_id TEXT',)}, | 120 6: {'cols create': {'history': ('stanza_id TEXT',)}, |
832 ## Files | 838 ## Files |
833 | 839 |
834 @defer.inlineCallbacks | 840 @defer.inlineCallbacks |
835 def getFiles(self, client, file_id=None, version='', parent=None, type_=None, | 841 def getFiles(self, client, file_id=None, version='', parent=None, type_=None, |
836 file_hash=None, hash_algo=None, name=None, namespace=None, mime_type=None, | 842 file_hash=None, hash_algo=None, name=None, namespace=None, mime_type=None, |
837 owner=None, access=None, projection=None, unique=False): | 843 public_id=None, owner=None, access=None, projection=None, unique=False): |
838 """retrieve files with with given filters | 844 """retrieve files with with given filters |
839 | 845 |
840 @param file_id(unicode, None): id of the file | 846 @param file_id(unicode, None): id of the file |
841 None to ignore | 847 None to ignore |
842 @param version(unicode, None): version of the file | 848 @param version(unicode, None): version of the file |
854 query_parts = ["SELECT"] | 860 query_parts = ["SELECT"] |
855 if unique: | 861 if unique: |
856 query_parts.append('DISTINCT') | 862 query_parts.append('DISTINCT') |
857 if projection is None: | 863 if projection is None: |
858 projection = ['id', 'version', 'parent', 'type', 'file_hash', 'hash_algo', 'name', | 864 projection = ['id', 'version', 'parent', 'type', 'file_hash', 'hash_algo', 'name', |
859 'size', 'namespace', 'mime_type', 'created', 'modified', 'owner', | 865 'size', 'namespace', 'media_type', 'media_subtype', 'public_id', 'created', 'modified', 'owner', |
860 'access', 'extra'] | 866 'access', 'extra'] |
861 query_parts.append(','.join(projection)) | 867 query_parts.append(','.join(projection)) |
862 query_parts.append("FROM files WHERE") | 868 query_parts.append("FROM files WHERE") |
863 filters = ['profile_id=?'] | 869 if client is not None: |
864 args = [self.profiles[client.profile]] | 870 filters = ['profile_id=?'] |
871 args = [self.profiles[client.profile]] | |
872 else: | |
873 if public_id is None: | |
874 raise exceptions.InternalError( | |
875 "client can only be omitted when public_id is set") | |
876 filters = [] | |
877 args = [] | |
865 | 878 |
866 if file_id is not None: | 879 if file_id is not None: |
867 filters.append('id=?') | 880 filters.append('id=?') |
868 args.append(file_id) | 881 args.append(file_id) |
869 if version is not None: | 882 if version is not None: |
886 args.append(name) | 899 args.append(name) |
887 if namespace is not None: | 900 if namespace is not None: |
888 filters.append('namespace=?') | 901 filters.append('namespace=?') |
889 args.append(namespace) | 902 args.append(namespace) |
890 if mime_type is not None: | 903 if mime_type is not None: |
891 filters.append('mime_type=?') | 904 if '/' in mime_type: |
892 args.append(mime_type) | 905 filters.extend('media_type=?', 'media_subtype=?') |
906 args.extend(mime_type.split('/', 1)) | |
907 else: | |
908 filters.append('media_type=?') | |
909 args.append(mime_type) | |
910 if public_id is not None: | |
911 filters.append('public_id=?') | |
912 args.append(public_id) | |
893 if owner is not None: | 913 if owner is not None: |
894 filters.append('owner=?') | 914 filters.append('owner=?') |
895 args.append(owner.full()) | 915 args.append(owner.full()) |
896 if access is not None: | 916 if access is not None: |
897 raise NotImplementedError('Access check is not implemented yet') | 917 raise NotImplementedError('Access check is not implemented yet') |
915 file_data['owner'] = jid.JID(owner) | 935 file_data['owner'] = jid.JID(owner) |
916 defer.returnValue(files_data) | 936 defer.returnValue(files_data) |
917 | 937 |
918 def setFile(self, client, name, file_id, version='', parent=None, type_=C.FILE_TYPE_FILE, | 938 def setFile(self, client, name, file_id, version='', parent=None, type_=C.FILE_TYPE_FILE, |
919 file_hash=None, hash_algo=None, size=None, namespace=None, mime_type=None, | 939 file_hash=None, hash_algo=None, size=None, namespace=None, mime_type=None, |
920 created=None, modified=None, owner=None, access=None, extra=None): | 940 public_id=None, created=None, modified=None, owner=None, access=None, extra=None): |
921 """set a file metadata | 941 """set a file metadata |
922 | 942 |
923 @param client(SatXMPPClient): client owning the file | 943 @param client(SatXMPPClient): client owning the file |
924 @param name(unicode): name of the file (must not contain "/") | 944 @param name(str): name of the file (must not contain "/") |
925 @param file_id(unicode): unique id of the file | 945 @param file_id(str): unique id of the file |
926 @param version(unicode): version of this file | 946 @param version(str): version of this file |
927 @param parent(unicode): id of the directory containing this file | 947 @param parent(str): id of the directory containing this file |
928 None if it is a root file/directory | 948 None if it is a root file/directory |
929 @param type_(unicode): one of: | 949 @param type_(str): one of: |
930 - file | 950 - file |
931 - directory | 951 - directory |
932 @param file_hash(unicode): unique hash of the payload | 952 @param file_hash(str): unique hash of the payload |
933 @param hash_algo(unicode): algorithm used for hashing the file (usually sha-256) | 953 @param hash_algo(str): algorithm used for hashing the file (usually sha-256) |
934 @param size(int): size in bytes | 954 @param size(int): size in bytes |
935 @param namespace(unicode, None): identifier (human readable is better) to group files | 955 @param namespace(str, None): identifier (human readable is better) to group files |
936 for instance, namespace could be used to group files in a specific photo album | 956 for instance, namespace could be used to group files in a specific photo album |
937 @param mime_type(unicode): MIME type of the file, or None if not known/guessed | 957 @param mime_type(str): media type of the file, or None if not known/guessed |
958 @param public_id(str): ID used to server the file publicly via HTTP | |
938 @param created(int): UNIX time of creation | 959 @param created(int): UNIX time of creation |
939 @param modified(int,None): UNIX time of last modification, or None to use created date | 960 @param modified(int,None): UNIX time of last modification, or None to use created date |
940 @param owner(jid.JID, None): jid of the owner of the file (mainly useful for component) | 961 @param owner(jid.JID, None): jid of the owner of the file (mainly useful for component) |
941 @param access(dict, None): serialisable dictionary with access rules. See [memory.memory] for details | 962 @param access(dict, None): serialisable dictionary with access rules. See [memory.memory] for details |
942 @param extra(dict, None): serialisable dictionary of any extra data | 963 @param extra(dict, None): serialisable dictionary of any extra data |
943 will be encoded to json in database | 964 will be encoded to json in database |
944 """ | 965 """ |
945 if extra is not None: | 966 if extra is not None: |
946 assert isinstance(extra, dict) | 967 assert isinstance(extra, dict) |
968 | |
969 if mime_type is None: | |
970 media_type = media_subtype = None | |
971 elif '/' in mime_type: | |
972 media_type, media_subtype = mime_type.split('/', 1) | |
973 else: | |
974 media_type, media_subtype = mime_type, None | |
975 | |
947 query = ('INSERT INTO files(id, version, parent, type, file_hash, hash_algo, name, size, namespace, ' | 976 query = ('INSERT INTO files(id, version, parent, type, file_hash, hash_algo, name, size, namespace, ' |
948 'mime_type, created, modified, owner, access, extra, profile_id) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)') | 977 'media_type, media_subtype, public_id, created, modified, owner, access, extra, profile_id) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)') |
949 d = self.dbpool.runQuery(query, (file_id, version.strip(), parent, type_, | 978 d = self.dbpool.runQuery(query, (file_id, version.strip(), parent, type_, |
950 file_hash, hash_algo, | 979 file_hash, hash_algo, |
951 name, size, namespace, | 980 name, size, namespace, |
952 mime_type, created, modified, | 981 media_type, media_subtype, public_id, created, modified, |
953 owner.full() if owner is not None else None, | 982 owner.full() if owner is not None else None, |
954 json.dumps(access) if access else None, | 983 json.dumps(access) if access else None, |
955 json.dumps(extra) if extra else None, | 984 json.dumps(extra) if extra else None, |
956 self.profiles[client.profile])) | 985 self.profiles[client.profile])) |
957 d.addErrback(lambda failure: log.error(_("Can't save file metadata for [{profile}]: {reason}".format(profile=client.profile, reason=failure)))) | 986 d.addErrback(lambda failure: log.error(_("Can't save file metadata for [{profile}]: {reason}".format(profile=client.profile, reason=failure)))) |
1355 if specific: | 1384 if specific: |
1356 cmds = yield getattr(self, specific)() | 1385 cmds = yield getattr(self, specific)() |
1357 ret.extend(cmds or []) | 1386 ret.extend(cmds or []) |
1358 defer.returnValue(ret) | 1387 defer.returnValue(ret) |
1359 | 1388 |
1389 def update_v9(self): | |
1390 """Update database from v8 to v9 | |
1391 | |
1392 (public_id on file with UNIQUE constraint, files indexes fix, media_type split) | |
1393 """ | |
1394 # we have to do a specific update because we can't set UNIQUE constraint when adding a column | |
1395 # (see https://sqlite.org/lang_altertable.html#alter_table_add_column) | |
1396 log.info("Database update to v9") | |
1397 | |
1398 create = { | |
1399 'files': (("id TEXT NOT NULL", "public_id TEXT", "version TEXT NOT NULL", | |
1400 "parent TEXT NOT NULL", | |
1401 "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format( | |
1402 file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY), | |
1403 "file_hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER", | |
1404 "namespace TEXT", "media_type TEXT", "media_subtype TEXT", | |
1405 "created DATETIME NOT NULL", "modified DATETIME", | |
1406 "owner TEXT", "access TEXT", "extra TEXT", "profile_id INTEGER"), | |
1407 ("PRIMARY KEY (id, version)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE", | |
1408 "UNIQUE (public_id)")), | |
1409 | |
1410 } | |
1411 index = tuple({'files': (('profile_id', 'owner', 'media_type', 'media_subtype'), | |
1412 ('profile_id', 'owner', 'parent'))}.items()) | |
1413 # XXX: Sqlite doc recommends to do the other way around (i.e. create new table, | |
1414 # copy, drop old table then rename), but the RENAME would then add | |
1415 # "IF NOT EXISTS" which breaks the (admittely fragile) schema comparison. | |
1416 # TODO: rework sqlite update management, don't try to automatically detect | |
1417 # update, the database version is now enough. | |
1418 statements = ["ALTER TABLE files RENAME TO files_old"] | |
1419 statements.extend(Updater.createData2Raw(create)) | |
1420 cols = ','.join([col_stmt.split()[0] for col_stmt in create['files'][0] if "public_id" not in col_stmt]) | |
1421 old_cols = cols[:] | |
1422 # we need to split mime_type to the new media_type and media_subtype | |
1423 old_cols = old_cols.replace( | |
1424 'media_type,media_subtype', | |
1425 "substr(mime_type, 0, instr(mime_type,'/')),substr(mime_type, instr(mime_type,'/')+1)" | |
1426 ) | |
1427 statements.extend([ | |
1428 f"INSERT INTO files({cols}) SELECT {old_cols} FROM files_old", | |
1429 f"DROP TABLE files_old", | |
1430 ]) | |
1431 statements.extend(Updater.indexData2Raw(index)) | |
1432 return statements | |
1433 | |
1360 def update_v8(self): | 1434 def update_v8(self): |
1361 """Update database from v7 to v8 (primary keys order changes + indexes)""" | 1435 """Update database from v7 to v8 (primary keys order changes + indexes)""" |
1362 log.info("Database update to v8") | 1436 log.info("Database update to v8") |
1363 statements = ["PRAGMA foreign_keys = OFF"] | 1437 statements = ["PRAGMA foreign_keys = OFF"] |
1364 | 1438 |