# HG changeset patch # User Goffi # Date 1519838919 -3600 # Node ID 898b6e1fdc7a2e08a90f3ccccf8b8149f67c493d # Parent af4a38ebf52a7bf1667e650cdbbe578614bf4374 memory (sqlite): files handling: /!\ new database schema version - added a table to handle files metadata and hierarchy. - new methods getFiles and setFile allow respectively to retrieve files corresponding to filters, and to set metadata for one file. - fixed stmnt_regex to detect correcly new schema (during schema validation/update on init) diff -r af4a38ebf52a -r 898b6e1fdc7a src/memory/sqlite.py --- a/src/memory/sqlite.py Wed Feb 28 18:28:39 2018 +0100 +++ b/src/memory/sqlite.py Wed Feb 28 18:28:39 2018 +0100 @@ -26,6 +26,7 @@ from sat.tools.config import fixConfigOption from twisted.enterprise import adbapi from twisted.internet import defer +from twisted.words.protocols.jabber import jid from twisted.python import failure from collections import OrderedDict import re @@ -33,8 +34,9 @@ import cPickle as pickle import hashlib import sqlite3 +import json -CURRENT_DB_VERSION = 4 +CURRENT_DB_VERSION = 5 # XXX: DATABASE schemas are used in the following way: # - 'current' key is for the actual database schema, for a new base @@ -44,7 +46,7 @@ # - 'CREATE': it contains an Ordered dict with table to create as keys, and a len 2 tuple as value, where value[0] are the columns definitions and value[1] are the table constraints # - 'INSERT': it contains an Ordered dict with table where values have to be inserted, and many tuples containing values to insert in the order of the rows (#TODO: manage named columns) # an update data dict (the ones with a number) can contains the keys 'create', 'delete', 'cols create', 'cols delete', 'cols modify', 'insert' or 'specific'. See Updater.generateUpdateData for more infos. This method can be used to autogenerate update_data, to ease the work of the developers. -# TODO: this database currently doesn't use indexes, it should +# TODO: indexes need to be improved DATABASE_SCHEMAS = { "current": {'CREATE': OrderedDict(( @@ -53,12 +55,12 @@ ('components', (("profile_id INTEGER PRIMARY KEY", "entry_point TEXT NOT NULL"), ("FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE",))), ('message_types', (("type TEXT PRIMARY KEY",), - tuple())), + ())), ('history', (("uid TEXT PRIMARY KEY", "update_uid TEXT", "profile_id INTEGER", "source TEXT", "dest TEXT", "source_res TEXT", "dest_res TEXT", "timestamp DATETIME NOT NULL", "received_timestamp DATETIME", # XXX: timestamp is the time when the message was emitted. If received time stamp is not NULL, the message was delayed and timestamp is the declared value (and received_timestamp the time of reception) "type TEXT", "extra BLOB"), ("FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE", "FOREIGN KEY(type) REFERENCES message_types(type)", - "UNIQUE (profile_id, timestamp, source, dest, source_res, dest_res)" # avoid storing 2 time the same message (specially for delayed cones) + "UNIQUE (profile_id, timestamp, source, dest, source_res, dest_res)" # avoid storing 2 time the same message (specially for delayed ones) ))), ('message', (("id INTEGER PRIMARY KEY ASC", "history_uid INTEGER", "message TEXT", "language TEXT"), ("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))), @@ -76,7 +78,15 @@ ('private_gen_bin', (("namespace TEXT", "key TEXT", "value BLOB"), ("PRIMARY KEY (namespace, key)",))), ('private_ind_bin', (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value BLOB"), - ("PRIMARY KEY (namespace, key, profile_id)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))) + ("PRIMARY KEY (namespace, key, profile_id)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), + ('files', (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL", + "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format( + file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY), + "hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER", + "namespace TEXT", "mime_type TEXT", + "created DATETIME NOT NULL", "modified DATETIME", + "owner TEXT", "access TEXT", "extra TEXT", "profile_id INTEGER"), + ("PRIMARY KEY (id, version)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), )), 'INSERT': OrderedDict(( ('message_types', (("'chat'",), @@ -88,6 +98,15 @@ )), )), }, + 5: {'create': {'files': (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL", + "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format( + file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY), + "hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER", + "namespace TEXT", "mime_type TEXT", + "created DATETIME NOT NULL", "modified DATETIME", + "owner TEXT", "access TEXT", "extra TEXT", "profile_id INTEGER"), + ("PRIMARY KEY (id, version)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))}, + }, 4: {'create': {'components': (('profile_id INTEGER PRIMARY KEY', 'entry_point TEXT NOT NULL'), ('FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE',))} }, 3: {'specific': 'update_v3' @@ -187,7 +206,7 @@ for statement in statements: interaction.execute(statement) - #Profiles + ## Profiles def _cacheComponentsAndProfiles(self, components_result): """Get components results and send requests profiles @@ -286,7 +305,7 @@ d.addErrback(deletionError) return d - #Params + ## Params def loadGenParams(self, params_gen): """Load general parameters @@ -332,6 +351,7 @@ def setGenParam(self, category, name, value): """Save the general parameters in database + @param category: category of the parameter @param name: name of the parameter @param value: value to set @@ -353,7 +373,7 @@ d.addErrback(lambda ignore: log.error(_(u"Can't set individual parameter (%(category)s/%(name)s) for [%(profile)s] in database" % {"category": category, "name": name, "profile": profile}))) return d - #History + ## History def _addToHistoryCb(self, dummy, data): # Message metadata were successfuly added to history @@ -535,7 +555,7 @@ d.addCallback(self.listDict2listTuple) return d - #Private values + ## Private values def _privateDataEb(self, failure_, operation, namespace, key=None, profile=None): """generic errback for data queries""" @@ -650,6 +670,134 @@ d.addErrback(self._privateDataEb, u"delete", namespace, key, profile=profile) return d + ## Files + + @defer.inlineCallbacks + def getFiles(self, client, file_id=None, version=u'', parent=None, type_=None, + file_hash=None, hash_algo=None, name=None, namespace=None, mime_type=None, + owner=None, access=None, projection=None, unique=False): + """retrieve files with with given filters + + @param file_id(unicode, None): id of the file + None to ignore + @param version(unicode, None): version of the file + None to ignore + empty string to look for current version + @param parent(unicode, None): id of the directory containing the files + None to ignore + empty string to look for root files/directories + @param projection(list[unicode], None): name of columns to retrieve + None to retrieve all + @param unique(bool): if True will remove duplicates + other params are the same as for [setFile] + @return (list[dict]): files corresponding to filters + """ + query_parts = ["SELECT"] + if unique: + query_parts.append('DISTINCT') + if projection is None: + projection = ['id', 'version', 'parent', 'type', 'hash', 'hash_algo', 'name', + 'size', 'namespace', 'mime_type', 'created', 'modified', 'owner', + 'access', 'extra'] + query_parts.append(','.join(projection)) + query_parts.append("FROM files WHERE") + filters = ['profile_id=?'] + args = [self.profiles[client.profile]] + + if file_id is not None: + filters.append(u'id=?') + args.append(file_id) + if version is not None: + filters.append(u'version=?') + args.append(version) + if parent is not None: + filters.append(u'parent=?') + args.append(parent) + if type_ is not None: + filters.append(u'type=?') + args.append(type_) + if file_hash is not None: + filters.append(u'hash=?') + args.append(file_hash) + if hash_algo is not None: + filters.append(u'hash_algo=?') + args.append(hash_algo) + if name is not None: + filters.append(u'name=?') + args.append(name) + if namespace is not None: + filters.append(u'namespace=?') + args.append(namespace) + if mime_type is not None: + filters.append(u'mime_type=?') + args.append(mime_type) + if owner is not None: + filters.append(u'owner=?') + args.append(owner.full()) + if access is not None: + raise NotImplementedError('Access check is not implemented yet') + # a JSON comparison is needed here + + filters = u' AND '.join(filters) + query_parts.append(filters) + query = u' '.join(query_parts) + + result = yield self.dbpool.runQuery(query, args) + files_data = [dict(zip(projection, row)) for row in result] + to_parse = {'access', 'extra'}.intersection(projection) + to_filter = {'owner'}.intersection(projection) + if to_parse or to_filter: + for file_data in files_data: + for key in to_parse: + value = file_data[key] + file_data[key] = {} if value is None else json.loads(value) + owner = file_data.get('owner') + if owner is not None: + file_data['owner'] = jid.JID(owner) + defer.returnValue(files_data) + + def setFile(self, client, name, file_id, version=u'', parent=None, type_=C.FILE_TYPE_FILE, + file_hash=None, hash_algo=None, size=None, namespace=None, mime_type=None, + created=None, modified=None, owner=None, access=None, extra=None): + """set a file metadata + + @param client(SatXMPPClient): client owning the file + @param name(unicode): name of the file (must not contain "/") + @param file_id(unicode): unique id of the file + @param version(unicode): version of this file + @param parent(unicode): id of the directory containing this file + None if it is a root file/directory + @param type_(unicode): one of: + - file + - directory + @param file_hash(unicode): unique hash of the payload + @param hash_algo(unicode): algorithm used for hashing the file (usually sha-256) + @param size(int): size in bytes + @param namespace(unicode, None): identifier (human readable is better) to group files + for instance, namespace could be used to group files in a specific photo album + @param mime_type(unicode): MIME type of the file, or None if not known/guessed + @param created(int): UNIX time of creation + @param modified(int,None): UNIX time of last modification, or None to use created date + @param owner(jid.JID, None): jid of the owner of the file (mainly useful for component) + @param access(dict, None): serialisable dictionary with access rules. See [memory.memory] for details + @param extra(dict, None): serialisable dictionary of any extra data + will be encoded to json in database + """ + if extra is not None: + assert isinstance(extra, dict) + query = ('INSERT INTO files(id, version, parent, type, hash, hash_algo, name, size, namespace, ' + 'mime_type, created, modified, owner, access, extra, profile_id) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)') + d = self.dbpool.runQuery(query, (file_id, version.strip(), parent, type_, + file_hash, hash_algo, + name, size, namespace, + mime_type, created, modified, + owner.full() if owner is not None else None, + json.dumps(access) if access else None, + json.dumps(extra) if extra else None, + self.profiles[client.profile])) + d.addErrback(lambda failure: log.error(_(u"Can't save file metadata for [{profile}]: {reason}".format(profile=client.profile, reason=failure)))) + return d + ##Helper methods## def __getFirstResult(self, result): @@ -659,7 +807,7 @@ class Updater(object): - stmnt_regex = re.compile(r"(?:[\w ]+(?:\([\w, ]+\))?)+") + stmnt_regex = re.compile(r"[\w/' ]+(?:\(.*?\))?[^,]*") clean_regex = re.compile(r"^ +|(?<= ) +|(?<=,) +| +$") CREATE_SQL = "CREATE TABLE %s (%s)" INSERT_SQL = "INSERT INTO %s VALUES (%s)" @@ -706,6 +854,7 @@ """ local_version = yield self.getLocalVersion() raw_local_sch = yield self.getLocalSchema() + local_sch = self.rawStatements2data(raw_local_sch) current_sch = DATABASE_SCHEMAS['current']['CREATE'] local_hash = self.statementHash(local_sch)