# HG changeset patch
# User Goffi <goffi@goffi.org>
# Date 1519838919 -3600
# Node ID 898b6e1fdc7a2e08a90f3ccccf8b8149f67c493d
# Parent  af4a38ebf52a7bf1667e650cdbbe578614bf4374
memory (sqlite): files handling:

/!\ new database schema version

- added a table to handle files metadata and hierarchy.
- new methods getFiles and setFile allow respectively to retrieve files corresponding to filters, and to set metadata for one file.
- fixed stmnt_regex to detect correcly new schema (during schema validation/update on init)

diff -r af4a38ebf52a -r 898b6e1fdc7a src/memory/sqlite.py
--- a/src/memory/sqlite.py	Wed Feb 28 18:28:39 2018 +0100
+++ b/src/memory/sqlite.py	Wed Feb 28 18:28:39 2018 +0100
@@ -26,6 +26,7 @@
 from sat.tools.config import fixConfigOption
 from twisted.enterprise import adbapi
 from twisted.internet import defer
+from twisted.words.protocols.jabber import jid
 from twisted.python import failure
 from collections import OrderedDict
 import re
@@ -33,8 +34,9 @@
 import cPickle as pickle
 import hashlib
 import sqlite3
+import json
 
-CURRENT_DB_VERSION = 4
+CURRENT_DB_VERSION = 5
 
 # XXX: DATABASE schemas are used in the following way:
 #      - 'current' key is for the actual database schema, for a new base
@@ -44,7 +46,7 @@
 #      - 'CREATE': it contains an Ordered dict with table to create as keys, and a len 2 tuple as value, where value[0] are the columns definitions and value[1] are the table constraints
 #      - 'INSERT': it contains an Ordered dict with table where values have to be inserted, and many tuples containing values to insert in the order of the rows (#TODO: manage named columns)
 #      an update data dict (the ones with a number) can contains the keys 'create', 'delete', 'cols create', 'cols delete', 'cols modify', 'insert' or 'specific'. See Updater.generateUpdateData for more infos. This method can be used to autogenerate update_data, to ease the work of the developers.
-# TODO: this database currently doesn't use indexes, it should
+# TODO: indexes need to be improved
 
 DATABASE_SCHEMAS = {
         "current": {'CREATE': OrderedDict((
@@ -53,12 +55,12 @@
                               ('components',      (("profile_id INTEGER PRIMARY KEY", "entry_point TEXT NOT NULL"),
                                                    ("FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE",))),
                               ('message_types',   (("type TEXT PRIMARY KEY",),
-                                  tuple())),
+                                  ())),
                               ('history',         (("uid TEXT PRIMARY KEY", "update_uid TEXT", "profile_id INTEGER", "source TEXT", "dest TEXT", "source_res TEXT", "dest_res TEXT",
                                                     "timestamp DATETIME NOT NULL", "received_timestamp DATETIME", # XXX: timestamp is the time when the message was emitted. If received time stamp is not NULL, the message was delayed and timestamp is the declared value (and received_timestamp the time of reception)
                                                     "type TEXT", "extra BLOB"),
                                                    ("FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE", "FOREIGN KEY(type) REFERENCES message_types(type)",
-                                                    "UNIQUE (profile_id, timestamp, source, dest, source_res, dest_res)" # avoid storing 2 time the same message (specially for delayed cones)
+                                                    "UNIQUE (profile_id, timestamp, source, dest, source_res, dest_res)" # avoid storing 2 time the same message (specially for delayed ones)
                                                     ))),
                               ('message',        (("id INTEGER PRIMARY KEY ASC", "history_uid INTEGER", "message TEXT", "language TEXT"),
                                                   ("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))),
@@ -76,7 +78,15 @@
                               ('private_gen_bin', (("namespace TEXT", "key TEXT", "value BLOB"),
                                                    ("PRIMARY KEY (namespace, key)",))),
                               ('private_ind_bin', (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value BLOB"),
-                                                   ("PRIMARY KEY (namespace, key, profile_id)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE")))
+                                                   ("PRIMARY KEY (namespace, key, profile_id)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))),
+                              ('files',           (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL",
+                                                    "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format(
+                                                        file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY),
+                                                    "hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER",
+                                                    "namespace TEXT", "mime_type TEXT",
+                                                    "created DATETIME NOT NULL", "modified DATETIME",
+                                                    "owner TEXT", "access TEXT", "extra TEXT", "profile_id INTEGER"),
+                                                   ("PRIMARY KEY (id, version)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))),
                               )),
                     'INSERT': OrderedDict((
                               ('message_types', (("'chat'",),
@@ -88,6 +98,15 @@
                                                 )),
                               )),
                     },
+        5:         {'create': {'files': (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL",
+                                          "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format(
+                                              file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY),
+                                          "hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER",
+                                          "namespace TEXT", "mime_type TEXT",
+                                          "created DATETIME NOT NULL", "modified DATETIME",
+                                          "owner TEXT", "access TEXT", "extra TEXT", "profile_id INTEGER"),
+                                         ("PRIMARY KEY (id, version)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))},
+                   },
         4:         {'create': {'components': (('profile_id INTEGER PRIMARY KEY', 'entry_point TEXT NOT NULL'), ('FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE',))}
                    },
         3:         {'specific': 'update_v3'
@@ -187,7 +206,7 @@
         for statement in statements:
             interaction.execute(statement)
 
-    #Profiles
+    ## Profiles
 
     def _cacheComponentsAndProfiles(self, components_result):
         """Get components results and send requests profiles
@@ -286,7 +305,7 @@
         d.addErrback(deletionError)
         return d
 
-    #Params
+    ## Params
     def loadGenParams(self, params_gen):
         """Load general parameters
 
@@ -332,6 +351,7 @@
 
     def setGenParam(self, category, name, value):
         """Save the general parameters in database
+
         @param category: category of the parameter
         @param name: name of the parameter
         @param value: value to set
@@ -353,7 +373,7 @@
         d.addErrback(lambda ignore: log.error(_(u"Can't set individual parameter (%(category)s/%(name)s) for [%(profile)s] in database" % {"category": category, "name": name, "profile": profile})))
         return d
 
-    #History
+    ## History
 
     def _addToHistoryCb(self, dummy, data):
         # Message metadata were successfuly added to history
@@ -535,7 +555,7 @@
         d.addCallback(self.listDict2listTuple)
         return d
 
-    #Private values
+    ## Private values
 
     def _privateDataEb(self, failure_, operation, namespace, key=None, profile=None):
         """generic errback for data queries"""
@@ -650,6 +670,134 @@
         d.addErrback(self._privateDataEb, u"delete", namespace, key, profile=profile)
         return d
 
+    ## Files
+
+    @defer.inlineCallbacks
+    def getFiles(self, client, file_id=None, version=u'', parent=None, type_=None,
+                 file_hash=None, hash_algo=None, name=None, namespace=None, mime_type=None,
+                 owner=None, access=None, projection=None, unique=False):
+        """retrieve files with with given filters
+
+        @param file_id(unicode, None): id of the file
+            None to ignore
+        @param version(unicode, None): version of the file
+            None to ignore
+            empty string to look for current version
+        @param parent(unicode, None): id of the directory containing the files
+            None to ignore
+            empty string to look for root files/directories
+        @param projection(list[unicode], None): name of columns to retrieve
+            None to retrieve all
+        @param unique(bool): if True will remove duplicates
+        other params are the same as for [setFile]
+        @return (list[dict]): files corresponding to filters
+        """
+        query_parts = ["SELECT"]
+        if unique:
+            query_parts.append('DISTINCT')
+        if projection is None:
+            projection = ['id', 'version', 'parent', 'type', 'hash', 'hash_algo', 'name',
+                          'size', 'namespace', 'mime_type', 'created', 'modified', 'owner',
+                          'access', 'extra']
+        query_parts.append(','.join(projection))
+        query_parts.append("FROM files WHERE")
+        filters = ['profile_id=?']
+        args = [self.profiles[client.profile]]
+
+        if file_id is not None:
+            filters.append(u'id=?')
+            args.append(file_id)
+        if version is not None:
+            filters.append(u'version=?')
+            args.append(version)
+        if parent is not None:
+            filters.append(u'parent=?')
+            args.append(parent)
+        if type_ is not None:
+            filters.append(u'type=?')
+            args.append(type_)
+        if file_hash is not None:
+            filters.append(u'hash=?')
+            args.append(file_hash)
+        if hash_algo is not None:
+            filters.append(u'hash_algo=?')
+            args.append(hash_algo)
+        if name is not None:
+            filters.append(u'name=?')
+            args.append(name)
+        if namespace is not None:
+            filters.append(u'namespace=?')
+            args.append(namespace)
+        if mime_type is not None:
+            filters.append(u'mime_type=?')
+            args.append(mime_type)
+        if owner is not None:
+            filters.append(u'owner=?')
+            args.append(owner.full())
+        if access is not None:
+            raise NotImplementedError('Access check is not implemented yet')
+            # a JSON comparison is needed here
+
+        filters = u' AND '.join(filters)
+        query_parts.append(filters)
+        query = u' '.join(query_parts)
+
+        result = yield self.dbpool.runQuery(query, args)
+        files_data = [dict(zip(projection, row)) for row in result]
+        to_parse = {'access', 'extra'}.intersection(projection)
+        to_filter = {'owner'}.intersection(projection)
+        if to_parse or to_filter:
+            for file_data in files_data:
+                for key in to_parse:
+                    value = file_data[key]
+                    file_data[key] = {} if value is None else json.loads(value)
+                owner = file_data.get('owner')
+                if owner is not None:
+                    file_data['owner'] = jid.JID(owner)
+        defer.returnValue(files_data)
+
+    def setFile(self, client, name, file_id, version=u'', parent=None, type_=C.FILE_TYPE_FILE,
+                file_hash=None, hash_algo=None, size=None, namespace=None, mime_type=None,
+                created=None, modified=None, owner=None, access=None, extra=None):
+        """set a file metadata
+
+        @param client(SatXMPPClient): client owning the file
+        @param name(unicode): name of the file (must not contain "/")
+        @param file_id(unicode): unique id of the file
+        @param version(unicode): version of this file
+        @param parent(unicode): id of the directory containing this file
+            None if it is a root file/directory
+        @param type_(unicode): one of:
+            - file
+            - directory
+        @param file_hash(unicode): unique hash of the payload
+        @param hash_algo(unicode): algorithm used for hashing the file (usually sha-256)
+        @param size(int): size in bytes
+        @param namespace(unicode, None): identifier (human readable is better) to group files
+            for instance, namespace could be used to group files in a specific photo album
+        @param mime_type(unicode): MIME type of the file, or None if not known/guessed
+        @param created(int): UNIX time of creation
+        @param modified(int,None): UNIX time of last modification, or None to use created date
+        @param owner(jid.JID, None): jid of the owner of the file (mainly useful for component)
+        @param access(dict, None): serialisable dictionary with access rules. See [memory.memory] for details
+        @param extra(dict, None): serialisable dictionary of any extra data
+            will be encoded to json in database
+        """
+        if extra is not None:
+         assert isinstance(extra, dict)
+        query = ('INSERT INTO files(id, version, parent, type, hash, hash_algo, name, size, namespace, '
+                 'mime_type, created, modified, owner, access, extra, profile_id) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)')
+        d = self.dbpool.runQuery(query, (file_id, version.strip(), parent, type_,
+                                         file_hash, hash_algo,
+                                         name, size, namespace,
+                                         mime_type, created, modified,
+                                         owner.full() if owner is not None else None,
+                                         json.dumps(access) if access else None,
+                                         json.dumps(extra) if extra else None,
+                                         self.profiles[client.profile]))
+        d.addErrback(lambda failure: log.error(_(u"Can't save file metadata for [{profile}]: {reason}".format(profile=client.profile, reason=failure))))
+        return d
+
     ##Helper methods##
 
     def __getFirstResult(self, result):
@@ -659,7 +807,7 @@
 
 
 class Updater(object):
-    stmnt_regex = re.compile(r"(?:[\w ]+(?:\([\w, ]+\))?)+")
+    stmnt_regex = re.compile(r"[\w/' ]+(?:\(.*?\))?[^,]*")
     clean_regex = re.compile(r"^ +|(?<= ) +|(?<=,) +| +$")
     CREATE_SQL = "CREATE TABLE %s (%s)"
     INSERT_SQL = "INSERT INTO %s VALUES (%s)"
@@ -706,6 +854,7 @@
         """
         local_version = yield self.getLocalVersion()
         raw_local_sch = yield self.getLocalSchema()
+
         local_sch = self.rawStatements2data(raw_local_sch)
         current_sch = DATABASE_SCHEMAS['current']['CREATE']
         local_hash = self.statementHash(local_sch)