Edit on GitHub

common.config_manager

  1import itertools
  2import threading
  3import pickle
  4import time
  5import json
  6
  7from pymemcache.client.base import Client as MemcacheClient
  8from pymemcache.exceptions import MemcacheError
  9from pymemcache import serde
 10from pathlib import Path
 11from common.lib.database import Database
 12
 13from common.lib.exceptions import ConfigException
 14from common.lib.config_definition import config_definition
 15
 16import configparser
 17import os
 18
 19class CacheMiss:
 20    """
 21    Helper class to distinguish memcache misses from true `None` values
 22    """
 23    pass
 24
 25class BaseConfigReader:
 26    """
 27    Helper class to unify various types of configuration readers
 28    """
 29    pass
 30
class ConfigManager(BaseConfigReader):
    """
    Read and write 4CAT configuration settings

    Core settings are read from `config/config.ini`; all other settings are
    read from (and written to) the `settings` database table, optionally
    cached through memcache.
    """
    # NOTE: everything below is defined on the class. The mutable values (the
    # dictionaries and the thread-local object) are therefore shared by all
    # instances and subclasses unless an instance rebinds them.

    # database object; set by `with_db()`
    db = None
    # not referenced by the methods of this class
    dbconn = None
    # not referenced by the methods of this class
    cache = {}
    # logger; set by `with_logger()`
    logger = None

    # values from config.ini, filled by `load_core_settings()`
    core_settings = {}
    # setting definitions (defaults etc.), filled by `load_user_settings()`
    config_definition = {}
    # Thread-local storage for a singleton memcache client per thread.
    # Prevents creating a new TCP connection per request in threaded/gunicorn contexts.
    _memcache_tls = threading.local()
 42
 43    def __init__(self, db=None):
 44        # ensure core settings (including database config) are loaded
 45        self.load_core_settings()
 46        self.load_user_settings()
 47        # Do not create a memcache client here; get_memcache() will lazily create per-thread.
 48
 49        # establish database connection if none available
 50        if db:
 51            self.with_db(db)
 52
 53    def with_db(self, db=None):
 54        """
 55        Initialise database
 56
 57        Not done on init, because something may need core settings before the
 58        database can be initialised
 59
 60        :param db:  Database object. If None, initialise it using the core config
 61        """
 62        if db or not self.db:
 63            if db and db.log and not self.logger:
 64                # borrow logger from database
 65                self.with_logger(db.log)
 66
 67            # Replace w/ db if provided else only initialise if not already
 68            self.db = db if db else Database(logger=self.logger, dbname=self.get("DB_NAME"), user=self.get("DB_USER"),
 69                                         password=self.get("DB_PASSWORD"), host=self.get("DB_HOST"),
 70                                         port=self.get("DB_PORT"), appname="config-reader")
 71        else:
 72            # self.db already initialized and no db provided
 73            pass
 74
 75    def with_logger(self, logger):
 76        """
 77        Attach logger to config manager
 78
 79        4CAT's logger has some features on top of the basic Python logger that
 80        are needed for further operation, e.g. the Debug2 log level.
 81
 82        :param Logger logger:
 83        """
 84        self.logger = logger
 85
 86    def load_user_settings(self):
 87        """
 88        Load settings configurable by the user
 89
 90        Does not load the settings themselves, but rather the definition so
 91        values can be validated, etc
 92        """
 93        # basic 4CAT settings
 94        self.config_definition.update(config_definition)
 95
 96        # module settings can't be loaded directly because modules need the
 97        # config manager to load, so that becomes circular
 98        # instead, this is cached on startup and then loaded here
 99        module_config_path = self.get("PATH_CONFIG").joinpath("module_config.bin")
100        if module_config_path.exists():
101            try:
102                with module_config_path.open("rb") as infile:
103                    retries = 0
104                    module_config = None
105                    # if 4CAT is being run in two different containers
106                    # (front-end and back-end) they might both be running this
107                    # bit of code at the same time. If the file is half-written
108                    # loading it will fail, so allow for a few retries
109                    while retries < 3:
110                        try:
111                            module_config = pickle.load(infile)
112                            break
113                        except Exception:  # this can be a number of exceptions, all with the same recovery path
114                            time.sleep(0.1)
115                            retries += 1
116                            continue
117
118                    if module_config is None:
119                        # not really a way to gracefully recover from this, but
120                        # we can at least describe the error
121                        raise RuntimeError("Could not read module_config.bin. The 4CAT developers did a bad job of "
122                                           "preventing this. Shame on them!")
123
124                    self.config_definition.update(module_config)
125            except (ValueError, TypeError):
126                pass
127
128    def load_core_settings(self):
129        """
130        Load 4CAT core settings
131
132        These are (mostly) stored in config.ini and cannot be changed from the
133        web interface.
134
135        :return:
136        """
137        config_file = Path(__file__).parent.parent.joinpath("config/config.ini")
138        config_reader = configparser.ConfigParser()
139        in_docker = False
140        if config_file.exists():
141            config_reader.read(config_file)
142            if config_reader["DOCKER"].getboolean("use_docker_config"):
143                # Can use throughtout 4CAT to know if Docker environment
144                in_docker = True
145        else:
146            # config should be created!
147            raise ConfigException("No config/config.ini file exists! Update and rename the config.ini-example file.")
148        
149        # Set up core settings
150        # Using Path.joinpath() will ensure paths are relative to ROOT_PATH or absolute (if /some/path is provided)
151        root_path = Path(os.path.abspath(os.path.dirname(__file__))).joinpath("..").resolve() # better don"t change this
152
153        self.core_settings.update({
154            "CONFIG_FILE": config_file.resolve(),
155            "USING_DOCKER": in_docker,
156            "DB_HOST": config_reader["DATABASE"].get("db_host"),
157            "DB_PORT": config_reader["DATABASE"].get("db_port"),
158            "DB_USER": config_reader["DATABASE"].get("db_user"),
159            "DB_NAME": config_reader["DATABASE"].get("db_name"),
160            "DB_PASSWORD": config_reader["DATABASE"].get("db_password"),
161
162            "API_HOST": config_reader["API"].get("api_host"),
163            "API_PORT": config_reader["API"].getint("api_port"),
164
165            "MEMCACHE_SERVER": config_reader.get("MEMCACHE", option="memcache_host", fallback=None),
166
167            "PATH_ROOT": root_path,
168            "PATH_CONFIG": root_path.joinpath("config"), # .current-version, config.ini are hardcoded here via docker/docker_setup.py and helper-scripts/migrate.py
169            "PATH_EXTENSIONS": root_path.joinpath("config/extensions"), # Must match setup.py and migrate.py
170            "PATH_LOGS": root_path.joinpath(config_reader["PATHS"].get("path_logs", "")),
171            "PATH_IMAGES": root_path.joinpath(config_reader["PATHS"].get("path_images", "")),
172            "PATH_DATA": root_path.joinpath(config_reader["PATHS"].get("path_data", "")),
173            "PATH_LOCKFILE": root_path.joinpath(config_reader["PATHS"].get("path_lockfile", "")),
174            "PATH_SESSIONS": root_path.joinpath(config_reader["PATHS"].get("path_sessions", "")),
175
176            "ANONYMISATION_SALT": config_reader["GENERATE"].get("anonymisation_salt"),
177            "SECRET_KEY": config_reader["GENERATE"].get("secret_key")
178        })
179
180
181    def get_memcache(self):
182        """
183        Get (or create) a thread-local memcache client
184
185        The config reader can optionally use Memcache to keep fetched values in
186        memory.
187        """
188        # Reuse per-thread client if already initialised.
189        existing = getattr(self._memcache_tls, "client", None)
190        if existing:
191            return existing
192
193        server = self.get("MEMCACHE_SERVER")
194        if server:
195            try:
196                memcache = MemcacheClient(server, serde=serde.pickle_serde, key_prefix=b"4cat-config")
197                # do one test fetch to test if connection is valid
198                memcache.set("4cat-init-dummy", time.time())
199                memcache.init_thread_id = threading.get_ident()
200                self._memcache_tls.client = memcache
201                return memcache
202            except (SystemError, ValueError, MemcacheError, ConnectionError, OSError):
203                # we have no access to the logger here so we simply pass
204                # later we can detect elsewhere that a memcache address is
205                # configured but no connection is there - then we can log
206                # config reader still works without memcache
207                pass
208
209        return None
210
211    def close_memcache(self):
212        """Close and dispose this thread's memcache client.
213
214        Call from gunicorn worker_exit or application teardown to ensure
215        sockets are closed explicitly instead of relying on GC/process exit.
216        """
217        client = getattr(self._memcache_tls, "client", None)
218        if client:
219            try:
220                client.close()
221            except Exception:
222                pass
223            finally:
224                try:
225                    del self._memcache_tls.client
226                except AttributeError:
227                    pass
228        
229
230    def ensure_database(self):
231        """
232        Ensure the database is in sync with the config definition
233
234        Deletes all stored settings not defined in 4CAT, and creates a global
235        setting for all settings not yet in the database.
236        """
237        self.with_db()
238
239        # create global values for known keys with the default
240        known_settings = self.get_all_setting_names()
241        for setting, parameters in self.config_definition.items():
242            if setting in known_settings:
243                continue
244
245            self.db.log.debug(f"Creating setting: {setting} with default value {parameters.get('default', '')}")
246            self.set(setting, parameters.get("default", ""))
247
248        # make sure settings and user table are in sync
249        user_tags = list(set(itertools.chain(*[u["tags"] for u in self.db.fetchall("SELECT DISTINCT tags FROM users")])))
250        known_tags = [t["tag"] for t in self.db.fetchall("SELECT DISTINCT tag FROM settings")]
251        tag_order = self.get("flask.tag_order")
252
253        for tag in known_tags:
254            # add tags used by a setting to tag order
255            if tag and tag not in tag_order:
256                tag_order.append(tag)
257
258        for tag in user_tags:
259            # add tags used by a user to tag order
260            if tag and tag not in tag_order:
261                tag_order.append(tag)
262
263        # admin tag should always be first in order
264        if "admin" in tag_order:
265            tag_order.remove("admin")
266
267        tag_order.insert(0, "admin")
268
269        self.set("flask.tag_order", tag_order)
270        self.db.commit()
271
272    def get_all_setting_names(self, with_core=True):
273        """
274        Get names of all settings
275
276        For when the value doesn't matter!
277
278        :param bool with_core:  Also include core (i.e. config.ini) settings
279        :return list:  List of setting names known by the database and core settings
280        """
281        # attempt to initialise the database connection so we can include
282        # user settings
283        if not self.db:
284            self.with_db()
285
286        settings = list(self.core_settings.keys()) if with_core else []
287        settings.extend([s["name"] for s in self.db.fetchall("SELECT DISTINCT name FROM settings")])
288
289        return settings
290
291    def get_all(self, is_json=False, user=None, tags=None, with_core=True, memcache=None):
292        """
293        Get all known settings
294
295        This is *not optimised* but used rarely enough that that doesn't
296        matter so much.
297
298        :param bool is_json:  if True, the value is returned as stored and not
299        interpreted as JSON if it comes from the database
300        :param user:  User object or name. Adds a tag `user:[username]` in
301        front of the tag list.
302        :param tags:  Tag or tags for the required setting. If a tag is
303        provided, the method checks if a special value for the setting exists
304        with the given tag, and returns that if one exists. First matching tag
305        wins.
306        :param bool with_core:  Also include core (i.e. config.ini) settings
307        :param MemcacheClient memcache:  Memcache client. If `None`, a thread-local client will be used.
308
309        :return dict: Setting value, as a dictionary with setting names as keys
310        and setting values as values.
311        """
312        for setting in self.get_all_setting_names(with_core=with_core):
313            yield setting, self.get(setting, None, is_json, user, tags, memcache)
314
315
316    def get(self, attribute_name, default=None, is_json=False, user=None, tags=None, memcache=None):
317        """
318        Get a setting's value from the database
319
320        If the setting does not exist, the provided fallback value is returned.
321
322        :param str attribute_name:  Setting to return.
323        :param default:  Value to return if setting does not exist
324        :param bool is_json:  if True, the value is returned as stored and not
325        interpreted as JSON if it comes from the database
326        :param user:  User object or name. Adds a tag `user:[username]` in
327        front of the tag list.
328        :param tags:  Tag or tags for the required setting. If a tag is
329        provided, the method checks if a special value for the setting exists
330        with the given tag, and returns that if one exists. First matching tag
331        wins.
332    :param MemcacheClient memcache:  Memcache client. If `None`, a thread-local client will be used.
333
334        :return:  Setting value, or the provided fallback, or `None`.
335        """
336        # core settings are not from the database
337        # they are therefore also not memcached - too little gain
338        if type(attribute_name) is not str:
339            raise TypeError(f"attribute_name must be a str, {attribute_name.__class__.__name__} given")
340
341        if attribute_name in self.core_settings:
342            # we never get to the database or memcache part of this method if
343            # this is a core setting we already know
344            return self.core_settings[attribute_name]
345
346        # if trying to access a setting that's not a core setting, attempt to
347        # initialise the database connection
348        if not self.db:
349            self.with_db()
350
351        # get tags to look for
352        # copy() because else we keep adding onto the same list, which
353        # interacts badly with get_all()
354        if tags:
355            tags = tags.copy()
356        tags = self.get_active_tags(user, tags, memcache)
357
358        # now we have all tags - get the config values for each (if available)
359        # and then return the first matching one. Add the 'empty' tag at the
360        # end to fall back to the global value if no specific one exists.
361        tags.append("")
362
363        # Obtain thread-local memcache client if not explicitly given.
364        if not memcache:
365            memcache = self.get_memcache()
366
367        # first check if we have all the values in memcache, in which case we
368        # do not need a database query
369        if memcache:
370            if threading.get_ident() != memcache.init_thread_id:
371                raise RuntimeError("Thread-unsafe use of memcache! Please make sure you are using a configuration "
372                                   "wrapper to read with a thread-local memcache connection.")
373
374            cached_values = {tag: memcache.get(self._get_memcache_id(attribute_name, tag), default=CacheMiss) for tag in tags}
375
376        else:
377            cached_values = {t: CacheMiss for t in tags}
378
379        # for the tags we could not get from memcache, run a database query
380        # (and save to cache if possible)
381        missing_tags = [t for t in cached_values if cached_values[t] is CacheMiss]
382        if missing_tags:
383            # query database for any values within the required tags
384            query = "SELECT * FROM settings WHERE name = %s AND tag IN %s"
385            replacements = (attribute_name, tuple(missing_tags))
386            queried_settings = {setting["tag"]: setting["value"] for setting in self.db.fetchall(query, replacements)}
387
388            if memcache:
389                for tag, value in queried_settings.items():
390                    memcache.set(self._get_memcache_id(attribute_name, tag), value)
391
392            cached_values.update(queried_settings)
393
394        # there may be some tags for which we still do not have a value at
395        # this point. these simply do not have a tag-specific value but that in
396        # itself is worth caching, otherwise we're going to query for a
397        # non-existent value each time.
398        # so: cache a magic value for such setting/tag combinations, and
399        # replace the magic value with a CacheMiss in the dict that will be
400        # parsed
401        unconfigured_magic = "__unconfigured__"
402        if memcache:
403            for tag in [t for t in cached_values if cached_values[t] is CacheMiss]:
404                # should this be more magic?
405                memcache.set(self._get_memcache_id(attribute_name, tag), unconfigured_magic)
406
407            for tag in [t for t in cached_values if cached_values[t] == unconfigured_magic]:
408                cached_values[tag] = CacheMiss
409
410        # now we may still have some CacheMisses in the values dict, if there
411        # was no setting in the database with that tag. So, find the first
412        # value that is not a CacheMiss. If nothing matches, try the global tag
413        # and if even that does not match (no setting saved at all) return the
414        # default
415        for tag in tags:
416            if tag in cached_values and cached_values.get(tag) is not CacheMiss:
417                value = cached_values[tag]
418                break
419        else:
420            value = None
421
422        # parse some values...
423        if not is_json and value is not None:
424            value = json.loads(value)
425        # TODO: Which default should have priority? The provided default feels like it should be the highest priority, but I think that is an old implementation and perhaps should be removed. - Dale
426        elif value is None and attribute_name in self.config_definition and "default" in self.config_definition[attribute_name]:
427            value = self.config_definition[attribute_name]["default"]
428        elif value is None and default is not None:
429            value = default
430
431        return value
432
433    def get_active_tags(self, user=None, tags=None, memcache=None):
434        """
435        Get active tags for given user/tag list
436
437        Used internally to harmonize tag setting for various methods, but can
438        also be called directly to verify tag activation.
439
440        :param user:  User object or name. Adds a tag `user:[username]` in
441        front of the tag list.
442        :param tags:  Tag or tags for the required setting. If a tag is
443        provided, the method checks if a special value for the setting exists
444        with the given tag, and returns that if one exists. First matching tag
445        wins.
446    :param MemcacheClient memcache:  Memcache client. If `None`, a thread-local client will be used.
447        :return list:  List of tags
448        """
449        # be flexible about the input types here
450        if tags is None:
451            tags = []
452        elif type(tags) is str:
453            tags = [tags]
454
455        user = self._normalise_user(user)
456
457        # user-specific settings are just a special type of tag (which takes
458        # precedence), same goes for user groups. so if a user was passed, get
459        # that user's tags (including the 'special' user: tag) and add them
460        # to the list
461        if user:
462            user_tags = CacheMiss
463            
464            if not memcache:
465                memcache = self.get_memcache()
466                
467            if memcache:
468                memcache_id = f"_usertags-{user}"
469                user_tags = memcache.get(memcache_id, default=CacheMiss)
470
471            if user_tags is CacheMiss:
472                user_tags = self.db.fetchone("SELECT tags FROM users WHERE name = %s", (user,))
473                if user_tags and memcache:
474                    memcache.set(memcache_id, user_tags)
475
476            if user_tags:
477                try:
478                    tags.extend(user_tags["tags"])
479                except (TypeError, ValueError):
480                    # should be a JSON list, but isn't
481                    pass
482
483            tags.insert(0, f"user:{user}")
484
485        return tags
486
487    def set(self, attribute_name, value, is_json=False, tag="", overwrite_existing=True, memcache=None):
488        """
489        Insert OR set value for a setting
490
491        If overwrite_existing=True and the setting exists, the setting is updated; if overwrite_existing=False and the
492        setting exists the setting is not updated.
493
494        :param str attribute_name:  Attribute to set
495        :param value:  Value to set (will be serialised as JSON)
496        :param bool is_json:  True for a value that is already a serialised JSON string; False if value is object that needs to
497                          be serialised into a JSON string
498        :param bool overwrite_existing: True will overwrite existing setting, False will do nothing if setting exists
499        :param str tag:  Tag to write setting for
500    :param MemcacheClient memcache:  Memcache client. If `None`, a thread-local client will be used.
501
502        :return int: number of updated rows
503        """
504        # Check value is valid JSON
505        if is_json:
506            try:
507                json.dumps(json.loads(value))
508            except json.JSONDecodeError:
509                return None
510        else:
511            try:
512                value = json.dumps(value)
513            except json.JSONDecodeError:
514                return None
515
516        if attribute_name in self.config_definition and self.config_definition.get(attribute_name).get("global"):
517            tag = ""
518
519        if overwrite_existing:
520            query = "INSERT INTO settings (name, value, tag) VALUES (%s, %s, %s) ON CONFLICT (name, tag) DO UPDATE SET value = EXCLUDED.value"
521        else:
522            query = "INSERT INTO settings (name, value, tag) VALUES (%s, %s, %s) ON CONFLICT DO NOTHING"
523
524        self.db.execute(query, (attribute_name, value, tag))
525        updated_rows = self.db.cursor.rowcount
526        self.db.log.debug(f"Updated setting for {attribute_name}: {value} (tag: {tag})")
527
528        if not memcache:
529            memcache = self.get_memcache()
530
531        if memcache:
532            # invalidate any cached value for this setting
533            memcache_id = self._get_memcache_id(attribute_name, tag)
534            memcache.delete(memcache_id)
535
536        return updated_rows
537
538    def delete_for_tag(self, attribute_name, tag):
539        """
540        Delete config override for a given tag
541
542        :param str attribute_name:
543        :param str tag:
544        :return int: number of deleted rows
545        """
546        self.db.delete("settings", where={"name": attribute_name, "tag": tag})
547        updated_rows = self.db.cursor.rowcount
548        client = self.get_memcache()
549        if client:
550            client.delete(self._get_memcache_id(attribute_name, tag))
551        return updated_rows
552
553    def clear_cache(self):
554        """
555        Clear cached configuration values
556
557        Called when the backend restarts - helps start with a blank slate.
558        """
559        client = self.get_memcache()
560        if not client:
561            return
562        client.flush_all()
563
564    def uncache_user_tags(self, users):
565        """
566        Clear cached user tags
567
568        User tags are cached with memcache if possible to avoid unnecessary
569        database roundtrips. This method clears the cached user tags, in case
570        a tag is added/deleted from a user.
571
572        :param list users:  List of users, as usernames or User objects
573        """
574        client = self.get_memcache()
575        if client:
576            for user in users:
577                user = self._normalise_user(user)
578                client.delete(f"_usertags-{user}")
579
580    def _normalise_user(self, user):
581        """
582        Normalise user object
583
584        Users may be passed as a username, a user object, or a proxy of such an
585        object. This method normalises this to a string (the username), or
586        `None` if no user is provided.
587
588        :param user:  User value to normalise
589        :return str|None:  Normalised value
590        """
591
592        # can provide either a string or user object
593        if type(user) is not str:
594            if type(user).__name__ == "LocalProxy":
595                # passed on from Flask
596                user = user._get_current_object()
597
598            if hasattr(user, "get_id"):
599                user = user.get_id()
600            elif user != None:  # noqa: E711
601                # werkzeug.local.LocalProxy (e.g., user not yet logged in) wraps None; use '!=' instead of 'is not'
602                raise TypeError(
603                    f"_normalise_user() expects None, a User object or a string for argument 'user', {type(user).__name__} given"
604                )
605
606        return user
607
608    def _get_memcache_id(self, attribute_name, tags=None):
609        """
610        Generate a memcache key for a config setting request
611
612        This includes the relevant user name/tags because the value may be
613        different depending on the value of these parameters.
614
615        :param str attribute_name:
616        :param str|list tags:
617        :return str:
618        """
619        if tags and isinstance(tags, str):
620            tags = [tags]
621
622        tag_bit = []
623        if tags:
624            tag_bit.append("|".join(tags))
625
626        memcache_id = attribute_name
627        if tag_bit:
628            memcache_id += f"-{'-'.join(tag_bit)}"
629
630        return memcache_id.encode("ascii")
631
632    def __getattr__(self, attr):
633        """
634        Getter so we can directly request values
635
636        :param attr:  Config setting to get
637        :return:  Value
638        """
639
640        if attr in dir(self):
641            # an explicitly defined attribute should always be called in favour
642            # of this passthrough
643            attribute = getattr(self, attr)
644            return attribute
645        else:
646            return self.get(attr)
647
648
649class ConfigWrapper(BaseConfigReader):
650    """
651    Wrapper for the config manager
652
653    Allows setting a default set of tags or user, so that all subsequent calls
654    to `get()` are done for those tags or that user. Can also adjust tags based
655    on the HTTP request, if used in a Flask context.
656    """
657    def __init__(self, config, user=None, tags=None, request=None):
658        """
659        Initialise config wrapper
660
661        :param ConfigManager config:  Initialised config manager
662        :param user:  User to get settings for
663        :param tags:  Tags to get settings for
664        :param request:  Request to get headers from. This can be used to set
665        a particular tag based on the HTTP headers of the request, e.g. to
666        serve 4CAT with a different configuration based on the proxy server
667        used.
668        """
669        if type(config) is ConfigWrapper:
670            # let's not do nested wrappers, but copy properties unless
671            # provided explicitly
672            self.user = user if user else config.user
673            self.tags = tags if tags else config.tags
674            self.request = request if request else config.request
675            self.config = config.config
676            # legacy: previous versions cached a per-request memcache client; now resolved inside ConfigManager
677        else:
678            self.config = config
679            self.user = user
680            self.tags = tags
681            self.request = request
682
683        # this ensures the user object in turn reads from the wrapper
684        if self.user:
685            self.user.with_config(self, rewrap=False)
686
687
688    def set(self, *args, **kwargs):
689        """
690        Wrap `set()`
691
692        :param args:
693        :param kwargs:
694        :return:
695        """
696        if "tag" not in kwargs and self.tags:
697            kwargs["tag"] = self.tags
698
699        # ConfigManager resolves thread-local memcache internally
700
701        return self.config.set(*args, **kwargs)
702
703    def get_all(self, *args, **kwargs):
704        """
705        Wrap `get_all()`
706
707        Takes the `user`, `tags` and `request` given when initialised into
708        account. If `tags` is set explicitly, the HTTP header-based override
709        is not applied.
710
711        :param args:
712        :param kwargs:
713        :return:
714        """
715        if "user" not in kwargs and self.user:
716            kwargs["user"] = self.user
717
718        if "tags" not in kwargs:
719            kwargs["tags"] = self.tags if self.tags else []
720            kwargs["tags"] = self.request_override(kwargs["tags"])
721
722        # ConfigManager resolves thread-local memcache internally
723
724        return self.config.get_all(*args, **kwargs)
725
726    def get(self, *args, **kwargs):
727        """
728        Wrap `get()`
729
730        Takes the `user`, `tags` and `request` given when initialised into
731        account. If `tags` is set explicitly, the HTTP header-based override
732        is not applied.
733
734        :param args:
735        :param kwargs:
736        :return:
737        """
738        if "user" not in kwargs:
739            kwargs["user"] = self.user
740
741        if "tags" not in kwargs:
742            kwargs["tags"] = self.tags if self.tags else []
743            kwargs["tags"] = self.request_override(kwargs["tags"])
744
745        # ConfigManager resolves thread-local memcache internally
746
747        return self.config.get(*args, **kwargs)
748
749    def get_active_tags(self, user=None, tags=None):
750        """
751        Wrap `get_active_tags()`
752
753        Takes the `user`, `tags` and `request` given when initialised into
754        account. If `tags` is set explicitly, the HTTP header-based override
755        is not applied.
756
757        :param user:
758        :param tags:
759        :return list:
760        """
761        active_tags = self.config.get_active_tags(user, tags)
762        if not tags:
763            active_tags = self.request_override(active_tags)
764        return active_tags
765
766    def request_override(self, tags):
767        """
768        Force tag via HTTP request headers
769
770        To facilitate loading different configurations based on the HTTP
771        request, the request object can be passed to the ConfigWrapper and
772        if a certain request header is set, the value of that header will be
773        added to the list of tags to consider when retrieving settings.
774
775        See the flask.proxy_secret config setting; this is used to prevent
776        users from changing configuration by forging the header.
777
778        :param list|str tags:  List of tags to extend based on request
779        :return list:  Amended list of tags
780        """
781        if type(tags) is str:
782            tags = [tags]
783
784        # use self.config.get here, not self.get, because else we get infinite
785        # recursion (since self.get can call this method)
786        if self.request and self.request.headers.get("X-4Cat-Config-Tag") and \
787            self.config.get("flask.proxy_secret") and \
788            self.request.headers.get("X-4Cat-Config-Via-Proxy") == self.config.get("flask.proxy_secret"):
789            # need to ensure not just anyone can add this header to their
790            # request!
791            # to this end, the second header must be set to the secret value;
792            # if it is not set, assume the headers are not being configured by
793            # the proxy server
794            if not tags:
795                tags = []
796
797            # can never set admin tag via headers (should always be user-based)
798            forbidden_overrides = ("admin",)
799            tags += [tag for tag in self.request.headers.get("X-4Cat-Config-Tag").split(",") if tag not in forbidden_overrides]
800
801        return tags
802
803
804    def __getattr__(self, item):
805        """
806        Generic wrapper
807
808        Just pipe everything through to the config object
809
810        :param item:
811        :return:
812        """
813        if hasattr(self.config, item):
814            return getattr(self.config, item)
815        elif hasattr(self, item):
816            return getattr(self, item)
817        else:
818            raise AttributeError(f"'{self.__name__}' object has no attribute '{item}'")
819
820
class CoreConfigManager(ConfigManager):
    """
    Configuration reader restricted to core settings

    Never connects to a database, so it can be used when no database is
    present and in thread-unsafe contexts. Anything that would require a
    database connection fails with a RuntimeError.
    """
    def with_db(self, db=None):
        """
        Refuse to link a database connection

        :param db:  Ignored
        :raises RuntimeError:  Always
        """
        raise RuntimeError("Trying to read non-core configuration value from a CoreConfigManager")
class CacheMiss:
class CacheMiss:
    """
    Sentinel type for memcache lookups that found nothing

    Lets callers tell "not in the cache" apart from a cached `None` value.
    """

Helper class to distinguish memcache misses from true None values

class BaseConfigReader:
class BaseConfigReader:
    """
    Common base class for the various kinds of configuration readers
    """

Helper class to unify various types of configuration readers

class ConfigManager(BaseConfigReader):
class ConfigManager(BaseConfigReader):
    # NOTE: everything below is a class attribute. The dictionaries are
    # updated in place by the methods of this class (never rebound), so their
    # contents are shared between all instances.

    # database object used to read and write settings; attached by with_db()
    db = None
    # NOTE(review): not referenced in the visible part of this class
    dbconn = None
    # NOTE(review): not referenced in the visible part of this class
    cache = {}
    # logger; attached by with_logger()
    logger = None

    # values read from config.ini; filled by load_core_settings()
    core_settings = {}
    # definitions of the configurable settings; filled by load_user_settings()
    config_definition = {}
    # Thread-local storage for a singleton memcache client per thread.
    # Prevents creating a new TCP connection per request in threaded/gunicorn contexts.
    _memcache_tls = threading.local()
 43
 44    def __init__(self, db=None):
 45        # ensure core settings (including database config) are loaded
 46        self.load_core_settings()
 47        self.load_user_settings()
 48        # Do not create a memcache client here; get_memcache() will lazily create per-thread.
 49
 50        # establish database connection if none available
 51        if db:
 52            self.with_db(db)
 53
 54    def with_db(self, db=None):
 55        """
 56        Initialise database
 57
 58        Not done on init, because something may need core settings before the
 59        database can be initialised
 60
 61        :param db:  Database object. If None, initialise it using the core config
 62        """
 63        if db or not self.db:
 64            if db and db.log and not self.logger:
 65                # borrow logger from database
 66                self.with_logger(db.log)
 67
 68            # Replace w/ db if provided else only initialise if not already
 69            self.db = db if db else Database(logger=self.logger, dbname=self.get("DB_NAME"), user=self.get("DB_USER"),
 70                                         password=self.get("DB_PASSWORD"), host=self.get("DB_HOST"),
 71                                         port=self.get("DB_PORT"), appname="config-reader")
 72        else:
 73            # self.db already initialized and no db provided
 74            pass
 75
 76    def with_logger(self, logger):
 77        """
 78        Attach logger to config manager
 79
 80        4CAT's logger has some features on top of the basic Python logger that
 81        are needed for further operation, e.g. the Debug2 log level.
 82
 83        :param Logger logger:
 84        """
 85        self.logger = logger
 86
 87    def load_user_settings(self):
 88        """
 89        Load settings configurable by the user
 90
 91        Does not load the settings themselves, but rather the definition so
 92        values can be validated, etc
 93        """
 94        # basic 4CAT settings
 95        self.config_definition.update(config_definition)
 96
 97        # module settings can't be loaded directly because modules need the
 98        # config manager to load, so that becomes circular
 99        # instead, this is cached on startup and then loaded here
100        module_config_path = self.get("PATH_CONFIG").joinpath("module_config.bin")
101        if module_config_path.exists():
102            try:
103                with module_config_path.open("rb") as infile:
104                    retries = 0
105                    module_config = None
106                    # if 4CAT is being run in two different containers
107                    # (front-end and back-end) they might both be running this
108                    # bit of code at the same time. If the file is half-written
109                    # loading it will fail, so allow for a few retries
110                    while retries < 3:
111                        try:
112                            module_config = pickle.load(infile)
113                            break
114                        except Exception:  # this can be a number of exceptions, all with the same recovery path
115                            time.sleep(0.1)
116                            retries += 1
117                            continue
118
119                    if module_config is None:
120                        # not really a way to gracefully recover from this, but
121                        # we can at least describe the error
122                        raise RuntimeError("Could not read module_config.bin. The 4CAT developers did a bad job of "
123                                           "preventing this. Shame on them!")
124
125                    self.config_definition.update(module_config)
126            except (ValueError, TypeError):
127                pass
128
129    def load_core_settings(self):
130        """
131        Load 4CAT core settings
132
133        These are (mostly) stored in config.ini and cannot be changed from the
134        web interface.
135
136        :return:
137        """
138        config_file = Path(__file__).parent.parent.joinpath("config/config.ini")
139        config_reader = configparser.ConfigParser()
140        in_docker = False
141        if config_file.exists():
142            config_reader.read(config_file)
143            if config_reader["DOCKER"].getboolean("use_docker_config"):
144                # Can use throughtout 4CAT to know if Docker environment
145                in_docker = True
146        else:
147            # config should be created!
148            raise ConfigException("No config/config.ini file exists! Update and rename the config.ini-example file.")
149        
150        # Set up core settings
151        # Using Path.joinpath() will ensure paths are relative to ROOT_PATH or absolute (if /some/path is provided)
152        root_path = Path(os.path.abspath(os.path.dirname(__file__))).joinpath("..").resolve() # better don"t change this
153
154        self.core_settings.update({
155            "CONFIG_FILE": config_file.resolve(),
156            "USING_DOCKER": in_docker,
157            "DB_HOST": config_reader["DATABASE"].get("db_host"),
158            "DB_PORT": config_reader["DATABASE"].get("db_port"),
159            "DB_USER": config_reader["DATABASE"].get("db_user"),
160            "DB_NAME": config_reader["DATABASE"].get("db_name"),
161            "DB_PASSWORD": config_reader["DATABASE"].get("db_password"),
162
163            "API_HOST": config_reader["API"].get("api_host"),
164            "API_PORT": config_reader["API"].getint("api_port"),
165
166            "MEMCACHE_SERVER": config_reader.get("MEMCACHE", option="memcache_host", fallback=None),
167
168            "PATH_ROOT": root_path,
169            "PATH_CONFIG": root_path.joinpath("config"), # .current-version, config.ini are hardcoded here via docker/docker_setup.py and helper-scripts/migrate.py
170            "PATH_EXTENSIONS": root_path.joinpath("config/extensions"), # Must match setup.py and migrate.py
171            "PATH_LOGS": root_path.joinpath(config_reader["PATHS"].get("path_logs", "")),
172            "PATH_IMAGES": root_path.joinpath(config_reader["PATHS"].get("path_images", "")),
173            "PATH_DATA": root_path.joinpath(config_reader["PATHS"].get("path_data", "")),
174            "PATH_LOCKFILE": root_path.joinpath(config_reader["PATHS"].get("path_lockfile", "")),
175            "PATH_SESSIONS": root_path.joinpath(config_reader["PATHS"].get("path_sessions", "")),
176
177            "ANONYMISATION_SALT": config_reader["GENERATE"].get("anonymisation_salt"),
178            "SECRET_KEY": config_reader["GENERATE"].get("secret_key")
179        })
180
181
182    def get_memcache(self):
183        """
184        Get (or create) a thread-local memcache client
185
186        The config reader can optionally use Memcache to keep fetched values in
187        memory.
188        """
189        # Reuse per-thread client if already initialised.
190        existing = getattr(self._memcache_tls, "client", None)
191        if existing:
192            return existing
193
194        server = self.get("MEMCACHE_SERVER")
195        if server:
196            try:
197                memcache = MemcacheClient(server, serde=serde.pickle_serde, key_prefix=b"4cat-config")
198                # do one test fetch to test if connection is valid
199                memcache.set("4cat-init-dummy", time.time())
200                memcache.init_thread_id = threading.get_ident()
201                self._memcache_tls.client = memcache
202                return memcache
203            except (SystemError, ValueError, MemcacheError, ConnectionError, OSError):
204                # we have no access to the logger here so we simply pass
205                # later we can detect elsewhere that a memcache address is
206                # configured but no connection is there - then we can log
207                # config reader still works without memcache
208                pass
209
210        return None
211
212    def close_memcache(self):
213        """Close and dispose this thread's memcache client.
214
215        Call from gunicorn worker_exit or application teardown to ensure
216        sockets are closed explicitly instead of relying on GC/process exit.
217        """
218        client = getattr(self._memcache_tls, "client", None)
219        if client:
220            try:
221                client.close()
222            except Exception:
223                pass
224            finally:
225                try:
226                    del self._memcache_tls.client
227                except AttributeError:
228                    pass
229        
230
231    def ensure_database(self):
232        """
233        Ensure the database is in sync with the config definition
234
235        Deletes all stored settings not defined in 4CAT, and creates a global
236        setting for all settings not yet in the database.
237        """
238        self.with_db()
239
240        # create global values for known keys with the default
241        known_settings = self.get_all_setting_names()
242        for setting, parameters in self.config_definition.items():
243            if setting in known_settings:
244                continue
245
246            self.db.log.debug(f"Creating setting: {setting} with default value {parameters.get('default', '')}")
247            self.set(setting, parameters.get("default", ""))
248
249        # make sure settings and user table are in sync
250        user_tags = list(set(itertools.chain(*[u["tags"] for u in self.db.fetchall("SELECT DISTINCT tags FROM users")])))
251        known_tags = [t["tag"] for t in self.db.fetchall("SELECT DISTINCT tag FROM settings")]
252        tag_order = self.get("flask.tag_order")
253
254        for tag in known_tags:
255            # add tags used by a setting to tag order
256            if tag and tag not in tag_order:
257                tag_order.append(tag)
258
259        for tag in user_tags:
260            # add tags used by a user to tag order
261            if tag and tag not in tag_order:
262                tag_order.append(tag)
263
264        # admin tag should always be first in order
265        if "admin" in tag_order:
266            tag_order.remove("admin")
267
268        tag_order.insert(0, "admin")
269
270        self.set("flask.tag_order", tag_order)
271        self.db.commit()
272
273    def get_all_setting_names(self, with_core=True):
274        """
275        Get names of all settings
276
277        For when the value doesn't matter!
278
279        :param bool with_core:  Also include core (i.e. config.ini) settings
280        :return list:  List of setting names known by the database and core settings
281        """
282        # attempt to initialise the database connection so we can include
283        # user settings
284        if not self.db:
285            self.with_db()
286
287        settings = list(self.core_settings.keys()) if with_core else []
288        settings.extend([s["name"] for s in self.db.fetchall("SELECT DISTINCT name FROM settings")])
289
290        return settings
291
292    def get_all(self, is_json=False, user=None, tags=None, with_core=True, memcache=None):
293        """
294        Get all known settings
295
296        This is *not optimised* but used rarely enough that that doesn't
297        matter so much.
298
299        :param bool is_json:  if True, the value is returned as stored and not
300        interpreted as JSON if it comes from the database
301        :param user:  User object or name. Adds a tag `user:[username]` in
302        front of the tag list.
303        :param tags:  Tag or tags for the required setting. If a tag is
304        provided, the method checks if a special value for the setting exists
305        with the given tag, and returns that if one exists. First matching tag
306        wins.
307        :param bool with_core:  Also include core (i.e. config.ini) settings
308        :param MemcacheClient memcache:  Memcache client. If `None`, a thread-local client will be used.
309
310        :return dict: Setting value, as a dictionary with setting names as keys
311        and setting values as values.
312        """
313        for setting in self.get_all_setting_names(with_core=with_core):
314            yield setting, self.get(setting, None, is_json, user, tags, memcache)
315
316
317    def get(self, attribute_name, default=None, is_json=False, user=None, tags=None, memcache=None):
318        """
319        Get a setting's value from the database
320
321        If the setting does not exist, the provided fallback value is returned.
322
323        :param str attribute_name:  Setting to return.
324        :param default:  Value to return if setting does not exist
325        :param bool is_json:  if True, the value is returned as stored and not
326        interpreted as JSON if it comes from the database
327        :param user:  User object or name. Adds a tag `user:[username]` in
328        front of the tag list.
329        :param tags:  Tag or tags for the required setting. If a tag is
330        provided, the method checks if a special value for the setting exists
331        with the given tag, and returns that if one exists. First matching tag
332        wins.
333    :param MemcacheClient memcache:  Memcache client. If `None`, a thread-local client will be used.
334
335        :return:  Setting value, or the provided fallback, or `None`.
336        """
337        # core settings are not from the database
338        # they are therefore also not memcached - too little gain
339        if type(attribute_name) is not str:
340            raise TypeError(f"attribute_name must be a str, {attribute_name.__class__.__name__} given")
341
342        if attribute_name in self.core_settings:
343            # we never get to the database or memcache part of this method if
344            # this is a core setting we already know
345            return self.core_settings[attribute_name]
346
347        # if trying to access a setting that's not a core setting, attempt to
348        # initialise the database connection
349        if not self.db:
350            self.with_db()
351
352        # get tags to look for
353        # copy() because else we keep adding onto the same list, which
354        # interacts badly with get_all()
355        if tags:
356            tags = tags.copy()
357        tags = self.get_active_tags(user, tags, memcache)
358
359        # now we have all tags - get the config values for each (if available)
360        # and then return the first matching one. Add the 'empty' tag at the
361        # end to fall back to the global value if no specific one exists.
362        tags.append("")
363
364        # Obtain thread-local memcache client if not explicitly given.
365        if not memcache:
366            memcache = self.get_memcache()
367
368        # first check if we have all the values in memcache, in which case we
369        # do not need a database query
370        if memcache:
371            if threading.get_ident() != memcache.init_thread_id:
372                raise RuntimeError("Thread-unsafe use of memcache! Please make sure you are using a configuration "
373                                   "wrapper to read with a thread-local memcache connection.")
374
375            cached_values = {tag: memcache.get(self._get_memcache_id(attribute_name, tag), default=CacheMiss) for tag in tags}
376
377        else:
378            cached_values = {t: CacheMiss for t in tags}
379
380        # for the tags we could not get from memcache, run a database query
381        # (and save to cache if possible)
382        missing_tags = [t for t in cached_values if cached_values[t] is CacheMiss]
383        if missing_tags:
384            # query database for any values within the required tags
385            query = "SELECT * FROM settings WHERE name = %s AND tag IN %s"
386            replacements = (attribute_name, tuple(missing_tags))
387            queried_settings = {setting["tag"]: setting["value"] for setting in self.db.fetchall(query, replacements)}
388
389            if memcache:
390                for tag, value in queried_settings.items():
391                    memcache.set(self._get_memcache_id(attribute_name, tag), value)
392
393            cached_values.update(queried_settings)
394
395        # there may be some tags for which we still do not have a value at
396        # this point. these simply do not have a tag-specific value but that in
397        # itself is worth caching, otherwise we're going to query for a
398        # non-existent value each time.
399        # so: cache a magic value for such setting/tag combinations, and
400        # replace the magic value with a CacheMiss in the dict that will be
401        # parsed
402        unconfigured_magic = "__unconfigured__"
403        if memcache:
404            for tag in [t for t in cached_values if cached_values[t] is CacheMiss]:
405                # should this be more magic?
406                memcache.set(self._get_memcache_id(attribute_name, tag), unconfigured_magic)
407
408            for tag in [t for t in cached_values if cached_values[t] == unconfigured_magic]:
409                cached_values[tag] = CacheMiss
410
411        # now we may still have some CacheMisses in the values dict, if there
412        # was no setting in the database with that tag. So, find the first
413        # value that is not a CacheMiss. If nothing matches, try the global tag
414        # and if even that does not match (no setting saved at all) return the
415        # default
416        for tag in tags:
417            if tag in cached_values and cached_values.get(tag) is not CacheMiss:
418                value = cached_values[tag]
419                break
420        else:
421            value = None
422
423        # parse some values...
424        if not is_json and value is not None:
425            value = json.loads(value)
426        # TODO: Which default should have priority? The provided default feels like it should be the highest priority, but I think that is an old implementation and perhaps should be removed. - Dale
427        elif value is None and attribute_name in self.config_definition and "default" in self.config_definition[attribute_name]:
428            value = self.config_definition[attribute_name]["default"]
429        elif value is None and default is not None:
430            value = default
431
432        return value
433
434    def get_active_tags(self, user=None, tags=None, memcache=None):
435        """
436        Get active tags for given user/tag list
437
438        Used internally to harmonize tag setting for various methods, but can
439        also be called directly to verify tag activation.
440
441        :param user:  User object or name. Adds a tag `user:[username]` in
442        front of the tag list.
443        :param tags:  Tag or tags for the required setting. If a tag is
444        provided, the method checks if a special value for the setting exists
445        with the given tag, and returns that if one exists. First matching tag
446        wins.
447    :param MemcacheClient memcache:  Memcache client. If `None`, a thread-local client will be used.
448        :return list:  List of tags
449        """
450        # be flexible about the input types here
451        if tags is None:
452            tags = []
453        elif type(tags) is str:
454            tags = [tags]
455
456        user = self._normalise_user(user)
457
458        # user-specific settings are just a special type of tag (which takes
459        # precedence), same goes for user groups. so if a user was passed, get
460        # that user's tags (including the 'special' user: tag) and add them
461        # to the list
462        if user:
463            user_tags = CacheMiss
464            
465            if not memcache:
466                memcache = self.get_memcache()
467                
468            if memcache:
469                memcache_id = f"_usertags-{user}"
470                user_tags = memcache.get(memcache_id, default=CacheMiss)
471
472            if user_tags is CacheMiss:
473                user_tags = self.db.fetchone("SELECT tags FROM users WHERE name = %s", (user,))
474                if user_tags and memcache:
475                    memcache.set(memcache_id, user_tags)
476
477            if user_tags:
478                try:
479                    tags.extend(user_tags["tags"])
480                except (TypeError, ValueError):
481                    # should be a JSON list, but isn't
482                    pass
483
484            tags.insert(0, f"user:{user}")
485
486        return tags
487
488    def set(self, attribute_name, value, is_json=False, tag="", overwrite_existing=True, memcache=None):
489        """
490        Insert OR set value for a setting
491
492        If overwrite_existing=True and the setting exists, the setting is updated; if overwrite_existing=False and the
493        setting exists the setting is not updated.
494
495        :param str attribute_name:  Attribute to set
496        :param value:  Value to set (will be serialised as JSON)
497        :param bool is_json:  True for a value that is already a serialised JSON string; False if value is object that needs to
498                          be serialised into a JSON string
499        :param bool overwrite_existing: True will overwrite existing setting, False will do nothing if setting exists
500        :param str tag:  Tag to write setting for
501    :param MemcacheClient memcache:  Memcache client. If `None`, a thread-local client will be used.
502
503        :return int: number of updated rows
504        """
505        # Check value is valid JSON
506        if is_json:
507            try:
508                json.dumps(json.loads(value))
509            except json.JSONDecodeError:
510                return None
511        else:
512            try:
513                value = json.dumps(value)
514            except json.JSONDecodeError:
515                return None
516
517        if attribute_name in self.config_definition and self.config_definition.get(attribute_name).get("global"):
518            tag = ""
519
520        if overwrite_existing:
521            query = "INSERT INTO settings (name, value, tag) VALUES (%s, %s, %s) ON CONFLICT (name, tag) DO UPDATE SET value = EXCLUDED.value"
522        else:
523            query = "INSERT INTO settings (name, value, tag) VALUES (%s, %s, %s) ON CONFLICT DO NOTHING"
524
525        self.db.execute(query, (attribute_name, value, tag))
526        updated_rows = self.db.cursor.rowcount
527        self.db.log.debug(f"Updated setting for {attribute_name}: {value} (tag: {tag})")
528
529        if not memcache:
530            memcache = self.get_memcache()
531
532        if memcache:
533            # invalidate any cached value for this setting
534            memcache_id = self._get_memcache_id(attribute_name, tag)
535            memcache.delete(memcache_id)
536
537        return updated_rows
538
539    def delete_for_tag(self, attribute_name, tag):
540        """
541        Delete config override for a given tag
542
543        :param str attribute_name:
544        :param str tag:
545        :return int: number of deleted rows
546        """
547        self.db.delete("settings", where={"name": attribute_name, "tag": tag})
548        updated_rows = self.db.cursor.rowcount
549        client = self.get_memcache()
550        if client:
551            client.delete(self._get_memcache_id(attribute_name, tag))
552        return updated_rows
553
554    def clear_cache(self):
555        """
556        Clear cached configuration values
557
558        Called when the backend restarts - helps start with a blank slate.
559        """
560        client = self.get_memcache()
561        if not client:
562            return
563        client.flush_all()
564
565    def uncache_user_tags(self, users):
566        """
567        Clear cached user tags
568
569        User tags are cached with memcache if possible to avoid unnecessary
570        database roundtrips. This method clears the cached user tags, in case
571        a tag is added/deleted from a user.
572
573        :param list users:  List of users, as usernames or User objects
574        """
575        client = self.get_memcache()
576        if client:
577            for user in users:
578                user = self._normalise_user(user)
579                client.delete(f"_usertags-{user}")
580
581    def _normalise_user(self, user):
582        """
583        Normalise user object
584
585        Users may be passed as a username, a user object, or a proxy of such an
586        object. This method normalises this to a string (the username), or
587        `None` if no user is provided.
588
589        :param user:  User value to normalise
590        :return str|None:  Normalised value
591        """
592
593        # can provide either a string or user object
594        if type(user) is not str:
595            if type(user).__name__ == "LocalProxy":
596                # passed on from Flask
597                user = user._get_current_object()
598
599            if hasattr(user, "get_id"):
600                user = user.get_id()
601            elif user != None:  # noqa: E711
602                # werkzeug.local.LocalProxy (e.g., user not yet logged in) wraps None; use '!=' instead of 'is not'
603                raise TypeError(
604                    f"_normalise_user() expects None, a User object or a string for argument 'user', {type(user).__name__} given"
605                )
606
607        return user
608
609    def _get_memcache_id(self, attribute_name, tags=None):
610        """
611        Generate a memcache key for a config setting request
612
613        This includes the relevant user name/tags because the value may be
614        different depending on the value of these parameters.
615
616        :param str attribute_name:
617        :param str|list tags:
618        :return str:
619        """
620        if tags and isinstance(tags, str):
621            tags = [tags]
622
623        tag_bit = []
624        if tags:
625            tag_bit.append("|".join(tags))
626
627        memcache_id = attribute_name
628        if tag_bit:
629            memcache_id += f"-{'-'.join(tag_bit)}"
630
631        return memcache_id.encode("ascii")
632
633    def __getattr__(self, attr):
634        """
635        Getter so we can directly request values
636
637        :param attr:  Config setting to get
638        :return:  Value
639        """
640
641        if attr in dir(self):
642            # an explicitly defined attribute should always be called in favour
643            # of this passthrough
644            attribute = getattr(self, attr)
645            return attribute
646        else:
647            return self.get(attr)

Helper class to unify various types of configuration readers

ConfigManager(db=None)
44    def __init__(self, db=None):
45        # ensure core settings (including database config) are loaded
46        self.load_core_settings()
47        self.load_user_settings()
48        # Do not create a memcache client here; get_memcache() will lazily create per-thread.
49
50        # establish database connection if none available
51        if db:
52            self.with_db(db)
db = None
dbconn = None
cache = {}
logger = None
core_settings = {'CONFIG_FILE': PosixPath('/opt/docs-maker/4cat/config/config.ini'), 'USING_DOCKER': False, 'DB_HOST': 'localhost', 'DB_PORT': '5432', 'DB_USER': 'fourcat', 'DB_NAME': 'fourcat', 'DB_PASSWORD': 'supers3cr3t', 'API_HOST': 'localhost', 'API_PORT': 4444, 'MEMCACHE_SERVER': None, 'PATH_ROOT': PosixPath('/opt/docs-maker/4cat'), 'PATH_CONFIG': PosixPath('/opt/docs-maker/4cat/config'), 'PATH_EXTENSIONS': PosixPath('/opt/docs-maker/4cat/config/extensions'), 'PATH_LOGS': PosixPath('/opt/docs-maker/4cat/data/logs'), 'PATH_IMAGES': PosixPath('/opt/docs-maker/4cat/data/images'), 'PATH_DATA': PosixPath('/opt/docs-maker/4cat/data/datasets'), 'PATH_LOCKFILE': PosixPath('/opt/docs-maker/4cat/backend'), 'PATH_SESSIONS': PosixPath('/opt/docs-maker/4cat/config/sessions'), 'ANONYMISATION_SALT': 'REPLACE_THIS', 'SECRET_KEY': 'REPLACE_THIS'}
config_definition = {'datasources.intro': {'type': 'info', 'help': "Data sources enabled below will be offered to people on the 'Create Dataset' page. Additionally, people can upload datasets for these by for example exporting them with [Zeeschuimer](https://github.com/digitalmethodsinitiative/zeeschuimer) to this 4CAT instance.\n\nSome data sources offer further settings which may be configured on other tabs."}, 'datasources.intro2': {'type': 'info', 'help': "*Warning:* changes take effect immediately. Datasets that would have expired under the new settings will be deleted. You can use the 'Dataset bulk management' module in the control panel to manage the expiration status of existing datasets."}, 'datasources.enabled': {'type': 'datasources', 'default': ['ninegag', 'bsky', 'douban', 'douyin', 'imgur', 'upload', 'instagram', 'import_4cat', 'linkedin', 'media-import', 'telegram', 'tiktok', 'twitter', 'tiktok-comments', 'truthsocial', 'gab'], 'help': 'Data Sources', 'tooltip': 'A list of enabled data sources that people can choose from when creating a dataset page.'}, 'datasources.expiration': {'type': 'json', 'default': {'fourchan': {'enabled': False, 'allow_optout': False, 'timeout': 0}, 'eightchan': {'enabled': False, 'allow_optout': False, 'timeout': 0}, 'eightkun': {'enabled': False, 'allow_optout': False, 'timeout': 0}, 'ninegag': {'enabled': True, 'allow_optout': False, 'timeout': 0}, 'bitchute': {'enabled': True, 'allow_optout': False, 'timeout': 0}, 'bsky': {'enabled': True, 'allow_optout': False, 'timeout': 0}, 'dmi-tcat': {'enabled': False, 'allow_optout': False, 'timeout': 0}, 'dmi-tcatv2': {'enabled': False, 'allow_optout': False, 'timeout': 0}, 'douban': {'enabled': True, 'allow_optout': False, 'timeout': 0}, 'douyin': {'enabled': True, 'allow_optout': False, 'timeout': 0}, 'import_4cat': {'enabled': True, 'allow_optout': False, 'timeout': 0}, 'gab': {'enabled': True, 'allow_optout': False, 'timeout': 0}, 'imgur': {'enabled': True, 'allow_optout': 
False, 'timeout': 0}, 'upload': {'enabled': True, 'allow_optout': False, 'timeout': 0}, 'instagram': {'enabled': True, 'allow_optout': False, 'timeout': 0}, 'linkedin': {'enabled': True, 'allow_optout': False, 'timeout': 0}, 'media-import': {'enabled': True, 'allow_optout': False, 'timeout': 0}, 'parler': {'enabled': True, 'allow_optout': False, 'timeout': 0}, 'reddit': {'enabled': False, 'allow_optout': False, 'timeout': 0}, 'telegram': {'enabled': True, 'allow_optout': False, 'timeout': 0}, 'tiktok': {'enabled': True, 'allow_optout': False, 'timeout': 0}, 'tiktok-urls': {'enabled': True, 'allow_optout': False, 'timeout': 0}, 'truthsocial': {'enabled': True, 'allow_optout': False, 'timeout': 0}, 'tumblr': {'enabled': False, 'allow_optout': False, 'timeout': 0}, 'twitter': {'enabled': True, 'allow_optout': False, 'timeout': 0}, 'twitterv2': {'enabled': False, 'allow_optout': False, 'timeout': 0}, 'usenet': {'enabled': False, 'allow_optout': False, 'timeout': 0}, 'vk': {'enabled': False, 'allow_optout': False, 'timeout': 0}}, 'help': 'Data source-specific expiration', 'tooltip': "Allows setting expiration settings per datasource. Configured by proxy via the 'data sources' setting.", 'indirect': True}, 'extensions._intro': {'type': 'info', 'help': "4CAT extensions can be disabled and disabled via the control below. When enabled, extensions may define further settings that can typically be configured via the extension's tab on the left side of this page. **Note that 4CAT needs to be restarted for this to take effect!**"}, 'extensions.enabled': {'type': 'extensions', 'default': {}, 'help': 'Extensions', 'global': True}, '4cat.name': {'type': 'string', 'default': '4CAT', 'help': 'Short tool name', 'tooltip': "Configure short name for the tool in its web interface. 
The backend will always refer to '4CAT' - the name of the software, and a 'powered by 4CAT' notice may also show up in the web interface regardless of the value entered here."}, '4cat.name_long': {'type': 'string', 'default': '4CAT: Capture and Analysis Toolkit', 'help': 'Full tool name', 'tooltip': "Used in e.g. the interface header. The backend will always refer to '4CAT' - the name of the software, and a 'powered by 4CAT' notice may also show up in the web interface regardless of the value entered here."}, '4cat.about_this_server': {'type': 'textarea', 'default': '', 'help': 'Server information', 'tooltip': "Custom server information that is displayed on the 'About' page. Can for instance be used to show information about who maintains the tool or what its intended purpose is. Accepts Markdown markup."}, '4cat.crash_message': {'type': 'textarea', 'default': "This processor has crashed; the crash has been logged. 4CAT will try again when it is restarted. Contact your server administrator if this error persists. You can also report issues via 4CAT's [GitHub repository](https://github.com/digitalmethodsinitiative/4cat/issues).", 'help': 'Crash message', 'tooltip': 'This message is shown to users in the interface when a processor crashes while processing their dataset. It can contain Markdown markup.'}, 'privileges.can_create_dataset': {'type': 'toggle', 'default': True, 'help': 'Can create dataset', 'tooltip': "Controls whether users can view and use the 'Create dataset' page. Does NOT control whether users can run processors (which also create datasets); this is a separate setting."}, 'privileges.can_run_processors': {'type': 'toggle', 'default': True, 'help': 'Can run processors', 'tooltip': 'Controls whether processors can be run. 
There may be processor-specific settings or dependencies that override this.'}, 'privileges.can_view_all_datasets': {'type': 'toggle', 'default': False, 'help': 'Can view global dataset index', 'tooltip': 'Controls whether users can see the global datasets overview, i.e. not just for their own user but for all other users as well.'}, 'privileges.can_view_private_datasets': {'type': 'toggle', 'default': False, 'help': 'Can view private datasets', 'tooltip': 'Controls whether users can see the datasets made private by their owners.'}, 'privileges.can_create_api_token': {'type': 'toggle', 'default': True, 'help': 'Can create API token', 'tooltip': "Controls whether users can create a token for authentication with 4CAT's Web API."}, 'privileges.can_use_explorer': {'type': 'toggle', 'default': True, 'help': 'Can use Explorer', 'tooltip': 'Controls whether users can use the Explorer feature to analyse and annotate datasets.'}, 'privileges.can_export_datasets': {'type': 'toggle', 'default': True, 'help': 'Can export datasets', 'tooltip': 'Allows users to export datasets they own to other 4CAT instances.'}, 'privileges.admin.can_manage_users': {'type': 'toggle', 'default': False, 'help': 'Can manage users', 'tooltip': 'Controls whether users can add, edit and delete other users via the Control Panel'}, 'privileges.admin.can_manage_notifications': {'type': 'toggle', 'default': False, 'help': 'Can manage notifications', 'tooltip': 'Controls whether users can add, edit and delete notifications via the Control Panel'}, 'privileges.admin.can_manage_settings': {'type': 'toggle', 'default': False, 'help': 'Can manage settings', 'tooltip': 'Controls whether users can manipulate 4CAT settings via the Control Panel'}, 'privileges.admin.can_manipulate_all_datasets': {'type': 'toggle', 'default': False, 'help': 'Can manipulate all datasets', 'tooltip': 'Controls whether users can manipulate all datasets as if they were an owner, e.g. 
sharing it with others, running processors, et cetera.'}, 'privileges.admin.can_restart': {'type': 'toggle', 'default': False, 'help': 'Can restart/upgrade', 'tooltip': 'Controls whether users can restart, upgrade, and manage extensions 4CAT via the Control Panel'}, 'privileges.admin.can_manage_extensions': {'type': 'toggle', 'default': False, 'help': 'Can manage extensions', 'tooltip': 'Controls whether users can install and uninstall 4CAT extensions via the Control Panel'}, 'privileges.can_upgrade_to_dev': {'type': 'toggle', 'default': False, 'help': 'Can upgrade to development branch', 'tooltip': "Controls whether users can upgrade 4CAT to a development branch of the code via the Control Panel. This is an easy way to break 4CAT so it is recommended to not enable this unless you're really sure of what you're doing."}, 'privileges.admin.can_manage_tags': {'type': 'toggle', 'default': False, 'help': 'Can manage user tags', 'tooltip': 'Controls whether users can manipulate user tags via the Control Panel'}, 'privileges.admin.can_view_status': {'type': 'toggle', 'default': False, 'help': 'Can view worker status', 'tooltip': 'Controls whether users can view worker status via the Control Panel'}, '4cat.github_url': {'type': 'string', 'default': 'https://github.com/digitalmethodsinitiative/4cat', 'help': 'Repository URL', 'tooltip': 'URL to the github repository for this 4CAT instance', 'global': True}, '4cat.phone_home_url': {'type': 'string', 'default': 'https://ping.4cat.nl', 'help': 'Phone home URL', 'tooltip': 'This URL is called when 4CAT is installed, if the user consents, to help the 4CAT developers (the Digital Methods Initiative) keep track of how much it is used. Later, notifications for 4CAT admins are fetched from this URL to inform them about important changes and update procedures. 
If you want to disable this functionality, leave this field empty.', 'global': True}, '4cat.phone_home_asked': {'type': 'toggle', 'default': True, 'help': 'Shown phone home request?', 'tooltip': "Whether you've seen the 'phone home request'. Set to `False` to see the request again. There should be no need to change this manually.", 'global': True}, '4cat.layout_hue': {'type': 'hue', 'default': 356, 'help': 'Interface accent colour', 'saturation': 87, 'value': 81, 'min': 0, 'max': 360, 'coerce_type': <class 'int'>, 'global': True}, '4cat.layout_hue_secondary': {'type': 'hue', 'default': 86, 'help': 'Interface secondary colour', 'saturation': 87, 'value': 90, 'min': 0, 'max': 360, 'coerce_type': <class 'int'>, 'global': True}, '4cat.allow_access_request': {'type': 'toggle', 'default': True, 'help': 'Allow access requests', 'tooltip': 'When enabled, users can request a 4CAT account via the login page if they do not have one, provided e-mail settings are configured.'}, '4cat.allow_access_request_limiter': {'type': 'string', 'default': '100/day', 'help': 'Access request limit', 'tooltip': "Limit the number of access requests per day. This is a rate limit for the number of requests that can be made per IP address. The format is a number followed by a time unit, e.g. '100/day', '10/hour', '5/minute'. You can also combine these, e.g. '100/day;10/hour'.", 'global': True}, '4cat.sphinx_host': {'type': 'string', 'default': 'localhost', 'help': 'Sphinx host', 'tooltip': 'Sphinx is used for full-text search for collected datasources (e.g., 4chan, 8kun, 8chan) and requires additional setup (see 4CAT wiki on GitHub).', 'global': True}, 'proxies.urls': {'type': 'json', 'default': ['__localhost__'], 'help': 'Proxy URLs', 'tooltip': "A JSON Array of full proxy URLs. Include any proxy login details in the URL itself (e.g. http://username:password@proxy:port). 
There is one special value, '__localhost__'; this means a direct request, without using a proxy."}, 'proxies.cooloff': {'type': 'string', 'coerce_type': <class 'float'>, 'help': 'Cool-off time', 'tooltip': 'After a request has finished, do not use the proxy again for this many seconds.', 'default': 0.1, 'min': 0.0}, 'proxies.concurrent-overall': {'type': 'string', 'coerce_type': <class 'int'>, 'default': 1, 'min': 1, 'help': 'Max concurrent requests (overall)', 'tooltip': 'Per proxy, this many requests can run concurrently overall.'}, 'proxies.concurrent-host': {'type': 'string', 'coerce_type': <class 'int'>, 'default': 1, 'min': 1, 'help': 'Max concurrent requests (per host)', 'tooltip': 'Per proxy, this many requests can run concurrently per host. Should be lower than or equal to the overall limit.'}, 'proxies.allow-localhost-fallback': {'type': 'toggle', 'default': True, 'help': 'Fall back to localhost', 'tooltip': 'If all proxies are down, allow falling back to direct requests (i.e., no proxy).'}, 'logging.slack.level': {'type': 'choice', 'default': 'WARNING', 'options': {'DEBUG': 'Debug', 'INFO': 'Info', 'WARNING': 'Warning', 'ERROR': 'Error', 'CRITICAL': 'Critical'}, 'help': 'Slack alert level', 'tooltip': 'Level of alerts (or higher) to be sent to Slack. Only alerts above this level are sent to the Slack webhook', 'global': True}, 'logging.slack.webhook': {'type': 'string', 'default': '', 'help': 'Slack webhook URL', 'tooltip': 'Slack callback URL to use for alerts', 'global': True}, 'mail.admin_email': {'type': 'string', 'default': '', 'help': 'Admin e-mail', 'tooltip': 'E-mail of admin, to send account requests etc to', 'global': True}, 'mail.server': {'type': 'string', 'default': '', 'help': 'SMTP server', 'tooltip': 'SMTP server to connect to for sending e-mail alerts.', 'global': True}, 'mail.port': {'type': 'string', 'default': 0, 'coerce_type': <class 'int'>, 'help': 'SMTP port', 'tooltip': 'SMTP port to connect to for sending e-mail alerts. 
"0" defaults to "465" for SMTP_SSL or OS default for SMTP.', 'global': True}, 'mail.ssl': {'type': 'choice', 'default': 'ssl', 'options': {'ssl': 'SSL', 'tls': 'TLS', 'none': 'None'}, 'help': 'SMTP over SSL, TLS, or None', 'tooltip': 'Security scheme to use to connect to e-mail server', 'global': True}, 'mail.username': {'type': 'string', 'default': '', 'help': 'SMTP Username', 'tooltip': 'Only if your SMTP server requires login', 'global': True}, 'mail.password': {'type': 'string', 'default': '', 'help': 'SMTP Password', 'tooltip': 'Only if your SMTP server requires login', 'global': True}, 'mail.noreply': {'type': 'string', 'default': 'noreply@localhost', 'help': 'NoReply e-mail', 'global': True}, 'explorer.basic-explanation': {'type': 'info', 'help': "4CAT's Explorer feature lets you navigate and annotate datasets as if they appared on their original platform. This is intended to facilitate qualitative exploration and manual coding."}, 'explorer.max_posts': {'type': 'string', 'default': 100000, 'help': 'Amount of items', 'coerce_type': <class 'int'>, 'tooltip': 'Maximum number of items to be considered by the Explorer (prevents timeouts and memory errors)'}, 'explorer.posts_per_page': {'type': 'string', 'default': 50, 'help': 'Items per page', 'coerce_type': <class 'int'>, 'tooltip': 'Number of items to display per page'}, 'explorer.config_explanation': {'type': 'info', 'help': 'Data sources use <em>Explorer templates</em> that determine how they look and what information is displayed. Explorer templates consist of [custom HTML templates](https://github.com/digitalmethodsinitiative/4cat/tree/master/webtool/templates/explorer/datasource-templates) and [custom CSS files](https://github.com/digitalmethodsinitiative/4cat/tree/master/webtool/static/css/explorer). 
If no template is available for a data source, a <em>generic</em> template is used made of [this HTML file](https://github.com/digitalmethodsinitiative/4cat/blob/master/webtool/templates/explorer/datasource-templates/generic.html) and [this CSS file](https://github.com/digitalmethodsinitiative/4cat/tree/master/webtool/static/css/explorer/generic.css).\n\nYou can request a new data source Explorer template by [creating a GitHub issue](https://github.com/digitalmethodsinitiative/4cat/issues) or adding them yourself and opening a pull request.'}, 'flask.flask_app': {'type': 'string', 'default': 'webtool/fourcat', 'help': 'Flask App Name', 'tooltip': '', 'global': True}, 'flask.server_name': {'type': 'string', 'default': 'localhost:5000', 'help': 'Host name', 'tooltip': 'e.g., my4CAT.com, localhost, 127.0.0.1. Include a port when not using 80 (HTTP) or 443 (HTTPS), or when your reverse proxy forwards on a non-standard port. This value is passed to Flask’s SERVER_NAME. Restart the front-end for changes to apply.', 'global': True}, 'flask.autologin.hostnames': {'type': 'json', 'default': [], 'help': 'White-listed hostnames', 'tooltip': 'A list of host names or IP addresses to automatically log in. Docker should include localhost and Server Name. Front-end needs to be restarted for changed to apply.', 'global': True}, 'flask.autologin.api': {'type': 'json', 'default': [], 'help': 'White-list for API', 'tooltip': 'A list of host names or IP addresses to allow access to API endpoints with no rate limiting. Docker should include localhost and Server Name. 
Front-end needs to be restarted for changed to apply.', 'global': True}, 'flask.https': {'type': 'toggle', 'default': False, 'help': 'Use HTTPS', 'tooltip': "If your server is using 'https', set to True and 4CAT will use HTTPS links.", 'global': True}, 'flask.proxy_override': {'type': 'multi_select', 'default': [], 'options': {'x_for': 'X-Forwarded-For', 'x_proto': 'X-Forwarded-Proto', 'x_host': 'X-Forwarded-Host', 'x_port': 'X-Forwarded-Port', 'x_prefix': 'X-Forwarded-Prefix'}, 'help': 'Use proxy headers for URL', 'tooltip': 'These proxy headers will be taken into account when building URLs. For example, if X-Forwarded-Proto is enabled, the URL scheme (http/https) of the built URL will be based on the scheme defined by this header. Use when running 4CAT behind a reverse proxy. Requires a front-end restart to take effect.'}, 'flask.autologin.name': {'type': 'string', 'default': 'Automatic login', 'help': 'Auto-login name', 'tooltip': 'Username for whitelisted hosts (automatically logged in users see this name for themselves)'}, 'flask.secret_key': {'type': 'string', 'default': 'please change me... please...', 'help': 'Secret key', 'tooltip': 'Secret key for Flask, used for session cookies', 'global': True}, 'flask.max_form_parts': {'type': 'string', 'default': 1000, 'help': 'Max form parts per request', 'coerce_type': <class 'int'>, 'global': True, 'tooltip': 'Affects approximate number of files that can be uploaded at once'}, 'flask.tag_order': {'type': 'json', 'default': ['admin'], 'help': 'Tag priority', 'tooltip': "User tag priority order. This can be manipulated from the 'User tags' panel instead of directly.", 'global': True, 'indirect': True}, 'flask.proxy_secret': {'type': 'string', 'default': '', 'help': 'Proxy secret', 'tooltip': 'Secret value to authenticate proxy headers. If the value of the X-4CAT-Config-Via-Proxy header matches this value, the X-4CAT-Config-Tag header can be used to enable a given configuration tag. 
Leave empty to disable this functionality.'}, 'api.youtube.name': {'type': 'string', 'default': 'youtube', 'help': 'YouTube API Service', 'tooltip': "YouTube API 'service name', e.g. youtube, googleapis, etc.", 'global': True}, 'api.youtube.version': {'type': 'string', 'default': 'v3', 'help': 'YouTube API Version', 'tooltip': "e.g., ''v3'", 'global': True}, 'api.youtube.key': {'type': 'string', 'default': '', 'help': 'YouTube API Key', 'tooltip': 'The developer key from your API console'}, 'dmi-service-manager.aa_DSM-intro-1': {'type': 'info', 'help': "The [DMI Service Manager](https://github.com/digitalmethodsinitiative/dmi_service_manager) is a support tool used to run some advanced processors. These processors generally require high CPU usage, a lot of RAM, or a dedicated GPU and thus do not fit within 4CAT's arcitecture. It is also possible for multiple 4CAT instances to use the same service manager. Please see [this link](https://github.com/digitalmethodsinitiative/dmi_service_manager?tab=readme-ov-file#installation) for instructions on setting up your own instance of the DMI Service Manager."}, 'dmi-service-manager.ab_server_address': {'type': 'string', 'default': '', 'help': 'DMI Service Manager server/URL', 'tooltip': 'The URL of the DMI Service Manager server, e.g. http://localhost:5000', 'global': True}, 'dmi-service-manager.ac_local_or_remote': {'type': 'choice', 'default': 0, 'help': 'DMI Services Local or Remote', 'tooltip': 'Services have local access to 4CAT files or must be transferred from remote via DMI Service Manager', 'options': {'local': 'Local', 'remote': 'Remote'}, 'global': True}, 'llm.intro': {'type': 'info', 'help': '4CAT LLM processors allow users to utilize common APIs (e.g. OpenAI, Google, Anthropic) as well as connect to local or remote LLM servers. 
You can also set up your own LLM server using open source software such as [Ollama](https://ollama.com/) and connect 4CAT to it using the settings below for your users.'}, 'llm.host_name': {'type': 'string', 'default': '4CAT LLM Server', 'help': 'Name of LLM Server in UI', 'tooltip': 'The name that will be shown to users in the interface when selecting an LLM server (or API or custom).', 'global': True}, 'llm.provider_type': {'type': 'choice', 'help': 'LLM Provider Type', 'default': 'none', 'options': {'ollama': 'Ollama', 'none': 'None'}, 'global': True}, 'llm.server': {'type': 'string', 'default': '', 'help': 'LLM Server URL', 'tooltip': 'The URL of the LLM server, e.g. http://localhost:5000', 'global': True}, 'llm.auth_type': {'type': 'string', 'help': 'LLM Server Authentication Type', 'default': '', 'tooltip': "The authentication type required to connect to the server (e.g. 'X-API-KEY', 'Authorization'). Passed in the request header with the API key.", 'global': True}, 'llm.api_key': {'type': 'string', 'default': '', 'help': 'LLM Server API Key', 'tooltip': 'The API key to access the LLM server, if required.', 'global': True}, 'llm.available_models': {'type': 'json', 'default': {}, 'help': 'Available LLM models', 'tooltip': 'A JSON dictionary of available LLM models on the server. 
4CAT will query the LLM server for available models periodically.', 'indirect': True, 'global': True}, 'llm.access': {'type': 'toggle', 'help': 'LLM Access', 'default': False, 'tooltip': 'Use tags or individual users to allow access to the LLM server (or set True in global for all).'}, 'ui.homepage': {'type': 'choice', 'options': {'about': "'About' page", 'create-dataset': "'Create dataset' page", 'datasets': 'Dataset overview'}, 'help': '4CAT home page', 'default': 'about'}, 'ui.inline_preview': {'type': 'toggle', 'help': 'Show inline preview', 'default': False, 'tooltip': "Show main dataset preview directly on dataset pages, instead of behind a 'preview' button"}, 'ui.offer_anonymisation': {'type': 'toggle', 'help': 'Offer anonymisation options', 'default': True, 'tooltip': 'Offer users the option to anonymise their datasets at the time of creation. It is strongly recommended to leave this enabled.'}, 'ui.advertise_install': {'type': 'toggle', 'help': 'Advertise local 4CAT', 'default': True, 'tooltip': 'In the login form, remind users of the possibility to install their own 4CAT server.'}, 'ui.show_datasource': {'type': 'toggle', 'help': 'Show data source', 'default': True, 'tooltip': 'Show data source for each dataset. 
Can be useful to disable if only one data source is enabled.'}, 'ui.nav_pages': {'type': 'multi_select', 'help': 'Pages in navigation', 'options': {'data-policy': 'Data Policy', 'citing': 'How to cite'}, 'default': [], 'tooltip': 'These pages will be included in the navigation bar at the top of the interface.'}, 'ui.prefer_mapped_preview': {'type': 'toggle', 'help': 'Prefer mapped preview', 'default': True, 'tooltip': 'If a dataset is a JSON file but it can be mapped to a CSV file, show the CSV in the preview insteadof the underlying JSON.'}, 'ui.offer_hashing': {'type': 'toggle', 'default': True, 'help': 'Offer pseudonymisation', 'tooltip': "Add a checkbox to the 'create dataset' forum to allow users to toggle pseudonymisation."}, 'ui.offer_private': {'type': 'toggle', 'default': True, 'help': 'Offer create as private', 'tooltip': "Add a checkbox to the 'create dataset' forum to allow users to make a dataset private."}, 'ui.option_email': {'type': 'choice', 'options': {'none': 'No Emails', 'processor_only': 'Processors only', 'datasources_only': 'Create Dataset only', 'both': 'Both datasets and processors'}, 'default': 'none', 'help': 'Show email when complete option', 'tooltip': 'If a mail server is set up, enabling this allow users to request emails when datasets and processors are completed.'}, 'image-visuals.max_images': {'type': 'string', 'default': 1000, 'coerce_type': <class 'int'>, 'help': 'Maximum images to show', 'tooltip': 'Maximum number of images to show in the image visualization tab of a dataset. This is to prevent issues with large datasets.'}}
def with_db(self, db=None):
54    def with_db(self, db=None):
55        """
56        Initialise database
57
58        Not done on init, because something may need core settings before the
59        database can be initialised
60
61        :param db:  Database object. If None, initialise it using the core config
62        """
63        if db or not self.db:
64            if db and db.log and not self.logger:
65                # borrow logger from database
66                self.with_logger(db.log)
67
68            # Replace w/ db if provided else only initialise if not already
69            self.db = db if db else Database(logger=self.logger, dbname=self.get("DB_NAME"), user=self.get("DB_USER"),
70                                         password=self.get("DB_PASSWORD"), host=self.get("DB_HOST"),
71                                         port=self.get("DB_PORT"), appname="config-reader")
72        else:
73            # self.db already initialized and no db provided
74            pass

Initialise database

Not done on init, because something may need core settings before the database can be initialised

Parameters
  • db: Database object. If None, initialise it using the core config
def with_logger(self, logger):
76    def with_logger(self, logger):
77        """
78        Attach logger to config manager
79
80        4CAT's logger has some features on top of the basic Python logger that
81        are needed for further operation, e.g. the Debug2 log level.
82
83        :param Logger logger:
84        """
85        self.logger = logger

Attach logger to config manager

4CAT's logger has some features on top of the basic Python logger that are needed for further operation, e.g. the Debug2 log level.

Parameters
  • Logger logger:
def load_user_settings(self):
 87    def load_user_settings(self):
 88        """
 89        Load settings configurable by the user
 90
 91        Does not load the settings themselves, but rather the definition so
 92        values can be validated, etc
 93        """
 94        # basic 4CAT settings
 95        self.config_definition.update(config_definition)
 96
 97        # module settings can't be loaded directly because modules need the
 98        # config manager to load, so that becomes circular
 99        # instead, this is cached on startup and then loaded here
100        module_config_path = self.get("PATH_CONFIG").joinpath("module_config.bin")
101        if module_config_path.exists():
102            try:
103                with module_config_path.open("rb") as infile:
104                    retries = 0
105                    module_config = None
106                    # if 4CAT is being run in two different containers
107                    # (front-end and back-end) they might both be running this
108                    # bit of code at the same time. If the file is half-written
109                    # loading it will fail, so allow for a few retries
110                    while retries < 3:
111                        try:
112                            module_config = pickle.load(infile)
113                            break
114                        except Exception:  # this can be a number of exceptions, all with the same recovery path
115                            time.sleep(0.1)
116                            retries += 1
117                            continue
118
119                    if module_config is None:
120                        # not really a way to gracefully recover from this, but
121                        # we can at least describe the error
122                        raise RuntimeError("Could not read module_config.bin. The 4CAT developers did a bad job of "
123                                           "preventing this. Shame on them!")
124
125                    self.config_definition.update(module_config)
126            except (ValueError, TypeError):
127                pass

Load settings configurable by the user

Does not load the settings themselves, but rather the definition so values can be validated, etc

def load_core_settings(self):
129    def load_core_settings(self):
130        """
131        Load 4CAT core settings
132
133        These are (mostly) stored in config.ini and cannot be changed from the
134        web interface.
135
136        :return:
137        """
138        config_file = Path(__file__).parent.parent.joinpath("config/config.ini")
139        config_reader = configparser.ConfigParser()
140        in_docker = False
141        if config_file.exists():
142            config_reader.read(config_file)
143            if config_reader["DOCKER"].getboolean("use_docker_config"):
144                # Can use throughtout 4CAT to know if Docker environment
145                in_docker = True
146        else:
147            # config should be created!
148            raise ConfigException("No config/config.ini file exists! Update and rename the config.ini-example file.")
149        
150        # Set up core settings
151        # Using Path.joinpath() will ensure paths are relative to ROOT_PATH or absolute (if /some/path is provided)
152        root_path = Path(os.path.abspath(os.path.dirname(__file__))).joinpath("..").resolve() # better don"t change this
153
154        self.core_settings.update({
155            "CONFIG_FILE": config_file.resolve(),
156            "USING_DOCKER": in_docker,
157            "DB_HOST": config_reader["DATABASE"].get("db_host"),
158            "DB_PORT": config_reader["DATABASE"].get("db_port"),
159            "DB_USER": config_reader["DATABASE"].get("db_user"),
160            "DB_NAME": config_reader["DATABASE"].get("db_name"),
161            "DB_PASSWORD": config_reader["DATABASE"].get("db_password"),
162
163            "API_HOST": config_reader["API"].get("api_host"),
164            "API_PORT": config_reader["API"].getint("api_port"),
165
166            "MEMCACHE_SERVER": config_reader.get("MEMCACHE", option="memcache_host", fallback=None),
167
168            "PATH_ROOT": root_path,
169            "PATH_CONFIG": root_path.joinpath("config"), # .current-version, config.ini are hardcoded here via docker/docker_setup.py and helper-scripts/migrate.py
170            "PATH_EXTENSIONS": root_path.joinpath("config/extensions"), # Must match setup.py and migrate.py
171            "PATH_LOGS": root_path.joinpath(config_reader["PATHS"].get("path_logs", "")),
172            "PATH_IMAGES": root_path.joinpath(config_reader["PATHS"].get("path_images", "")),
173            "PATH_DATA": root_path.joinpath(config_reader["PATHS"].get("path_data", "")),
174            "PATH_LOCKFILE": root_path.joinpath(config_reader["PATHS"].get("path_lockfile", "")),
175            "PATH_SESSIONS": root_path.joinpath(config_reader["PATHS"].get("path_sessions", "")),
176
177            "ANONYMISATION_SALT": config_reader["GENERATE"].get("anonymisation_salt"),
178            "SECRET_KEY": config_reader["GENERATE"].get("secret_key")
179        })

Load 4CAT core settings

These are (mostly) stored in config.ini and cannot be changed from the web interface.

Returns
def get_memcache(self):
182    def get_memcache(self):
183        """
184        Get (or create) a thread-local memcache client
185
186        The config reader can optionally use Memcache to keep fetched values in
187        memory.
188        """
189        # Reuse per-thread client if already initialised.
190        existing = getattr(self._memcache_tls, "client", None)
191        if existing:
192            return existing
193
194        server = self.get("MEMCACHE_SERVER")
195        if server:
196            try:
197                memcache = MemcacheClient(server, serde=serde.pickle_serde, key_prefix=b"4cat-config")
198                # do one test fetch to test if connection is valid
199                memcache.set("4cat-init-dummy", time.time())
200                memcache.init_thread_id = threading.get_ident()
201                self._memcache_tls.client = memcache
202                return memcache
203            except (SystemError, ValueError, MemcacheError, ConnectionError, OSError):
204                # we have no access to the logger here so we simply pass
205                # later we can detect elsewhere that a memcache address is
206                # configured but no connection is there - then we can log
207                # config reader still works without memcache
208                pass
209
210        return None

Get (or create) a thread-local memcache client

The config reader can optionally use Memcache to keep fetched values in memory.

def close_memcache(self):
212    def close_memcache(self):
213        """Close and dispose this thread's memcache client.
214
215        Call from gunicorn worker_exit or application teardown to ensure
216        sockets are closed explicitly instead of relying on GC/process exit.
217        """
218        client = getattr(self._memcache_tls, "client", None)
219        if client:
220            try:
221                client.close()
222            except Exception:
223                pass
224            finally:
225                try:
226                    del self._memcache_tls.client
227                except AttributeError:
228                    pass

Close and dispose this thread's memcache client.

Call from gunicorn worker_exit or application teardown to ensure sockets are closed explicitly instead of relying on GC/process exit.

def ensure_database(self):
231    def ensure_database(self):
232        """
233        Ensure the database is in sync with the config definition
234
235        Deletes all stored settings not defined in 4CAT, and creates a global
236        setting for all settings not yet in the database.
237        """
238        self.with_db()
239
240        # create global values for known keys with the default
241        known_settings = self.get_all_setting_names()
242        for setting, parameters in self.config_definition.items():
243            if setting in known_settings:
244                continue
245
246            self.db.log.debug(f"Creating setting: {setting} with default value {parameters.get('default', '')}")
247            self.set(setting, parameters.get("default", ""))
248
249        # make sure settings and user table are in sync
250        user_tags = list(set(itertools.chain(*[u["tags"] for u in self.db.fetchall("SELECT DISTINCT tags FROM users")])))
251        known_tags = [t["tag"] for t in self.db.fetchall("SELECT DISTINCT tag FROM settings")]
252        tag_order = self.get("flask.tag_order")
253
254        for tag in known_tags:
255            # add tags used by a setting to tag order
256            if tag and tag not in tag_order:
257                tag_order.append(tag)
258
259        for tag in user_tags:
260            # add tags used by a user to tag order
261            if tag and tag not in tag_order:
262                tag_order.append(tag)
263
264        # admin tag should always be first in order
265        if "admin" in tag_order:
266            tag_order.remove("admin")
267
268        tag_order.insert(0, "admin")
269
270        self.set("flask.tag_order", tag_order)
271        self.db.commit()

Ensure the database is in sync with the config definition

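Creates a global setting, with its default value, for every defined setting not yet in the database, and updates the tag priority order so that it includes all tags used by settings and users (with the admin tag first). Stored settings are not deleted.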

def get_all_setting_names(self, with_core=True):
273    def get_all_setting_names(self, with_core=True):
274        """
275        Get names of all settings
276
277        For when the value doesn't matter!
278
279        :param bool with_core:  Also include core (i.e. config.ini) settings
280        :return list:  List of setting names known by the database and core settings
281        """
282        # attempt to initialise the database connection so we can include
283        # user settings
284        if not self.db:
285            self.with_db()
286
287        settings = list(self.core_settings.keys()) if with_core else []
288        settings.extend([s["name"] for s in self.db.fetchall("SELECT DISTINCT name FROM settings")])
289
290        return settings

Get names of all settings

For when the value doesn't matter!

Parameters
  • bool with_core: Also include core (i.e. config.ini) settings
Returns

List of setting names known by the database and core settings

def get_all( self, is_json=False, user=None, tags=None, with_core=True, memcache=None):
292    def get_all(self, is_json=False, user=None, tags=None, with_core=True, memcache=None):
293        """
294        Get all known settings
295
296        This is *not optimised* but used rarely enough that that doesn't
297        matter so much.
298
299        :param bool is_json:  if True, the value is returned as stored and not
300        interpreted as JSON if it comes from the database
301        :param user:  User object or name. Adds a tag `user:[username]` in
302        front of the tag list.
303        :param tags:  Tag or tags for the required setting. If a tag is
304        provided, the method checks if a special value for the setting exists
305        with the given tag, and returns that if one exists. First matching tag
306        wins.
307        :param bool with_core:  Also include core (i.e. config.ini) settings
308        :param MemcacheClient memcache:  Memcache client. If `None`, a thread-local client will be used.
309
310        :return dict: Setting value, as a dictionary with setting names as keys
311        and setting values as values.
312        """
313        for setting in self.get_all_setting_names(with_core=with_core):
314            yield setting, self.get(setting, None, is_json, user, tags, memcache)

Get all known settings

This is not optimised but used rarely enough that that doesn't matter so much.

Parameters
  • bool is_json: if True, the value is returned as stored and not interpreted as JSON if it comes from the database
  • user: User object or name. Adds a tag `user:[username]` in front of the tag list.
  • tags: Tag or tags for the required setting. If a tag is provided, the method checks if a special value for the setting exists with the given tag, and returns that if one exists. First matching tag wins.
  • bool with_core: Also include core (i.e. config.ini) settings
  • MemcacheClient memcache: Memcache client. If None, a thread-local client will be used.
Returns

A generator yielding `(setting name, setting value)` tuples, one for each known setting.

def get( self, attribute_name, default=None, is_json=False, user=None, tags=None, memcache=None):
317    def get(self, attribute_name, default=None, is_json=False, user=None, tags=None, memcache=None):
318        """
319        Get a setting's value from the database
320
321        If the setting does not exist, the provided fallback value is returned.
322
323        :param str attribute_name:  Setting to return.
324        :param default:  Value to return if setting does not exist
325        :param bool is_json:  if True, the value is returned as stored and not
326        interpreted as JSON if it comes from the database
327        :param user:  User object or name. Adds a tag `user:[username]` in
328        front of the tag list.
329        :param tags:  Tag or tags for the required setting. If a tag is
330        provided, the method checks if a special value for the setting exists
331        with the given tag, and returns that if one exists. First matching tag
332        wins.
333    :param MemcacheClient memcache:  Memcache client. If `None`, a thread-local client will be used.
334
335        :return:  Setting value, or the provided fallback, or `None`.
336        """
337        # core settings are not from the database
338        # they are therefore also not memcached - too little gain
339        if type(attribute_name) is not str:
340            raise TypeError(f"attribute_name must be a str, {attribute_name.__class__.__name__} given")
341
342        if attribute_name in self.core_settings:
343            # we never get to the database or memcache part of this method if
344            # this is a core setting we already know
345            return self.core_settings[attribute_name]
346
347        # if trying to access a setting that's not a core setting, attempt to
348        # initialise the database connection
349        if not self.db:
350            self.with_db()
351
352        # get tags to look for
353        # copy() because else we keep adding onto the same list, which
354        # interacts badly with get_all()
355        if tags:
356            tags = tags.copy()
357        tags = self.get_active_tags(user, tags, memcache)
358
359        # now we have all tags - get the config values for each (if available)
360        # and then return the first matching one. Add the 'empty' tag at the
361        # end to fall back to the global value if no specific one exists.
362        tags.append("")
363
364        # Obtain thread-local memcache client if not explicitly given.
365        if not memcache:
366            memcache = self.get_memcache()
367
368        # first check if we have all the values in memcache, in which case we
369        # do not need a database query
370        if memcache:
371            if threading.get_ident() != memcache.init_thread_id:
372                raise RuntimeError("Thread-unsafe use of memcache! Please make sure you are using a configuration "
373                                   "wrapper to read with a thread-local memcache connection.")
374
375            cached_values = {tag: memcache.get(self._get_memcache_id(attribute_name, tag), default=CacheMiss) for tag in tags}
376
377        else:
378            cached_values = {t: CacheMiss for t in tags}
379
380        # for the tags we could not get from memcache, run a database query
381        # (and save to cache if possible)
382        missing_tags = [t for t in cached_values if cached_values[t] is CacheMiss]
383        if missing_tags:
384            # query database for any values within the required tags
385            query = "SELECT * FROM settings WHERE name = %s AND tag IN %s"
386            replacements = (attribute_name, tuple(missing_tags))
387            queried_settings = {setting["tag"]: setting["value"] for setting in self.db.fetchall(query, replacements)}
388
389            if memcache:
390                for tag, value in queried_settings.items():
391                    memcache.set(self._get_memcache_id(attribute_name, tag), value)
392
393            cached_values.update(queried_settings)
394
395        # there may be some tags for which we still do not have a value at
396        # this point. these simply do not have a tag-specific value but that in
397        # itself is worth caching, otherwise we're going to query for a
398        # non-existent value each time.
399        # so: cache a magic value for such setting/tag combinations, and
400        # replace the magic value with a CacheMiss in the dict that will be
401        # parsed
402        unconfigured_magic = "__unconfigured__"
403        if memcache:
404            for tag in [t for t in cached_values if cached_values[t] is CacheMiss]:
405                # should this be more magic?
406                memcache.set(self._get_memcache_id(attribute_name, tag), unconfigured_magic)
407
408            for tag in [t for t in cached_values if cached_values[t] == unconfigured_magic]:
409                cached_values[tag] = CacheMiss
410
411        # now we may still have some CacheMisses in the values dict, if there
412        # was no setting in the database with that tag. So, find the first
413        # value that is not a CacheMiss. If nothing matches, try the global tag
414        # and if even that does not match (no setting saved at all) return the
415        # default
416        for tag in tags:
417            if tag in cached_values and cached_values.get(tag) is not CacheMiss:
418                value = cached_values[tag]
419                break
420        else:
421            value = None
422
423        # parse some values...
424        if not is_json and value is not None:
425            value = json.loads(value)
426        # TODO: Which default should have priority? The provided default feels like it should be the highest priority, but I think that is an old implementation and perhaps should be removed. - Dale
427        elif value is None and attribute_name in self.config_definition and "default" in self.config_definition[attribute_name]:
428            value = self.config_definition[attribute_name]["default"]
429        elif value is None and default is not None:
430            value = default
431
432        return value

Get a setting's value from the database

If the setting does not exist, the provided fallback value is returned.

Parameters
  • str attribute_name: Setting to return.
  • default: Value to return if setting does not exist
  • bool is_json: if True, the value is returned as stored and not interpreted as JSON if it comes from the database
  • user: User object or name. Adds a tag `user:[username]` in front of the tag list.
  • tags: Tag or tags for the required setting. If a tag is provided, the method checks if a special value for the setting exists with the given tag, and returns that if one exists. First matching tag wins.
  • MemcacheClient memcache: Memcache client. If None, a thread-local client will be used.
Returns

Setting value, or the provided fallback, or None.

def get_active_tags(self, user=None, tags=None, memcache=None):
434    def get_active_tags(self, user=None, tags=None, memcache=None):
435        """
436        Get active tags for given user/tag list
437
438        Used internally to harmonize tag setting for various methods, but can
439        also be called directly to verify tag activation.
440
441        :param user:  User object or name. Adds a tag `user:[username]` in
442        front of the tag list.
443        :param tags:  Tag or tags for the required setting. If a tag is
444        provided, the method checks if a special value for the setting exists
445        with the given tag, and returns that if one exists. First matching tag
446        wins.
447    :param MemcacheClient memcache:  Memcache client. If `None`, a thread-local client will be used.
448        :return list:  List of tags
449        """
450        # be flexible about the input types here
451        if tags is None:
452            tags = []
453        elif type(tags) is str:
454            tags = [tags]
455
456        user = self._normalise_user(user)
457
458        # user-specific settings are just a special type of tag (which takes
459        # precedence), same goes for user groups. so if a user was passed, get
460        # that user's tags (including the 'special' user: tag) and add them
461        # to the list
462        if user:
463            user_tags = CacheMiss
464            
465            if not memcache:
466                memcache = self.get_memcache()
467                
468            if memcache:
469                memcache_id = f"_usertags-{user}"
470                user_tags = memcache.get(memcache_id, default=CacheMiss)
471
472            if user_tags is CacheMiss:
473                user_tags = self.db.fetchone("SELECT tags FROM users WHERE name = %s", (user,))
474                if user_tags and memcache:
475                    memcache.set(memcache_id, user_tags)
476
477            if user_tags:
478                try:
479                    tags.extend(user_tags["tags"])
480                except (TypeError, ValueError):
481                    # should be a JSON list, but isn't
482                    pass
483
484            tags.insert(0, f"user:{user}")
485
486        return tags

Get active tags for given user/tag list

Used internally to harmonize tag setting for various methods, but can
also be called directly to verify tag activation.

Parameters
  • user: User object or name. Adds a tag `user:[username]` in front of the tag list.
  • tags: Tag or tags for the required setting. If a tag is provided, the method checks if a special value for the setting exists with the given tag, and returns that if one exists. First matching tag wins.
  • MemcacheClient memcache: Memcache client. If None, a thread-local client will be used.
Returns

List of tags
def set( self, attribute_name, value, is_json=False, tag='', overwrite_existing=True, memcache=None):
488    def set(self, attribute_name, value, is_json=False, tag="", overwrite_existing=True, memcache=None):
489        """
490        Insert OR set value for a setting
491
492        If overwrite_existing=True and the setting exists, the setting is updated; if overwrite_existing=False and the
493        setting exists the setting is not updated.
494
495        :param str attribute_name:  Attribute to set
496        :param value:  Value to set (will be serialised as JSON)
497        :param bool is_json:  True for a value that is already a serialised JSON string; False if value is object that needs to
498                          be serialised into a JSON string
499        :param bool overwrite_existing: True will overwrite existing setting, False will do nothing if setting exists
500        :param str tag:  Tag to write setting for
501    :param MemcacheClient memcache:  Memcache client. If `None`, a thread-local client will be used.
502
503        :return int: number of updated rows
504        """
505        # Check value is valid JSON
506        if is_json:
507            try:
508                json.dumps(json.loads(value))
509            except json.JSONDecodeError:
510                return None
511        else:
512            try:
513                value = json.dumps(value)
514            except json.JSONDecodeError:
515                return None
516
517        if attribute_name in self.config_definition and self.config_definition.get(attribute_name).get("global"):
518            tag = ""
519
520        if overwrite_existing:
521            query = "INSERT INTO settings (name, value, tag) VALUES (%s, %s, %s) ON CONFLICT (name, tag) DO UPDATE SET value = EXCLUDED.value"
522        else:
523            query = "INSERT INTO settings (name, value, tag) VALUES (%s, %s, %s) ON CONFLICT DO NOTHING"
524
525        self.db.execute(query, (attribute_name, value, tag))
526        updated_rows = self.db.cursor.rowcount
527        self.db.log.debug(f"Updated setting for {attribute_name}: {value} (tag: {tag})")
528
529        if not memcache:
530            memcache = self.get_memcache()
531
532        if memcache:
533            # invalidate any cached value for this setting
534            memcache_id = self._get_memcache_id(attribute_name, tag)
535            memcache.delete(memcache_id)
536
537        return updated_rows

Insert OR set value for a setting

If overwrite_existing=True and the setting exists, the setting is updated; if overwrite_existing=False and the
setting exists the setting is not updated.

Parameters
  • str attribute_name: Attribute to set
  • value: Value to set (will be serialised as JSON)
  • bool is_json: True for a value that is already a serialised JSON string; False if value is object that needs to be serialised into a JSON string
  • bool overwrite_existing: True will overwrite existing setting, False will do nothing if setting exists
  • str tag: Tag to write setting for
  • MemcacheClient memcache: Memcache client. If None, a thread-local client will be used.
Returns

number of updated rows

def delete_for_tag(self, attribute_name, tag):
539    def delete_for_tag(self, attribute_name, tag):
540        """
541        Delete config override for a given tag
542
543        :param str attribute_name:
544        :param str tag:
545        :return int: number of deleted rows
546        """
547        self.db.delete("settings", where={"name": attribute_name, "tag": tag})
548        updated_rows = self.db.cursor.rowcount
549        client = self.get_memcache()
550        if client:
551            client.delete(self._get_memcache_id(attribute_name, tag))
552        return updated_rows

Delete config override for a given tag

Parameters
  • str attribute_name:
  • str tag:
Returns

number of deleted rows

def clear_cache(self):
554    def clear_cache(self):
555        """
556        Clear cached configuration values
557
558        Called when the backend restarts - helps start with a blank slate.
559        """
560        client = self.get_memcache()
561        if not client:
562            return
563        client.flush_all()

Clear cached configuration values

Called when the backend restarts - helps start with a blank slate.

def uncache_user_tags(self, users):
565    def uncache_user_tags(self, users):
566        """
567        Clear cached user tags
568
569        User tags are cached with memcache if possible to avoid unnecessary
570        database roundtrips. This method clears the cached user tags, in case
571        a tag is added/deleted from a user.
572
573        :param list users:  List of users, as usernames or User objects
574        """
575        client = self.get_memcache()
576        if client:
577            for user in users:
578                user = self._normalise_user(user)
579                client.delete(f"_usertags-{user}")

Clear cached user tags

User tags are cached with memcache if possible to avoid unnecessary database roundtrips. This method clears the cached user tags, in case a tag is added/deleted from a user.

Parameters
  • list users: List of users, as usernames or User objects
class ConfigWrapper(BaseConfigReader):
class ConfigWrapper(BaseConfigReader):
    """
    Wrapper for the config manager

    Allows setting a default set of tags or user, so that all subsequent calls
    to `get()` are done for those tags or that user. Can also adjust tags based
    on the HTTP request, if used in a Flask context.
    """
    def __init__(self, config, user=None, tags=None, request=None):
        """
        Initialise config wrapper

        :param ConfigManager config:  Initialised config manager, or another
        ConfigWrapper, in which case its properties are copied unless given
        explicitly.
        :param user:  User to get settings for
        :param tags:  Tags to get settings for
        :param request:  Request to get headers from. This can be used to set
        a particular tag based on the HTTP headers of the request, e.g. to
        serve 4CAT with a different configuration based on the proxy server
        used.
        """
        if isinstance(config, ConfigWrapper):
            # let's not do nested wrappers, but copy properties unless
            # provided explicitly
            self.user = user if user else config.user
            self.tags = tags if tags else config.tags
            self.request = request if request else config.request
            self.config = config.config
            # legacy: previous versions cached a per-request memcache client; now resolved inside ConfigManager
        else:
            self.config = config
            self.user = user
            self.tags = tags
            self.request = request

        # this ensures the user object in turn reads from the wrapper
        # NOTE(review): this calls a method on `user`, so a plain username
        # string would fail here - confirm callers always pass a User object
        if self.user:
            self.user.with_config(self, rewrap=False)

    def set(self, *args, **kwargs):
        """
        Wrap `set()`

        Uses the wrapper's tags as the `tag` argument unless a `tag` keyword
        argument is given explicitly.

        :param args:  Positional arguments for `ConfigManager.set()`
        :param kwargs:  Keyword arguments for `ConfigManager.set()`
        :return:  Whatever the wrapped `set()` returns
        """
        # NOTE(review): `self.tags` is passed on as-is; if it is a list rather
        # than a single tag string, confirm the wrapped `set()` can handle it
        if "tag" not in kwargs and self.tags:
            kwargs["tag"] = self.tags

        # ConfigManager resolves thread-local memcache internally

        return self.config.set(*args, **kwargs)

    def get_all(self, *args, **kwargs):
        """
        Wrap `get_all()`

        Takes the `user`, `tags` and `request` given when initialised into
        account. If `tags` is set explicitly, the HTTP header-based override
        is not applied.

        :param args:  Positional arguments for `ConfigManager.get_all()`
        :param kwargs:  Keyword arguments for `ConfigManager.get_all()`
        :return:  Whatever the wrapped `get_all()` returns
        """
        if "user" not in kwargs and self.user:
            kwargs["user"] = self.user

        if "tags" not in kwargs:
            kwargs["tags"] = self.tags if self.tags else []
            kwargs["tags"] = self.request_override(kwargs["tags"])

        # ConfigManager resolves thread-local memcache internally

        return self.config.get_all(*args, **kwargs)

    def get(self, *args, **kwargs):
        """
        Wrap `get()`

        Takes the `user`, `tags` and `request` given when initialised into
        account. If `tags` is set explicitly, the HTTP header-based override
        is not applied.

        :param args:  Positional arguments for `ConfigManager.get()`
        :param kwargs:  Keyword arguments for `ConfigManager.get()`
        :return:  Whatever the wrapped `get()` returns
        """
        if "user" not in kwargs:
            kwargs["user"] = self.user

        if "tags" not in kwargs:
            kwargs["tags"] = self.tags if self.tags else []
            kwargs["tags"] = self.request_override(kwargs["tags"])

        # ConfigManager resolves thread-local memcache internally

        return self.config.get(*args, **kwargs)

    def get_active_tags(self, user=None, tags=None):
        """
        Wrap `get_active_tags()`

        Resolves the active tags for the given `user` and `tags` through the
        wrapped config manager. If no `tags` are given, the HTTP header-based
        override is applied to the result.

        :param user:  User object or name
        :param tags:  Tag or tags to start from
        :return list:  List of active tags
        """
        # NOTE(review): unlike get(), this does not fall back to the wrapper's
        # own user and tags - confirm whether that is intended
        active_tags = self.config.get_active_tags(user, tags)
        if not tags:
            active_tags = self.request_override(active_tags)
        return active_tags

    def request_override(self, tags):
        """
        Force tag via HTTP request headers

        To facilitate loading different configurations based on the HTTP
        request, the request object can be passed to the ConfigWrapper and
        if a certain request header is set, the value of that header will be
        added to the list of tags to consider when retrieving settings.

        See the flask.proxy_secret config setting; this is used to prevent
        users from changing configuration by forging the header.

        :param list|str tags:  List of tags to extend based on request
        :return list:  Amended list of tags. When header tags are added this
        is a new list; the list passed in is never modified. When no override
        applies, the tags are returned as given (a string as a list).
        """
        if isinstance(tags, str):
            tags = [tags]

        if not self.request:
            return tags

        header_tags = self.request.headers.get("X-4Cat-Config-Tag")
        if not header_tags:
            return tags

        # use self.config.get here, not self.get, because else we get infinite
        # recursion (since self.get can call this method)
        # need to ensure not just anyone can add this header to their
        # request!
        # to this end, the second header must be set to the secret value;
        # if it is not set, assume the headers are not being configured by
        # the proxy server
        proxy_secret = self.config.get("flask.proxy_secret")
        if not proxy_secret or self.request.headers.get("X-4Cat-Config-Via-Proxy") != proxy_secret:
            return tags

        # can never set admin tag via headers (should always be user-based)
        # strip whitespace first, so that "a, admin" cannot slip " admin" past
        # the check, and skip empty entries
        forbidden_overrides = ("admin",)
        requested = [tag.strip() for tag in header_tags.split(",")]

        # build a new list: extending in place would also grow the list
        # object owned by the caller (e.g. the wrapper's own `tags`) on
        # every call
        return list(tags or []) + [tag for tag in requested if tag and tag not in forbidden_overrides]

    def __getattr__(self, item):
        """
        Generic wrapper

        Just pipe everything through to the config object. Python only calls
        this method when regular attribute lookup on the wrapper has failed.

        :param item:  Name of the attribute to look up
        :return:  The attribute as found on the wrapped config object
        :raises AttributeError:  if the wrapped config object does not have
        the attribute either
        """
        if item == "config":
            # the wrapped object itself is not set (yet); looking it up
            # below would call this method again, endlessly
            raise AttributeError(f"'{type(self).__name__}' object has no attribute 'config'")

        try:
            return getattr(self.config, item)
        except AttributeError:
            raise AttributeError(f"'{type(self).__name__}' object has no attribute '{item}'") from None

Wrapper for the config manager

Allows setting a default set of tags or user, so that all subsequent calls to get() are done for those tags or that user. Can also adjust tags based on the HTTP request, if used in a Flask context.

ConfigWrapper(config, user=None, tags=None, request=None)
658    def __init__(self, config, user=None, tags=None, request=None):
659        """
660        Initialise config wrapper
661
662        :param ConfigManager config:  Initialised config manager
663        :param user:  User to get settings for
664        :param tags:  Tags to get settings for
665        :param request:  Request to get headers from. This can be used to set
666        a particular tag based on the HTTP headers of the request, e.g. to
667        serve 4CAT with a different configuration based on the proxy server
668        used.
669        """
670        if type(config) is ConfigWrapper:
671            # let's not do nested wrappers, but copy properties unless
672            # provided explicitly
673            self.user = user if user else config.user
674            self.tags = tags if tags else config.tags
675            self.request = request if request else config.request
676            self.config = config.config
677            # legacy: previous versions cached a per-request memcache client; now resolved inside ConfigManager
678        else:
679            self.config = config
680            self.user = user
681            self.tags = tags
682            self.request = request
683
684        # this ensures the user object in turn reads from the wrapper
685        if self.user:
686            self.user.with_config(self, rewrap=False)

Initialise config wrapper

Parameters
  • ConfigManager config: Initialised config manager
  • user: User to get settings for
  • tags: Tags to get settings for
  • request: Request to get headers from. This can be used to set a particular tag based on the HTTP headers of the request, e.g. to serve 4CAT with a different configuration based on the proxy server used.
def set(self, *args, **kwargs):
689    def set(self, *args, **kwargs):
690        """
691        Wrap `set()`
692
693        :param args:
694        :param kwargs:
695        :return:
696        """
697        if "tag" not in kwargs and self.tags:
698            kwargs["tag"] = self.tags
699
700        # ConfigManager resolves thread-local memcache internally
701
702        return self.config.set(*args, **kwargs)

Wrap set()

Parameters
  • args:
  • kwargs:
Returns
def get_all(self, *args, **kwargs):
704    def get_all(self, *args, **kwargs):
705        """
706        Wrap `get_all()`
707
708        Takes the `user`, `tags` and `request` given when initialised into
709        account. If `tags` is set explicitly, the HTTP header-based override
710        is not applied.
711
712        :param args:
713        :param kwargs:
714        :return:
715        """
716        if "user" not in kwargs and self.user:
717            kwargs["user"] = self.user
718
719        if "tags" not in kwargs:
720            kwargs["tags"] = self.tags if self.tags else []
721            kwargs["tags"] = self.request_override(kwargs["tags"])
722
723        # ConfigManager resolves thread-local memcache internally
724
725        return self.config.get_all(*args, **kwargs)

Wrap get_all()

Takes the user, tags and request given when initialised into account. If tags is set explicitly, the HTTP header-based override is not applied.

Parameters
  • args:
  • kwargs:
Returns
def get(self, *args, **kwargs):
727    def get(self, *args, **kwargs):
728        """
729        Wrap `get()`
730
731        Takes the `user`, `tags` and `request` given when initialised into
732        account. If `tags` is set explicitly, the HTTP header-based override
733        is not applied.
734
735        :param args:
736        :param kwargs:
737        :return:
738        """
739        if "user" not in kwargs:
740            kwargs["user"] = self.user
741
742        if "tags" not in kwargs:
743            kwargs["tags"] = self.tags if self.tags else []
744            kwargs["tags"] = self.request_override(kwargs["tags"])
745
746        # ConfigManager resolves thread-local memcache internally
747
748        return self.config.get(*args, **kwargs)

Wrap get()

Takes the user, tags and request given when initialised into account. If tags is set explicitly, the HTTP header-based override is not applied.

Parameters
  • args:
  • kwargs:
Returns
def get_active_tags(self, user=None, tags=None):
750    def get_active_tags(self, user=None, tags=None):
751        """
752        Wrap `get_active_tags()`
753
754        Takes the `user`, `tags` and `request` given when initialised into
755        account. If `tags` is set explicitly, the HTTP header-based override
756        is not applied.
757
758        :param user:
759        :param tags:
760        :return list:
761        """
762        active_tags = self.config.get_active_tags(user, tags)
763        if not tags:
764            active_tags = self.request_override(active_tags)
765        return active_tags

Wrap get_active_tags()

Takes the user, tags and request given when initialised into account. If tags is set explicitly, the HTTP header-based override is not applied.

Parameters
  • user:
  • tags:
Returns
def request_override(self, tags):
767    def request_override(self, tags):
768        """
769        Force tag via HTTP request headers
770
771        To facilitate loading different configurations based on the HTTP
772        request, the request object can be passed to the ConfigWrapper and
773        if a certain request header is set, the value of that header will be
774        added to the list of tags to consider when retrieving settings.
775
776        See the flask.proxy_secret config setting; this is used to prevent
777        users from changing configuration by forging the header.
778
779        :param list|str tags:  List of tags to extend based on request
780        :return list:  Amended list of tags
781        """
782        if type(tags) is str:
783            tags = [tags]
784
785        # use self.config.get here, not self.get, because else we get infinite
786        # recursion (since self.get can call this method)
787        if self.request and self.request.headers.get("X-4Cat-Config-Tag") and \
788            self.config.get("flask.proxy_secret") and \
789            self.request.headers.get("X-4Cat-Config-Via-Proxy") == self.config.get("flask.proxy_secret"):
790            # need to ensure not just anyone can add this header to their
791            # request!
792            # to this end, the second header must be set to the secret value;
793            # if it is not set, assume the headers are not being configured by
794            # the proxy server
795            if not tags:
796                tags = []
797
798            # can never set admin tag via headers (should always be user-based)
799            forbidden_overrides = ("admin",)
800            tags += [tag for tag in self.request.headers.get("X-4Cat-Config-Tag").split(",") if tag not in forbidden_overrides]
801
802        return tags

Force tag via HTTP request headers

To facilitate loading different configurations based on the HTTP request, the request object can be passed to the ConfigWrapper and if a certain request header is set, the value of that header will be added to the list of tags to consider when retrieving settings.

See the flask.proxy_secret config setting; this is used to prevent users from changing configuration by forging the header.

Parameters
  • list|str tags: List of tags to extend based on request
Returns

Amended list of tags

class CoreConfigManager(ConfigManager):
class CoreConfigManager(ConfigManager):
    """
    Configuration reader restricted to core settings

    Refuses to set up a database connection, so it can be used in
    thread-unsafe context and when no database is present.
    """
    def with_db(self, db=None):
        """
        Refuse to link a database connection

        :param db:  Ignored
        :raises RuntimeError:  Always
        """
        message = "Trying to read non-core configuration value from a CoreConfigManager"
        raise RuntimeError(message)

A configuration reader that can only read from core settings

Can be used in thread-unsafe context and when no database is present.

def with_db(self, db=None):
828    def with_db(self, db=None):
829        """
830        Raise a RuntimeError when trying to link a database connection
831
832        :param db:
833        """
834        raise RuntimeError("Trying to read non-core configuration value from a CoreConfigManager")

Raise a RuntimeError when trying to link a database connection

Parameters
  • db: