Edit on GitHub

common.lib.user_input

  1from dateutil.parser import parse as parse_datetime
  2from common.lib.exceptions import QueryParametersException
  3from werkzeug.datastructures import ImmutableMultiDict
  4import json
  5
  6import re
  7
  8class RequirementsNotMetException(Exception):
  9    """
 10    If this is raised while parsing, that option is not included in the parsed
 11    output. Used with the "requires" option setting.
 12    """
 13    pass
 14
 15class UserInput:
 16    """
 17    Class for handling user input
 18
 19    It is important to sanitise user input, as carelessly entered parameters
 20    may in e.g. requesting far more data than needed, or lead to undefined
 21    behaviour. This class offers a set of pre-defined value types that can be
 22    consistently rendered as form elements in an interface and parsed.
 23    """
 24    OPTION_TOGGLE = "toggle"  # boolean toggle (checkbox)
 25    OPTION_CHOICE = "choice"  # one choice out of a list (select)
 26    OPTION_TEXT = "string"  # simple string or integer (input text)
 27    OPTION_MULTI = "multi"  # multiple values out of a list (select multiple)
 28    OPTION_MULTI_SELECT = "multi_select"  # multiple values out of a dropdown list (select multiple)
 29    OPTION_INFO = "info"  # just a bit of text, not actual input
 30    OPTION_TEXT_LARGE = "textarea"  # longer text
 31    OPTION_TEXT_JSON = "json"  # text, but should be valid JSON
 32    OPTION_DATE = "date"  # a single date
 33    OPTION_DATERANGE = "daterange"  # a beginning and end date
 34    OPTION_DIVIDER = "divider"  # meta-option, divides related sets of options
 35    OPTION_FILE = "file"  # file upload
 36    OPTION_HUE = "hue"  # colour hue
 37    OPTION_DATASOURCES = "datasources"  # data source toggling
 38    OPTION_EXTENSIONS = "extensions"  # extension toggling
 39    OPTION_DATASOURCES_TABLE = "datasources_table"  # a table with settings per data source
 40    OPTION_ANNOTATION = "annotation"  # checkbox for whether to an annotation
 41    OPTION_ANNOTATIONS = "annotations"  # table for whether to write multiple annotations
 42
 43    OPTIONS_COSMETIC = (OPTION_INFO, OPTION_DIVIDER)
 44
 45    @staticmethod
 46    def parse_all(options, input, silently_correct=True):
 47        """
 48        Parse form input for the provided options
 49
 50        Ignores all input not belonging to any of the defined options: parses
 51        and sanitises the rest, and returns a dictionary with the sanitised
 52        options. If an option is *not* present in the input, the default value
 53        is used, and if that is absent, `None`.
 54
 55        In other words, this ensures a dictionary with 1) only white-listed
 56        keys, 2) a value of an expected type for each key.
 57
 58        :param dict options:  Options, as a name -> settings dictionary
 59        :param dict input:  Input, as a form field -> value dictionary
 60        :param bool silently_correct:  If true, replace invalid values with the
 61        given default value; else, raise a QueryParametersException if a value
 62        is invalid.
 63
 64        :return dict:  Sanitised form input
 65        """
 66
 67        from common.lib.helpers import convert_to_int
 68        parsed_input = {}
 69
 70        if type(input) is not dict and type(input) is not ImmutableMultiDict:
 71            raise TypeError("input must be a dictionary or ImmutableMultiDict")
 72
 73        if type(input) is ImmutableMultiDict:
 74            # we are not using to_dict, because that messes up multi-selects
 75            input = {key: input.getlist(key) for key in input}
 76            for key, value in input.items():
 77                if type(value) is list and len(value) == 1:
 78                    input[key] = value[0]
 79
 80        # all parameters are submitted as option-[parameter ID], this is an 
 81        # artifact of how the web interface works and we can simply remove the
 82        # prefix
 83        input = {re.sub(r"^option-", "", field): input[field] for field in input}
 84
 85        # re-order input so that the fields relying on the value of other
 86        # fields are parsed last
 87        options = {k: options[k] for k in sorted(options, key=lambda k: options[k].get("requires") is not None)}
 88
 89        for option, settings in options.items():
 90            if settings.get("indirect"):
 91                # these are settings that are derived from and set by other
 92                # settings
 93                continue
 94
 95            if settings.get("type") in UserInput.OPTIONS_COSMETIC:
 96                # these are structural form elements and never have a value
 97                continue
 98
 99            elif settings.get("type") == UserInput.OPTION_DATERANGE:
100                # special case, since it combines two inputs
101                option_min = option + "-min"
102                option_max = option + "-max"
103
104                # normally this is taken care of client-side, but in case this
105                # didn't work, try to salvage it server-side
106                if option_min not in input or input.get(option_min) == "-1":
107                    option_min += "_proxy"
108
109                if option_max not in input or input.get(option_max) == "-1":
110                    option_max += "_proxy"
111
112                # save as a tuple of unix timestamps (or None)
113                try:
114                    after, before = (UserInput.parse_value(settings, input.get(option_min), parsed_input, silently_correct), UserInput.parse_value(settings, input.get(option_max), parsed_input, silently_correct))
115
116                    if before and after and after > before:
117                        if not silently_correct:
118                            raise QueryParametersException("End of date range must be after beginning of date range.")
119                        else:
120                            before = after
121
122                    parsed_input[option] = (after, before)
123                except RequirementsNotMetException:
124                    pass
125
126            elif settings.get("type") in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
127                # special case too, since if a checkbox is unchecked, it simply
128                # does not show up in the input
129                try:
130                    if option in input:
131                        # Toggle needs to be parsed
132                        parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
133                    else:
134                        # Toggle was left blank
135                        parsed_input[option] = False
136                except RequirementsNotMetException:
137                    pass
138
139            elif settings.get("type") == UserInput.OPTION_DATASOURCES:
140                # special case, because this combines multiple inputs to
141                # configure data source availability and expiration
142                datasources = {datasource: {
143                    "enabled": f"{option}-enable-{datasource}" in input,
144                    "allow_optout": f"{option}-optout-{datasource}" in input,
145                    "timeout": convert_to_int(input[f"{option}-timeout-{datasource}"], 0)
146                } for datasource in input[option].split(",")}
147
148                parsed_input[option] = [datasource for datasource, v in datasources.items() if v["enabled"]]
149                parsed_input[option.split(".")[0] + ".expiration"] = datasources
150
151            elif settings.get("type") == UserInput.OPTION_EXTENSIONS:
152                # also a special case
153                parsed_input[option] = {extension: {
154                    "enabled": f"{option}-enable-{extension}" in input
155                } for extension in input[option].split(",")}
156
157            elif settings.get("type") == UserInput.OPTION_DATASOURCES_TABLE:
158                # special case, parse table values to generate a dict
159                columns = list(settings["columns"].keys())
160                table_input = {}
161
162                for datasource in list(settings["default"].keys()):
163                    table_input[datasource] = {}
164                    for column in columns:
165
166                        choice = input.get(option + "-" + datasource + "-" + column, False)
167                        column_settings = settings["columns"][column]  # sub-settings per column
168                        table_input[datasource][column] = UserInput.parse_value(column_settings, choice, table_input, silently_correct=True)
169
170                parsed_input[option] = table_input
171
172            elif option not in input:
173                # not provided? use default
174                parsed_input[option] = settings.get("default", None)
175
176            else:
177                # normal parsing and sanitisation
178                try:
179                    parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
180                except RequirementsNotMetException:
181                    pass
182
183        return parsed_input
184
185    @staticmethod
186    def parse_value(settings, choice, other_input=None, silently_correct=True):
187        """
188        Filter user input
189
190        Makes sure user input for post-processors is valid and within the
191        parameters specified by the post-processor
192
193        :param obj settings:  Settings, including defaults and valid options
194        :param choice:  The chosen option, to be parsed
195        :param dict other_input:  Other input, as parsed so far
196        :param bool silently_correct:  If true, replace invalid values with the
197        given default value; else, raise a QueryParametersException if a value
198        is invalid.
199
200        :return:  Validated and parsed input
201        """
202        # short-circuit if there is a requirement for the field to be parsed
203        # and the requirement isn't met
204        if settings.get("requires"):
205            try:
206                field, operator, value = re.findall(r"([a-zA-Z0-9_-]+)([!=$~^]+)(.*)", settings.get("requires"))[0]
207            except IndexError:
208                # invalid condition, interpret as 'does the field with this name have a value'
209                field, operator, value = (choice, "!=", "")
210
211            if field not in other_input:
212                raise RequirementsNotMetException()
213
214            other_value = other_input.get(field)
215            if type(other_value) is bool:
216                # evalues to a boolean, i.e. checkboxes etc
217                if operator == "!=":
218                    if (other_value and value in ("", "false")) or (not other_value and value in ("true", "checked")):
219                        raise RequirementsNotMetException()
220                else:
221                    if (other_value and value not in ("true", "checked")) or (not other_value and value not in ("", "false")):
222                        raise RequirementsNotMetException()
223
224            else:
225                if type(other_value) in (tuple, list):
226                # iterables are a bit special
227                    if len(other_value) == 1:
228                        # treat one-item lists as "normal" values
229                        other_value = other_value[0]
230                    elif operator == "~=":  # interpret as 'is in list?'
231                        if value not in other_value:
232                            raise RequirementsNotMetException()
233                    else:
234                        # condition doesn't make sense for a list, so assume it's not True
235                        raise RequirementsNotMetException()
236
237                if operator == "^=" and not str(other_value).startswith(value):
238                    raise RequirementsNotMetException()
239                elif operator == "$=" and not str(other_value).endswith(value):
240                    raise RequirementsNotMetException()
241                elif operator == "~=" and value not in str(other_value):
242                    raise RequirementsNotMetException()
243                elif operator == "!=" and value == other_value:
244                    raise RequirementsNotMetException()
245                elif operator in ("==", "=") and value != other_value:
246                    raise RequirementsNotMetException()
247
248        input_type = settings.get("type", "")
249        if input_type in UserInput.OPTIONS_COSMETIC:
250            # these are structural form elements and can never return a value
251            return None
252
253        elif input_type in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
254            # simple boolean toggle
255            if type(choice) is bool:
256                return choice
257            elif choice in ['false', 'False']:
258                # Sanitized options passed back to Flask can be converted to strings as 'false'
259                return False
260            elif choice in ['true', 'True', 'on']:
261                # Toggle will have value 'on', but may also becomes a string 'true'
262                return True
263            else:
264                raise QueryParametersException("Toggle invalid input")
265
266        elif input_type in (UserInput.OPTION_DATE, UserInput.OPTION_DATERANGE):
267            # parse either integers (unix timestamps) or try to guess the date
268            # format (the latter may be used for input if JavaScript is turned
269            # off in the front-end and the input comes from there)
270            value = None
271            try:
272                value = int(choice)
273            except ValueError:
274                parsed_choice = parse_datetime(choice)
275                value = int(parsed_choice.timestamp())
276            finally:
277                return value
278
279        elif input_type in (UserInput.OPTION_MULTI, UserInput.OPTION_ANNOTATIONS):
280            # any number of values out of a list of possible values
281            # comma-separated during input, returned as a list of valid options
282            if not choice:
283                return settings.get("default", [])
284
285            chosen = choice.split(",")
286            return [item for item in chosen if item in settings.get("options", [])]
287
288        elif input_type == UserInput.OPTION_MULTI_SELECT:
289            # multiple number of values out of a dropdown list of possible values
290            # comma-separated during input, returned as a list of valid options
291            if not choice:
292                return settings.get("default", [])
293
294            if type(choice) is str:
295                # should be a list if the form control was actually a multiselect
296                # but we have some client side UI helpers that may produce a string
297                # instead
298                choice = choice.split(",")
299
300            return [item for item in choice if item in settings.get("options", [])]
301
302        elif input_type == UserInput.OPTION_CHOICE:
303            # select box
304            # one out of multiple options
305            # return option if valid, or default
306            if choice not in settings.get("options"):
307                if not silently_correct:
308                    raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(settings.get('options', {}).keys())}. {settings}")
309                else:
310                    return settings.get("default", "")
311            else:
312                return choice
313
314        elif input_type == UserInput.OPTION_TEXT_JSON:
315            # verify that this is actually json
316            try:
317                json.dumps(json.loads(choice))
318            except json.JSONDecodeError:
319                raise QueryParametersException("Invalid JSON value '%s'" % choice)
320
321            return json.loads(choice)
322
323        elif input_type in (UserInput.OPTION_TEXT, UserInput.OPTION_TEXT_LARGE, UserInput.OPTION_HUE):
324            # text string
325            # optionally clamp it as an integer; return default if not a valid
326            # integer (or float; inferred from default or made explicit via the
327            # coerce_type setting)
328            if settings.get("coerce_type"):
329                value_type = settings["coerce_type"]
330            else:
331                value_type = type(settings.get("default"))
332                if value_type not in (int, float):
333                    value_type = int
334
335            if "max" in settings:
336                try:
337                    choice = min(settings["max"], value_type(choice))
338                except (ValueError, TypeError):
339                    if not silently_correct:
340                        raise QueryParametersException("Provide a value of %s or lower." % str(settings["max"]))
341
342                    choice = settings.get("default")
343
344            if "min" in settings:
345                try:
346                    choice = max(settings["min"], value_type(choice))
347                except (ValueError, TypeError):
348                    if not silently_correct:
349                        raise QueryParametersException("Provide a value of %s or more." % str(settings["min"]))
350
351                    choice = settings.get("default")
352
353            if choice is None or choice == "":
354                choice = settings.get("default")
355
356            if choice is None:
357                choice = 0 if "min" in settings or "max" in settings else ""
358
359            if settings.get("coerce_type"):
360                try:
361                    return value_type(choice)
362                except (ValueError, TypeError):
363                    return settings.get("default")
364            else:
365                return choice
366
367        else:
368            # no filtering
369            return choice
class RequirementsNotMetException(builtins.Exception):
 9class RequirementsNotMetException(Exception):
10    """
11    If this is raised while parsing, that option is not included in the parsed
12    output. Used with the "requires" option setting.
13    """
14    pass

If this is raised while parsing, that option is not included in the parsed output. Used with the "requires" option setting.

class UserInput:
 16class UserInput:
 17    """
 18    Class for handling user input
 19
 20    It is important to sanitise user input, as carelessly entered parameters
 21    may in e.g. requesting far more data than needed, or lead to undefined
 22    behaviour. This class offers a set of pre-defined value types that can be
 23    consistently rendered as form elements in an interface and parsed.
 24    """
 25    OPTION_TOGGLE = "toggle"  # boolean toggle (checkbox)
 26    OPTION_CHOICE = "choice"  # one choice out of a list (select)
 27    OPTION_TEXT = "string"  # simple string or integer (input text)
 28    OPTION_MULTI = "multi"  # multiple values out of a list (select multiple)
 29    OPTION_MULTI_SELECT = "multi_select"  # multiple values out of a dropdown list (select multiple)
 30    OPTION_INFO = "info"  # just a bit of text, not actual input
 31    OPTION_TEXT_LARGE = "textarea"  # longer text
 32    OPTION_TEXT_JSON = "json"  # text, but should be valid JSON
 33    OPTION_DATE = "date"  # a single date
 34    OPTION_DATERANGE = "daterange"  # a beginning and end date
 35    OPTION_DIVIDER = "divider"  # meta-option, divides related sets of options
 36    OPTION_FILE = "file"  # file upload
 37    OPTION_HUE = "hue"  # colour hue
 38    OPTION_DATASOURCES = "datasources"  # data source toggling
 39    OPTION_EXTENSIONS = "extensions"  # extension toggling
 40    OPTION_DATASOURCES_TABLE = "datasources_table"  # a table with settings per data source
 41    OPTION_ANNOTATION = "annotation"  # checkbox for whether to an annotation
 42    OPTION_ANNOTATIONS = "annotations"  # table for whether to write multiple annotations
 43
 44    OPTIONS_COSMETIC = (OPTION_INFO, OPTION_DIVIDER)
 45
 46    @staticmethod
 47    def parse_all(options, input, silently_correct=True):
 48        """
 49        Parse form input for the provided options
 50
 51        Ignores all input not belonging to any of the defined options: parses
 52        and sanitises the rest, and returns a dictionary with the sanitised
 53        options. If an option is *not* present in the input, the default value
 54        is used, and if that is absent, `None`.
 55
 56        In other words, this ensures a dictionary with 1) only white-listed
 57        keys, 2) a value of an expected type for each key.
 58
 59        :param dict options:  Options, as a name -> settings dictionary
 60        :param dict input:  Input, as a form field -> value dictionary
 61        :param bool silently_correct:  If true, replace invalid values with the
 62        given default value; else, raise a QueryParametersException if a value
 63        is invalid.
 64
 65        :return dict:  Sanitised form input
 66        """
 67
 68        from common.lib.helpers import convert_to_int
 69        parsed_input = {}
 70
 71        if type(input) is not dict and type(input) is not ImmutableMultiDict:
 72            raise TypeError("input must be a dictionary or ImmutableMultiDict")
 73
 74        if type(input) is ImmutableMultiDict:
 75            # we are not using to_dict, because that messes up multi-selects
 76            input = {key: input.getlist(key) for key in input}
 77            for key, value in input.items():
 78                if type(value) is list and len(value) == 1:
 79                    input[key] = value[0]
 80
 81        # all parameters are submitted as option-[parameter ID], this is an 
 82        # artifact of how the web interface works and we can simply remove the
 83        # prefix
 84        input = {re.sub(r"^option-", "", field): input[field] for field in input}
 85
 86        # re-order input so that the fields relying on the value of other
 87        # fields are parsed last
 88        options = {k: options[k] for k in sorted(options, key=lambda k: options[k].get("requires") is not None)}
 89
 90        for option, settings in options.items():
 91            if settings.get("indirect"):
 92                # these are settings that are derived from and set by other
 93                # settings
 94                continue
 95
 96            if settings.get("type") in UserInput.OPTIONS_COSMETIC:
 97                # these are structural form elements and never have a value
 98                continue
 99
100            elif settings.get("type") == UserInput.OPTION_DATERANGE:
101                # special case, since it combines two inputs
102                option_min = option + "-min"
103                option_max = option + "-max"
104
105                # normally this is taken care of client-side, but in case this
106                # didn't work, try to salvage it server-side
107                if option_min not in input or input.get(option_min) == "-1":
108                    option_min += "_proxy"
109
110                if option_max not in input or input.get(option_max) == "-1":
111                    option_max += "_proxy"
112
113                # save as a tuple of unix timestamps (or None)
114                try:
115                    after, before = (UserInput.parse_value(settings, input.get(option_min), parsed_input, silently_correct), UserInput.parse_value(settings, input.get(option_max), parsed_input, silently_correct))
116
117                    if before and after and after > before:
118                        if not silently_correct:
119                            raise QueryParametersException("End of date range must be after beginning of date range.")
120                        else:
121                            before = after
122
123                    parsed_input[option] = (after, before)
124                except RequirementsNotMetException:
125                    pass
126
127            elif settings.get("type") in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
128                # special case too, since if a checkbox is unchecked, it simply
129                # does not show up in the input
130                try:
131                    if option in input:
132                        # Toggle needs to be parsed
133                        parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
134                    else:
135                        # Toggle was left blank
136                        parsed_input[option] = False
137                except RequirementsNotMetException:
138                    pass
139
140            elif settings.get("type") == UserInput.OPTION_DATASOURCES:
141                # special case, because this combines multiple inputs to
142                # configure data source availability and expiration
143                datasources = {datasource: {
144                    "enabled": f"{option}-enable-{datasource}" in input,
145                    "allow_optout": f"{option}-optout-{datasource}" in input,
146                    "timeout": convert_to_int(input[f"{option}-timeout-{datasource}"], 0)
147                } for datasource in input[option].split(",")}
148
149                parsed_input[option] = [datasource for datasource, v in datasources.items() if v["enabled"]]
150                parsed_input[option.split(".")[0] + ".expiration"] = datasources
151
152            elif settings.get("type") == UserInput.OPTION_EXTENSIONS:
153                # also a special case
154                parsed_input[option] = {extension: {
155                    "enabled": f"{option}-enable-{extension}" in input
156                } for extension in input[option].split(",")}
157
158            elif settings.get("type") == UserInput.OPTION_DATASOURCES_TABLE:
159                # special case, parse table values to generate a dict
160                columns = list(settings["columns"].keys())
161                table_input = {}
162
163                for datasource in list(settings["default"].keys()):
164                    table_input[datasource] = {}
165                    for column in columns:
166
167                        choice = input.get(option + "-" + datasource + "-" + column, False)
168                        column_settings = settings["columns"][column]  # sub-settings per column
169                        table_input[datasource][column] = UserInput.parse_value(column_settings, choice, table_input, silently_correct=True)
170
171                parsed_input[option] = table_input
172
173            elif option not in input:
174                # not provided? use default
175                parsed_input[option] = settings.get("default", None)
176
177            else:
178                # normal parsing and sanitisation
179                try:
180                    parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
181                except RequirementsNotMetException:
182                    pass
183
184        return parsed_input
185
186    @staticmethod
187    def parse_value(settings, choice, other_input=None, silently_correct=True):
188        """
189        Filter user input
190
191        Makes sure user input for post-processors is valid and within the
192        parameters specified by the post-processor
193
194        :param obj settings:  Settings, including defaults and valid options
195        :param choice:  The chosen option, to be parsed
196        :param dict other_input:  Other input, as parsed so far
197        :param bool silently_correct:  If true, replace invalid values with the
198        given default value; else, raise a QueryParametersException if a value
199        is invalid.
200
201        :return:  Validated and parsed input
202        """
203        # short-circuit if there is a requirement for the field to be parsed
204        # and the requirement isn't met
205        if settings.get("requires"):
206            try:
207                field, operator, value = re.findall(r"([a-zA-Z0-9_-]+)([!=$~^]+)(.*)", settings.get("requires"))[0]
208            except IndexError:
209                # invalid condition, interpret as 'does the field with this name have a value'
210                field, operator, value = (choice, "!=", "")
211
212            if field not in other_input:
213                raise RequirementsNotMetException()
214
215            other_value = other_input.get(field)
216            if type(other_value) is bool:
217                # evalues to a boolean, i.e. checkboxes etc
218                if operator == "!=":
219                    if (other_value and value in ("", "false")) or (not other_value and value in ("true", "checked")):
220                        raise RequirementsNotMetException()
221                else:
222                    if (other_value and value not in ("true", "checked")) or (not other_value and value not in ("", "false")):
223                        raise RequirementsNotMetException()
224
225            else:
226                if type(other_value) in (tuple, list):
227                # iterables are a bit special
228                    if len(other_value) == 1:
229                        # treat one-item lists as "normal" values
230                        other_value = other_value[0]
231                    elif operator == "~=":  # interpret as 'is in list?'
232                        if value not in other_value:
233                            raise RequirementsNotMetException()
234                    else:
235                        # condition doesn't make sense for a list, so assume it's not True
236                        raise RequirementsNotMetException()
237
238                if operator == "^=" and not str(other_value).startswith(value):
239                    raise RequirementsNotMetException()
240                elif operator == "$=" and not str(other_value).endswith(value):
241                    raise RequirementsNotMetException()
242                elif operator == "~=" and value not in str(other_value):
243                    raise RequirementsNotMetException()
244                elif operator == "!=" and value == other_value:
245                    raise RequirementsNotMetException()
246                elif operator in ("==", "=") and value != other_value:
247                    raise RequirementsNotMetException()
248
249        input_type = settings.get("type", "")
250        if input_type in UserInput.OPTIONS_COSMETIC:
251            # these are structural form elements and can never return a value
252            return None
253
254        elif input_type in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
255            # simple boolean toggle
256            if type(choice) is bool:
257                return choice
258            elif choice in ['false', 'False']:
259                # Sanitized options passed back to Flask can be converted to strings as 'false'
260                return False
261            elif choice in ['true', 'True', 'on']:
262                # Toggle will have value 'on', but may also becomes a string 'true'
263                return True
264            else:
265                raise QueryParametersException("Toggle invalid input")
266
267        elif input_type in (UserInput.OPTION_DATE, UserInput.OPTION_DATERANGE):
268            # parse either integers (unix timestamps) or try to guess the date
269            # format (the latter may be used for input if JavaScript is turned
270            # off in the front-end and the input comes from there)
271            value = None
272            try:
273                value = int(choice)
274            except ValueError:
275                parsed_choice = parse_datetime(choice)
276                value = int(parsed_choice.timestamp())
277            finally:
278                return value
279
280        elif input_type in (UserInput.OPTION_MULTI, UserInput.OPTION_ANNOTATIONS):
281            # any number of values out of a list of possible values
282            # comma-separated during input, returned as a list of valid options
283            if not choice:
284                return settings.get("default", [])
285
286            chosen = choice.split(",")
287            return [item for item in chosen if item in settings.get("options", [])]
288
289        elif input_type == UserInput.OPTION_MULTI_SELECT:
290            # multiple number of values out of a dropdown list of possible values
291            # comma-separated during input, returned as a list of valid options
292            if not choice:
293                return settings.get("default", [])
294
295            if type(choice) is str:
296                # should be a list if the form control was actually a multiselect
297                # but we have some client side UI helpers that may produce a string
298                # instead
299                choice = choice.split(",")
300
301            return [item for item in choice if item in settings.get("options", [])]
302
303        elif input_type == UserInput.OPTION_CHOICE:
304            # select box
305            # one out of multiple options
306            # return option if valid, or default
307            if choice not in settings.get("options"):
308                if not silently_correct:
309                    raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(settings.get('options', {}).keys())}. {settings}")
310                else:
311                    return settings.get("default", "")
312            else:
313                return choice
314
315        elif input_type == UserInput.OPTION_TEXT_JSON:
316            # verify that this is actually json
317            try:
318                json.dumps(json.loads(choice))
319            except json.JSONDecodeError:
320                raise QueryParametersException("Invalid JSON value '%s'" % choice)
321
322            return json.loads(choice)
323
324        elif input_type in (UserInput.OPTION_TEXT, UserInput.OPTION_TEXT_LARGE, UserInput.OPTION_HUE):
325            # text string
326            # optionally clamp it as an integer; return default if not a valid
327            # integer (or float; inferred from default or made explicit via the
328            # coerce_type setting)
329            if settings.get("coerce_type"):
330                value_type = settings["coerce_type"]
331            else:
332                value_type = type(settings.get("default"))
333                if value_type not in (int, float):
334                    value_type = int
335
336            if "max" in settings:
337                try:
338                    choice = min(settings["max"], value_type(choice))
339                except (ValueError, TypeError):
340                    if not silently_correct:
341                        raise QueryParametersException("Provide a value of %s or lower." % str(settings["max"]))
342
343                    choice = settings.get("default")
344
345            if "min" in settings:
346                try:
347                    choice = max(settings["min"], value_type(choice))
348                except (ValueError, TypeError):
349                    if not silently_correct:
350                        raise QueryParametersException("Provide a value of %s or more." % str(settings["min"]))
351
352                    choice = settings.get("default")
353
354            if choice is None or choice == "":
355                choice = settings.get("default")
356
357            if choice is None:
358                choice = 0 if "min" in settings or "max" in settings else ""
359
360            if settings.get("coerce_type"):
361                try:
362                    return value_type(choice)
363                except (ValueError, TypeError):
364                    return settings.get("default")
365            else:
366                return choice
367
368        else:
369            # no filtering
370            return choice

Class for handling user input

It is important to sanitise user input, as carelessly entered parameters may result in e.g. requests for far more data than needed, or lead to undefined behaviour. This class offers a set of pre-defined value types that can be consistently rendered as form elements in an interface and parsed.

OPTION_TOGGLE = 'toggle'
OPTION_CHOICE = 'choice'
OPTION_TEXT = 'string'
OPTION_MULTI = 'multi'
OPTION_MULTI_SELECT = 'multi_select'
OPTION_INFO = 'info'
OPTION_TEXT_LARGE = 'textarea'
OPTION_TEXT_JSON = 'json'
OPTION_DATE = 'date'
OPTION_DATERANGE = 'daterange'
OPTION_DIVIDER = 'divider'
OPTION_FILE = 'file'
OPTION_HUE = 'hue'
OPTION_DATASOURCES = 'datasources'
OPTION_EXTENSIONS = 'extensions'
OPTION_DATASOURCES_TABLE = 'datasources_table'
OPTION_ANNOTATION = 'annotation'
OPTION_ANNOTATIONS = 'annotations'
OPTIONS_COSMETIC = ('info', 'divider')
@staticmethod
def parse_all(options, input, silently_correct=True):
 46    @staticmethod
 47    def parse_all(options, input, silently_correct=True):
 48        """
 49        Parse form input for the provided options
 50
 51        Ignores all input not belonging to any of the defined options: parses
 52        and sanitises the rest, and returns a dictionary with the sanitised
 53        options. If an option is *not* present in the input, the default value
 54        is used, and if that is absent, `None`.
 55
 56        In other words, this ensures a dictionary with 1) only white-listed
 57        keys, 2) a value of an expected type for each key.
 58
 59        :param dict options:  Options, as a name -> settings dictionary
 60        :param dict input:  Input, as a form field -> value dictionary
 61        :param bool silently_correct:  If true, replace invalid values with the
 62        given default value; else, raise a QueryParametersException if a value
 63        is invalid.
 64
 65        :return dict:  Sanitised form input
 66        """
 67
 68        from common.lib.helpers import convert_to_int
 69        parsed_input = {}
 70
 71        if type(input) is not dict and type(input) is not ImmutableMultiDict:
 72            raise TypeError("input must be a dictionary or ImmutableMultiDict")
 73
 74        if type(input) is ImmutableMultiDict:
 75            # we are not using to_dict, because that messes up multi-selects
 76            input = {key: input.getlist(key) for key in input}
 77            for key, value in input.items():
 78                if type(value) is list and len(value) == 1:
 79                    input[key] = value[0]
 80
 81        # all parameters are submitted as option-[parameter ID], this is an 
 82        # artifact of how the web interface works and we can simply remove the
 83        # prefix
 84        input = {re.sub(r"^option-", "", field): input[field] for field in input}
 85
 86        # re-order input so that the fields relying on the value of other
 87        # fields are parsed last
 88        options = {k: options[k] for k in sorted(options, key=lambda k: options[k].get("requires") is not None)}
 89
 90        for option, settings in options.items():
 91            if settings.get("indirect"):
 92                # these are settings that are derived from and set by other
 93                # settings
 94                continue
 95
 96            if settings.get("type") in UserInput.OPTIONS_COSMETIC:
 97                # these are structural form elements and never have a value
 98                continue
 99
100            elif settings.get("type") == UserInput.OPTION_DATERANGE:
101                # special case, since it combines two inputs
102                option_min = option + "-min"
103                option_max = option + "-max"
104
105                # normally this is taken care of client-side, but in case this
106                # didn't work, try to salvage it server-side
107                if option_min not in input or input.get(option_min) == "-1":
108                    option_min += "_proxy"
109
110                if option_max not in input or input.get(option_max) == "-1":
111                    option_max += "_proxy"
112
113                # save as a tuple of unix timestamps (or None)
114                try:
115                    after, before = (UserInput.parse_value(settings, input.get(option_min), parsed_input, silently_correct), UserInput.parse_value(settings, input.get(option_max), parsed_input, silently_correct))
116
117                    if before and after and after > before:
118                        if not silently_correct:
119                            raise QueryParametersException("End of date range must be after beginning of date range.")
120                        else:
121                            before = after
122
123                    parsed_input[option] = (after, before)
124                except RequirementsNotMetException:
125                    pass
126
127            elif settings.get("type") in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
128                # special case too, since if a checkbox is unchecked, it simply
129                # does not show up in the input
130                try:
131                    if option in input:
132                        # Toggle needs to be parsed
133                        parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
134                    else:
135                        # Toggle was left blank
136                        parsed_input[option] = False
137                except RequirementsNotMetException:
138                    pass
139
140            elif settings.get("type") == UserInput.OPTION_DATASOURCES:
141                # special case, because this combines multiple inputs to
142                # configure data source availability and expiration
143                datasources = {datasource: {
144                    "enabled": f"{option}-enable-{datasource}" in input,
145                    "allow_optout": f"{option}-optout-{datasource}" in input,
146                    "timeout": convert_to_int(input[f"{option}-timeout-{datasource}"], 0)
147                } for datasource in input[option].split(",")}
148
149                parsed_input[option] = [datasource for datasource, v in datasources.items() if v["enabled"]]
150                parsed_input[option.split(".")[0] + ".expiration"] = datasources
151
152            elif settings.get("type") == UserInput.OPTION_EXTENSIONS:
153                # also a special case
154                parsed_input[option] = {extension: {
155                    "enabled": f"{option}-enable-{extension}" in input
156                } for extension in input[option].split(",")}
157
158            elif settings.get("type") == UserInput.OPTION_DATASOURCES_TABLE:
159                # special case, parse table values to generate a dict
160                columns = list(settings["columns"].keys())
161                table_input = {}
162
163                for datasource in list(settings["default"].keys()):
164                    table_input[datasource] = {}
165                    for column in columns:
166
167                        choice = input.get(option + "-" + datasource + "-" + column, False)
168                        column_settings = settings["columns"][column]  # sub-settings per column
169                        table_input[datasource][column] = UserInput.parse_value(column_settings, choice, table_input, silently_correct=True)
170
171                parsed_input[option] = table_input
172
173            elif option not in input:
174                # not provided? use default
175                parsed_input[option] = settings.get("default", None)
176
177            else:
178                # normal parsing and sanitisation
179                try:
180                    parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
181                except RequirementsNotMetException:
182                    pass
183
184        return parsed_input

Parse form input for the provided options

Ignores all input not belonging to any of the defined options: parses and sanitises the rest, and returns a dictionary with the sanitised options. If an option is not present in the input, the default value is used, and if that is absent, None.

In other words, this ensures a dictionary with 1) only white-listed keys, 2) a value of an expected type for each key.

Parameters
  • dict options: Options, as a name -> settings dictionary
  • dict input: Input, as a form field -> value dictionary
  • bool silently_correct: If true, replace invalid values with the given default value; else, raise a QueryParametersException if a value is invalid.
Returns

Sanitised form input

@staticmethod
def parse_value(settings, choice, other_input=None, silently_correct=True):
186    @staticmethod
187    def parse_value(settings, choice, other_input=None, silently_correct=True):
188        """
189        Filter user input
190
191        Makes sure user input for post-processors is valid and within the
192        parameters specified by the post-processor
193
194        :param obj settings:  Settings, including defaults and valid options
195        :param choice:  The chosen option, to be parsed
196        :param dict other_input:  Other input, as parsed so far
197        :param bool silently_correct:  If true, replace invalid values with the
198        given default value; else, raise a QueryParametersException if a value
199        is invalid.
200
201        :return:  Validated and parsed input
202        """
203        # short-circuit if there is a requirement for the field to be parsed
204        # and the requirement isn't met
205        if settings.get("requires"):
206            try:
207                field, operator, value = re.findall(r"([a-zA-Z0-9_-]+)([!=$~^]+)(.*)", settings.get("requires"))[0]
208            except IndexError:
209                # invalid condition, interpret as 'does the field with this name have a value'
210                field, operator, value = (choice, "!=", "")
211
212            if field not in other_input:
213                raise RequirementsNotMetException()
214
215            other_value = other_input.get(field)
216            if type(other_value) is bool:
217                # evalues to a boolean, i.e. checkboxes etc
218                if operator == "!=":
219                    if (other_value and value in ("", "false")) or (not other_value and value in ("true", "checked")):
220                        raise RequirementsNotMetException()
221                else:
222                    if (other_value and value not in ("true", "checked")) or (not other_value and value not in ("", "false")):
223                        raise RequirementsNotMetException()
224
225            else:
226                if type(other_value) in (tuple, list):
227                # iterables are a bit special
228                    if len(other_value) == 1:
229                        # treat one-item lists as "normal" values
230                        other_value = other_value[0]
231                    elif operator == "~=":  # interpret as 'is in list?'
232                        if value not in other_value:
233                            raise RequirementsNotMetException()
234                    else:
235                        # condition doesn't make sense for a list, so assume it's not True
236                        raise RequirementsNotMetException()
237
238                if operator == "^=" and not str(other_value).startswith(value):
239                    raise RequirementsNotMetException()
240                elif operator == "$=" and not str(other_value).endswith(value):
241                    raise RequirementsNotMetException()
242                elif operator == "~=" and value not in str(other_value):
243                    raise RequirementsNotMetException()
244                elif operator == "!=" and value == other_value:
245                    raise RequirementsNotMetException()
246                elif operator in ("==", "=") and value != other_value:
247                    raise RequirementsNotMetException()
248
249        input_type = settings.get("type", "")
250        if input_type in UserInput.OPTIONS_COSMETIC:
251            # these are structural form elements and can never return a value
252            return None
253
254        elif input_type in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
255            # simple boolean toggle
256            if type(choice) is bool:
257                return choice
258            elif choice in ['false', 'False']:
259                # Sanitized options passed back to Flask can be converted to strings as 'false'
260                return False
261            elif choice in ['true', 'True', 'on']:
262                # Toggle will have value 'on', but may also becomes a string 'true'
263                return True
264            else:
265                raise QueryParametersException("Toggle invalid input")
266
267        elif input_type in (UserInput.OPTION_DATE, UserInput.OPTION_DATERANGE):
268            # parse either integers (unix timestamps) or try to guess the date
269            # format (the latter may be used for input if JavaScript is turned
270            # off in the front-end and the input comes from there)
271            value = None
272            try:
273                value = int(choice)
274            except ValueError:
275                parsed_choice = parse_datetime(choice)
276                value = int(parsed_choice.timestamp())
277            finally:
278                return value
279
280        elif input_type in (UserInput.OPTION_MULTI, UserInput.OPTION_ANNOTATIONS):
281            # any number of values out of a list of possible values
282            # comma-separated during input, returned as a list of valid options
283            if not choice:
284                return settings.get("default", [])
285
286            chosen = choice.split(",")
287            return [item for item in chosen if item in settings.get("options", [])]
288
289        elif input_type == UserInput.OPTION_MULTI_SELECT:
290            # multiple number of values out of a dropdown list of possible values
291            # comma-separated during input, returned as a list of valid options
292            if not choice:
293                return settings.get("default", [])
294
295            if type(choice) is str:
296                # should be a list if the form control was actually a multiselect
297                # but we have some client side UI helpers that may produce a string
298                # instead
299                choice = choice.split(",")
300
301            return [item for item in choice if item in settings.get("options", [])]
302
303        elif input_type == UserInput.OPTION_CHOICE:
304            # select box
305            # one out of multiple options
306            # return option if valid, or default
307            if choice not in settings.get("options"):
308                if not silently_correct:
309                    raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(settings.get('options', {}).keys())}. {settings}")
310                else:
311                    return settings.get("default", "")
312            else:
313                return choice
314
315        elif input_type == UserInput.OPTION_TEXT_JSON:
316            # verify that this is actually json
317            try:
318                json.dumps(json.loads(choice))
319            except json.JSONDecodeError:
320                raise QueryParametersException("Invalid JSON value '%s'" % choice)
321
322            return json.loads(choice)
323
324        elif input_type in (UserInput.OPTION_TEXT, UserInput.OPTION_TEXT_LARGE, UserInput.OPTION_HUE):
325            # text string
326            # optionally clamp it as an integer; return default if not a valid
327            # integer (or float; inferred from default or made explicit via the
328            # coerce_type setting)
329            if settings.get("coerce_type"):
330                value_type = settings["coerce_type"]
331            else:
332                value_type = type(settings.get("default"))
333                if value_type not in (int, float):
334                    value_type = int
335
336            if "max" in settings:
337                try:
338                    choice = min(settings["max"], value_type(choice))
339                except (ValueError, TypeError):
340                    if not silently_correct:
341                        raise QueryParametersException("Provide a value of %s or lower." % str(settings["max"]))
342
343                    choice = settings.get("default")
344
345            if "min" in settings:
346                try:
347                    choice = max(settings["min"], value_type(choice))
348                except (ValueError, TypeError):
349                    if not silently_correct:
350                        raise QueryParametersException("Provide a value of %s or more." % str(settings["min"]))
351
352                    choice = settings.get("default")
353
354            if choice is None or choice == "":
355                choice = settings.get("default")
356
357            if choice is None:
358                choice = 0 if "min" in settings or "max" in settings else ""
359
360            if settings.get("coerce_type"):
361                try:
362                    return value_type(choice)
363                except (ValueError, TypeError):
364                    return settings.get("default")
365            else:
366                return choice
367
368        else:
369            # no filtering
370            return choice

Filter user input

Makes sure user input for post-processors is valid and within the parameters specified by the post-processor

Parameters
  • obj settings: Settings, including defaults and valid options
  • choice: The chosen option, to be parsed
  • dict other_input: Other input, as parsed so far
  • bool silently_correct: If true, replace invalid values with the given default value; else, raise a QueryParametersException if a value is invalid.
Returns

Validated and parsed input