Edit on GitHub

common.lib.user_input

  1from dateutil.parser import parse as parse_datetime
  2from common.lib.exceptions import QueryParametersException
  3from werkzeug.datastructures import ImmutableMultiDict
  4import json
  5
  6import re
  7
  8class RequirementsNotMetException(Exception):
  9    """
 10    If this is raised while parsing, that option is not included in the parsed
 11    output. Used with the "requires" option setting.
 12    """
 13    pass
 14
 15class UserInput:
 16    """
 17    Class for handling user input
 18
 19    It is important to sanitise user input, as carelessly entered parameters
 20    may in e.g. requesting far more data than needed, or lead to undefined
 21    behaviour. This class offers a set of pre-defined value types that can be
 22    consistently rendered as form elements in an interface and parsed.
 23    """
 24    OPTION_TOGGLE = "toggle"  # boolean toggle (checkbox)
 25    OPTION_CHOICE = "choice"  # one choice out of a list (select)
 26    OPTION_TEXT = "string"  # simple string or integer (input text)
 27    OPTION_MULTI = "multi"  # multiple values out of a list (select multiple)
 28    OPTION_MULTI_SELECT = "multi_select"  # multiple values out of a dropdown list (select multiple)
 29    OPTION_INFO = "info"  # just a bit of text, not actual input
 30    OPTION_TEXT_LARGE = "textarea"  # longer text
 31    OPTION_TEXT_JSON = "json"  # text, but should be valid JSON
 32    OPTION_DATE = "date"  # a single date
 33    OPTION_DATERANGE = "daterange"  # a beginning and end date
 34    OPTION_DIVIDER = "divider"  # meta-option, divides related sets of options
 35    OPTION_FILE = "file"  # file upload
 36    OPTION_HUE = "hue"  # colour hue
 37    OPTION_DATASOURCES = "datasources"  # data source toggling
 38    OPTION_DATASOURCES_TABLE = "datasources_table"  # a table with settings per data source
 39    OPTION_ANNOTATION = "annotation"  # checkbox for whether to an annotation
 40    OPTION_ANNOTATIONS = "annotations"  # table for whether to write multiple annotations
 41
 42    OPTIONS_COSMETIC = (OPTION_INFO, OPTION_DIVIDER)
 43
 44    @staticmethod
 45    def parse_all(options, input, silently_correct=True):
 46        """
 47        Parse form input for the provided options
 48
 49        Ignores all input not belonging to any of the defined options: parses
 50        and sanitises the rest, and returns a dictionary with the sanitised
 51        options. If an option is *not* present in the input, the default value
 52        is used, and if that is absent, `None`.
 53
 54        In other words, this ensures a dictionary with 1) only white-listed
 55        keys, 2) a value of an expected type for each key.
 56
 57        :param dict options:  Options, as a name -> settings dictionary
 58        :param dict input:  Input, as a form field -> value dictionary
 59        :param bool silently_correct:  If true, replace invalid values with the
 60        given default value; else, raise a QueryParametersException if a value
 61        is invalid.
 62
 63        :return dict:  Sanitised form input
 64        """
 65
 66        from common.lib.helpers import convert_to_int
 67        parsed_input = {}
 68
 69        if type(input) is not dict and type(input) is not ImmutableMultiDict:
 70            raise TypeError("input must be a dictionary or ImmutableMultiDict")
 71
 72        if type(input) is ImmutableMultiDict:
 73            # we are not using to_dict, because that messes up multi-selects
 74            input = {key: input.getlist(key) for key in input}
 75            for key, value in input.items():
 76                if type(value) is list and len(value) == 1:
 77                    input[key] = value[0]
 78
 79        # all parameters are submitted as option-[parameter ID], this is an 
 80        # artifact of how the web interface works and we can simply remove the
 81        # prefix
 82        input = {re.sub(r"^option-", "", field): input[field] for field in input}
 83
 84        # re-order input so that the fields relying on the value of other
 85        # fields are parsed last
 86        options = {k: options[k] for k in sorted(options, key=lambda k: options[k].get("requires") is not None)}
 87
 88        for option, settings in options.items():
 89            if settings.get("indirect"):
 90                # these are settings that are derived from and set by other
 91                # settings
 92                continue
 93
 94            if settings.get("type") in UserInput.OPTIONS_COSMETIC:
 95                # these are structural form elements and never have a value
 96                continue
 97
 98            elif settings.get("type") == UserInput.OPTION_DATERANGE:
 99                # special case, since it combines two inputs
100                option_min = option + "-min"
101                option_max = option + "-max"
102
103                # normally this is taken care of client-side, but in case this
104                # didn't work, try to salvage it server-side
105                if option_min not in input or input.get(option_min) == "-1":
106                    option_min += "_proxy"
107
108                if option_max not in input or input.get(option_max) == "-1":
109                    option_max += "_proxy"
110
111                # save as a tuple of unix timestamps (or None)
112                try:
113                    after, before = (UserInput.parse_value(settings, input.get(option_min), parsed_input, silently_correct), UserInput.parse_value(settings, input.get(option_max), parsed_input, silently_correct))
114
115                    if before and after and after > before:
116                        if not silently_correct:
117                            raise QueryParametersException("End of date range must be after beginning of date range.")
118                        else:
119                            before = after
120
121                    parsed_input[option] = (after, before)
122                except RequirementsNotMetException:
123                    pass
124
125            elif settings.get("type") in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
126                # special case too, since if a checkbox is unchecked, it simply
127                # does not show up in the input
128                try:
129                    if option in input:
130                        # Toggle needs to be parsed
131                        parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
132                    else:
133                        # Toggle was left blank
134                        parsed_input[option] = False
135                except RequirementsNotMetException:
136                    pass
137
138            elif settings.get("type") == UserInput.OPTION_DATASOURCES:
139                # special case, because this combines multiple inputs to
140                # configure data source availability and expiration
141                datasources = {datasource: {
142                    "enabled": f"{option}-enable-{datasource}" in input,
143                    "allow_optout": f"{option}-optout-{datasource}" in input,
144                    "timeout": convert_to_int(input[f"{option}-timeout-{datasource}"], 0)
145                } for datasource in input[option].split(",")}
146
147                parsed_input[option] = [datasource for datasource, v in datasources.items() if v["enabled"]]
148                parsed_input[option.split(".")[0] + ".expiration"] = datasources
149
150            elif settings.get("type") == UserInput.OPTION_DATASOURCES_TABLE:
151                # special case, parse table values to generate a dict
152                columns = list(settings["columns"].keys())
153                table_input = {}
154
155                for datasource in list(settings["default"].keys()):
156                    table_input[datasource] = {}
157                    for column in columns:
158
159                        choice = input.get(option + "-" + datasource + "-" + column, False)
160                        column_settings = settings["columns"][column]  # sub-settings per column
161                        table_input[datasource][column] = UserInput.parse_value(column_settings, choice, table_input, silently_correct=True)
162
163                parsed_input[option] = table_input
164
165            elif option not in input:
166                # not provided? use default
167                parsed_input[option] = settings.get("default", None)
168
169            else:
170                # normal parsing and sanitisation
171                try:
172                    parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
173                except RequirementsNotMetException:
174                    pass
175
176        return parsed_input
177
178    @staticmethod
179    def parse_value(settings, choice, other_input=None, silently_correct=True):
180        """
181        Filter user input
182
183        Makes sure user input for post-processors is valid and within the
184        parameters specified by the post-processor
185
186        :param obj settings:  Settings, including defaults and valid options
187        :param choice:  The chosen option, to be parsed
188        :param dict other_input:  Other input, as parsed so far
189        :param bool silently_correct:  If true, replace invalid values with the
190        given default value; else, raise a QueryParametersException if a value
191        is invalid.
192
193        :return:  Validated and parsed input
194        """
195        # short-circuit if there is a requirement for the field to be parsed
196        # and the requirement isn't met
197        if settings.get("requires"):
198            try:
199                field, operator, value = re.findall(r"([a-zA-Z0-9_-]+)([!=$~^]+)(.*)", settings.get("requires"))[0]
200            except IndexError:
201                # invalid condition, interpret as 'does the field with this name have a value'
202                field, operator, value = (choice, "!=", "")
203
204            if field not in other_input:
205                raise RequirementsNotMetException()
206
207            other_value = other_input.get(field)
208            if type(other_value) is bool:
209                # evalues to a boolean, i.e. checkboxes etc
210                if operator == "!=":
211                    if (other_value and value in ("", "false")) or (not other_value and value in ("true", "checked")):
212                        raise RequirementsNotMetException()
213                else:
214                    if (other_value and value not in ("true", "checked")) or (not other_value and value not in ("", "false")):
215                        raise RequirementsNotMetException()
216
217            else:
218                if type(other_value) in (tuple, list):
219                # iterables are a bit special
220                    if len(other_value) == 1:
221                        # treat one-item lists as "normal" values
222                        other_value = other_value[0]
223                    elif operator == "~=":  # interpret as 'is in list?'
224                        if value not in other_value:
225                            raise RequirementsNotMetException()
226                    else:
227                        # condition doesn't make sense for a list, so assume it's not True
228                        raise RequirementsNotMetException()
229
230                if operator == "^=" and not str(other_value).startswith(value):
231                    raise RequirementsNotMetException()
232                elif operator == "$=" and not str(other_value).endswith(value):
233                    raise RequirementsNotMetException()
234                elif operator == "~=" and value not in str(other_value):
235                    raise RequirementsNotMetException()
236                elif operator == "!=" and value == other_value:
237                    raise RequirementsNotMetException()
238                elif operator in ("==", "=") and value != other_value:
239                    raise RequirementsNotMetException()
240
241        input_type = settings.get("type", "")
242        if input_type in UserInput.OPTIONS_COSMETIC:
243            # these are structural form elements and can never return a value
244            return None
245
246        elif input_type in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
247            # simple boolean toggle
248            if type(choice) is bool:
249                return choice
250            elif choice in ['false', 'False']:
251                # Sanitized options passed back to Flask can be converted to strings as 'false'
252                return False
253            elif choice in ['true', 'True', 'on']:
254                # Toggle will have value 'on', but may also becomes a string 'true'
255                return True
256            else:
257                raise QueryParametersException("Toggle invalid input")
258
259        elif input_type in (UserInput.OPTION_DATE, UserInput.OPTION_DATERANGE):
260            # parse either integers (unix timestamps) or try to guess the date
261            # format (the latter may be used for input if JavaScript is turned
262            # off in the front-end and the input comes from there)
263            value = None
264            try:
265                value = int(choice)
266            except ValueError:
267                parsed_choice = parse_datetime(choice)
268                value = int(parsed_choice.timestamp())
269            finally:
270                return value
271
272        elif input_type in (UserInput.OPTION_MULTI, UserInput.OPTION_ANNOTATIONS):
273            # any number of values out of a list of possible values
274            # comma-separated during input, returned as a list of valid options
275            if not choice:
276                return settings.get("default", [])
277
278            chosen = choice.split(",")
279            return [item for item in chosen if item in settings.get("options", [])]
280
281        elif input_type == UserInput.OPTION_MULTI_SELECT:
282            # multiple number of values out of a dropdown list of possible values
283            # comma-separated during input, returned as a list of valid options
284            if not choice:
285                return settings.get("default", [])
286
287            if type(choice) is str:
288                # should be a list if the form control was actually a multiselect
289                # but we have some client side UI helpers that may produce a string
290                # instead
291                choice = choice.split(",")
292
293            return [item for item in choice if item in settings.get("options", [])]
294
295        elif input_type == UserInput.OPTION_CHOICE:
296            # select box
297            # one out of multiple options
298            # return option if valid, or default
299            if choice not in settings.get("options"):
300                if not silently_correct:
301                    raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(settings.get('options', {}).keys())}. {settings}")
302                else:
303                    return settings.get("default", "")
304            else:
305                return choice
306
307        elif input_type == UserInput.OPTION_TEXT_JSON:
308            # verify that this is actually json
309            try:
310                json.dumps(json.loads(choice))
311            except json.JSONDecodeError:
312                raise QueryParametersException("Invalid JSON value '%s'" % choice)
313
314            return json.loads(choice)
315
316        elif input_type in (UserInput.OPTION_TEXT, UserInput.OPTION_TEXT_LARGE, UserInput.OPTION_HUE):
317            # text string
318            # optionally clamp it as an integer; return default if not a valid
319            # integer (or float; inferred from default or made explicit via the
320            # coerce_type setting)
321            if settings.get("coerce_type"):
322                value_type = settings["coerce_type"]
323            else:
324                value_type = type(settings.get("default"))
325                if value_type not in (int, float):
326                    value_type = int
327
328            if "max" in settings:
329                try:
330                    choice = min(settings["max"], value_type(choice))
331                except (ValueError, TypeError):
332                    if not silently_correct:
333                        raise QueryParametersException("Provide a value of %s or lower." % str(settings["max"]))
334
335                    choice = settings.get("default")
336
337            if "min" in settings:
338                try:
339                    choice = max(settings["min"], value_type(choice))
340                except (ValueError, TypeError):
341                    if not silently_correct:
342                        raise QueryParametersException("Provide a value of %s or more." % str(settings["min"]))
343
344                    choice = settings.get("default")
345
346            if choice is None or choice == "":
347                choice = settings.get("default")
348
349            if choice is None:
350                choice = 0 if "min" in settings or "max" in settings else ""
351
352            if settings.get("coerce_type"):
353                try:
354                    return value_type(choice)
355                except (ValueError, TypeError):
356                    return settings.get("default")
357            else:
358                return choice
359
360        else:
361            # no filtering
362            return choice
class RequirementsNotMetException(Exception):
    """
    Signal that an option's "requires" condition is not satisfied

    If this is raised while parsing, that option is not included in the parsed
    output. Used with the "requires" option setting.
    """

If this is raised while parsing, that option is not included in the parsed output. Used with the "requires" option setting.

class UserInput:
 16class UserInput:
 17    """
 18    Class for handling user input
 19
 20    It is important to sanitise user input, as carelessly entered parameters
 21    may in e.g. requesting far more data than needed, or lead to undefined
 22    behaviour. This class offers a set of pre-defined value types that can be
 23    consistently rendered as form elements in an interface and parsed.
 24    """
 25    OPTION_TOGGLE = "toggle"  # boolean toggle (checkbox)
 26    OPTION_CHOICE = "choice"  # one choice out of a list (select)
 27    OPTION_TEXT = "string"  # simple string or integer (input text)
 28    OPTION_MULTI = "multi"  # multiple values out of a list (select multiple)
 29    OPTION_MULTI_SELECT = "multi_select"  # multiple values out of a dropdown list (select multiple)
 30    OPTION_INFO = "info"  # just a bit of text, not actual input
 31    OPTION_TEXT_LARGE = "textarea"  # longer text
 32    OPTION_TEXT_JSON = "json"  # text, but should be valid JSON
 33    OPTION_DATE = "date"  # a single date
 34    OPTION_DATERANGE = "daterange"  # a beginning and end date
 35    OPTION_DIVIDER = "divider"  # meta-option, divides related sets of options
 36    OPTION_FILE = "file"  # file upload
 37    OPTION_HUE = "hue"  # colour hue
 38    OPTION_DATASOURCES = "datasources"  # data source toggling
 39    OPTION_DATASOURCES_TABLE = "datasources_table"  # a table with settings per data source
 40    OPTION_ANNOTATION = "annotation"  # checkbox for whether to an annotation
 41    OPTION_ANNOTATIONS = "annotations"  # table for whether to write multiple annotations
 42
 43    OPTIONS_COSMETIC = (OPTION_INFO, OPTION_DIVIDER)
 44
 45    @staticmethod
 46    def parse_all(options, input, silently_correct=True):
 47        """
 48        Parse form input for the provided options
 49
 50        Ignores all input not belonging to any of the defined options: parses
 51        and sanitises the rest, and returns a dictionary with the sanitised
 52        options. If an option is *not* present in the input, the default value
 53        is used, and if that is absent, `None`.
 54
 55        In other words, this ensures a dictionary with 1) only white-listed
 56        keys, 2) a value of an expected type for each key.
 57
 58        :param dict options:  Options, as a name -> settings dictionary
 59        :param dict input:  Input, as a form field -> value dictionary
 60        :param bool silently_correct:  If true, replace invalid values with the
 61        given default value; else, raise a QueryParametersException if a value
 62        is invalid.
 63
 64        :return dict:  Sanitised form input
 65        """
 66
 67        from common.lib.helpers import convert_to_int
 68        parsed_input = {}
 69
 70        if type(input) is not dict and type(input) is not ImmutableMultiDict:
 71            raise TypeError("input must be a dictionary or ImmutableMultiDict")
 72
 73        if type(input) is ImmutableMultiDict:
 74            # we are not using to_dict, because that messes up multi-selects
 75            input = {key: input.getlist(key) for key in input}
 76            for key, value in input.items():
 77                if type(value) is list and len(value) == 1:
 78                    input[key] = value[0]
 79
 80        # all parameters are submitted as option-[parameter ID], this is an 
 81        # artifact of how the web interface works and we can simply remove the
 82        # prefix
 83        input = {re.sub(r"^option-", "", field): input[field] for field in input}
 84
 85        # re-order input so that the fields relying on the value of other
 86        # fields are parsed last
 87        options = {k: options[k] for k in sorted(options, key=lambda k: options[k].get("requires") is not None)}
 88
 89        for option, settings in options.items():
 90            if settings.get("indirect"):
 91                # these are settings that are derived from and set by other
 92                # settings
 93                continue
 94
 95            if settings.get("type") in UserInput.OPTIONS_COSMETIC:
 96                # these are structural form elements and never have a value
 97                continue
 98
 99            elif settings.get("type") == UserInput.OPTION_DATERANGE:
100                # special case, since it combines two inputs
101                option_min = option + "-min"
102                option_max = option + "-max"
103
104                # normally this is taken care of client-side, but in case this
105                # didn't work, try to salvage it server-side
106                if option_min not in input or input.get(option_min) == "-1":
107                    option_min += "_proxy"
108
109                if option_max not in input or input.get(option_max) == "-1":
110                    option_max += "_proxy"
111
112                # save as a tuple of unix timestamps (or None)
113                try:
114                    after, before = (UserInput.parse_value(settings, input.get(option_min), parsed_input, silently_correct), UserInput.parse_value(settings, input.get(option_max), parsed_input, silently_correct))
115
116                    if before and after and after > before:
117                        if not silently_correct:
118                            raise QueryParametersException("End of date range must be after beginning of date range.")
119                        else:
120                            before = after
121
122                    parsed_input[option] = (after, before)
123                except RequirementsNotMetException:
124                    pass
125
126            elif settings.get("type") in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
127                # special case too, since if a checkbox is unchecked, it simply
128                # does not show up in the input
129                try:
130                    if option in input:
131                        # Toggle needs to be parsed
132                        parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
133                    else:
134                        # Toggle was left blank
135                        parsed_input[option] = False
136                except RequirementsNotMetException:
137                    pass
138
139            elif settings.get("type") == UserInput.OPTION_DATASOURCES:
140                # special case, because this combines multiple inputs to
141                # configure data source availability and expiration
142                datasources = {datasource: {
143                    "enabled": f"{option}-enable-{datasource}" in input,
144                    "allow_optout": f"{option}-optout-{datasource}" in input,
145                    "timeout": convert_to_int(input[f"{option}-timeout-{datasource}"], 0)
146                } for datasource in input[option].split(",")}
147
148                parsed_input[option] = [datasource for datasource, v in datasources.items() if v["enabled"]]
149                parsed_input[option.split(".")[0] + ".expiration"] = datasources
150
151            elif settings.get("type") == UserInput.OPTION_DATASOURCES_TABLE:
152                # special case, parse table values to generate a dict
153                columns = list(settings["columns"].keys())
154                table_input = {}
155
156                for datasource in list(settings["default"].keys()):
157                    table_input[datasource] = {}
158                    for column in columns:
159
160                        choice = input.get(option + "-" + datasource + "-" + column, False)
161                        column_settings = settings["columns"][column]  # sub-settings per column
162                        table_input[datasource][column] = UserInput.parse_value(column_settings, choice, table_input, silently_correct=True)
163
164                parsed_input[option] = table_input
165
166            elif option not in input:
167                # not provided? use default
168                parsed_input[option] = settings.get("default", None)
169
170            else:
171                # normal parsing and sanitisation
172                try:
173                    parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
174                except RequirementsNotMetException:
175                    pass
176
177        return parsed_input
178
179    @staticmethod
180    def parse_value(settings, choice, other_input=None, silently_correct=True):
181        """
182        Filter user input
183
184        Makes sure user input for post-processors is valid and within the
185        parameters specified by the post-processor
186
187        :param obj settings:  Settings, including defaults and valid options
188        :param choice:  The chosen option, to be parsed
189        :param dict other_input:  Other input, as parsed so far
190        :param bool silently_correct:  If true, replace invalid values with the
191        given default value; else, raise a QueryParametersException if a value
192        is invalid.
193
194        :return:  Validated and parsed input
195        """
196        # short-circuit if there is a requirement for the field to be parsed
197        # and the requirement isn't met
198        if settings.get("requires"):
199            try:
200                field, operator, value = re.findall(r"([a-zA-Z0-9_-]+)([!=$~^]+)(.*)", settings.get("requires"))[0]
201            except IndexError:
202                # invalid condition, interpret as 'does the field with this name have a value'
203                field, operator, value = (choice, "!=", "")
204
205            if field not in other_input:
206                raise RequirementsNotMetException()
207
208            other_value = other_input.get(field)
209            if type(other_value) is bool:
210                # evalues to a boolean, i.e. checkboxes etc
211                if operator == "!=":
212                    if (other_value and value in ("", "false")) or (not other_value and value in ("true", "checked")):
213                        raise RequirementsNotMetException()
214                else:
215                    if (other_value and value not in ("true", "checked")) or (not other_value and value not in ("", "false")):
216                        raise RequirementsNotMetException()
217
218            else:
219                if type(other_value) in (tuple, list):
220                # iterables are a bit special
221                    if len(other_value) == 1:
222                        # treat one-item lists as "normal" values
223                        other_value = other_value[0]
224                    elif operator == "~=":  # interpret as 'is in list?'
225                        if value not in other_value:
226                            raise RequirementsNotMetException()
227                    else:
228                        # condition doesn't make sense for a list, so assume it's not True
229                        raise RequirementsNotMetException()
230
231                if operator == "^=" and not str(other_value).startswith(value):
232                    raise RequirementsNotMetException()
233                elif operator == "$=" and not str(other_value).endswith(value):
234                    raise RequirementsNotMetException()
235                elif operator == "~=" and value not in str(other_value):
236                    raise RequirementsNotMetException()
237                elif operator == "!=" and value == other_value:
238                    raise RequirementsNotMetException()
239                elif operator in ("==", "=") and value != other_value:
240                    raise RequirementsNotMetException()
241
242        input_type = settings.get("type", "")
243        if input_type in UserInput.OPTIONS_COSMETIC:
244            # these are structural form elements and can never return a value
245            return None
246
247        elif input_type in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
248            # simple boolean toggle
249            if type(choice) is bool:
250                return choice
251            elif choice in ['false', 'False']:
252                # Sanitized options passed back to Flask can be converted to strings as 'false'
253                return False
254            elif choice in ['true', 'True', 'on']:
255                # Toggle will have value 'on', but may also becomes a string 'true'
256                return True
257            else:
258                raise QueryParametersException("Toggle invalid input")
259
260        elif input_type in (UserInput.OPTION_DATE, UserInput.OPTION_DATERANGE):
261            # parse either integers (unix timestamps) or try to guess the date
262            # format (the latter may be used for input if JavaScript is turned
263            # off in the front-end and the input comes from there)
264            value = None
265            try:
266                value = int(choice)
267            except ValueError:
268                parsed_choice = parse_datetime(choice)
269                value = int(parsed_choice.timestamp())
270            finally:
271                return value
272
273        elif input_type in (UserInput.OPTION_MULTI, UserInput.OPTION_ANNOTATIONS):
274            # any number of values out of a list of possible values
275            # comma-separated during input, returned as a list of valid options
276            if not choice:
277                return settings.get("default", [])
278
279            chosen = choice.split(",")
280            return [item for item in chosen if item in settings.get("options", [])]
281
282        elif input_type == UserInput.OPTION_MULTI_SELECT:
283            # multiple number of values out of a dropdown list of possible values
284            # comma-separated during input, returned as a list of valid options
285            if not choice:
286                return settings.get("default", [])
287
288            if type(choice) is str:
289                # should be a list if the form control was actually a multiselect
290                # but we have some client side UI helpers that may produce a string
291                # instead
292                choice = choice.split(",")
293
294            return [item for item in choice if item in settings.get("options", [])]
295
296        elif input_type == UserInput.OPTION_CHOICE:
297            # select box
298            # one out of multiple options
299            # return option if valid, or default
300            if choice not in settings.get("options"):
301                if not silently_correct:
302                    raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(settings.get('options', {}).keys())}. {settings}")
303                else:
304                    return settings.get("default", "")
305            else:
306                return choice
307
308        elif input_type == UserInput.OPTION_TEXT_JSON:
309            # verify that this is actually json
310            try:
311                json.dumps(json.loads(choice))
312            except json.JSONDecodeError:
313                raise QueryParametersException("Invalid JSON value '%s'" % choice)
314
315            return json.loads(choice)
316
317        elif input_type in (UserInput.OPTION_TEXT, UserInput.OPTION_TEXT_LARGE, UserInput.OPTION_HUE):
318            # text string
319            # optionally clamp it as an integer; return default if not a valid
320            # integer (or float; inferred from default or made explicit via the
321            # coerce_type setting)
322            if settings.get("coerce_type"):
323                value_type = settings["coerce_type"]
324            else:
325                value_type = type(settings.get("default"))
326                if value_type not in (int, float):
327                    value_type = int
328
329            if "max" in settings:
330                try:
331                    choice = min(settings["max"], value_type(choice))
332                except (ValueError, TypeError):
333                    if not silently_correct:
334                        raise QueryParametersException("Provide a value of %s or lower." % str(settings["max"]))
335
336                    choice = settings.get("default")
337
338            if "min" in settings:
339                try:
340                    choice = max(settings["min"], value_type(choice))
341                except (ValueError, TypeError):
342                    if not silently_correct:
343                        raise QueryParametersException("Provide a value of %s or more." % str(settings["min"]))
344
345                    choice = settings.get("default")
346
347            if choice is None or choice == "":
348                choice = settings.get("default")
349
350            if choice is None:
351                choice = 0 if "min" in settings or "max" in settings else ""
352
353            if settings.get("coerce_type"):
354                try:
355                    return value_type(choice)
356                except (ValueError, TypeError):
357                    return settings.get("default")
358            else:
359                return choice
360
361        else:
362            # no filtering
363            return choice

Class for handling user input

It is important to sanitise user input, as carelessly entered parameters may result in, e.g., requesting far more data than needed, or lead to undefined behaviour. This class offers a set of pre-defined value types that can be consistently rendered as form elements in an interface and parsed.

OPTION_TOGGLE = 'toggle'
OPTION_CHOICE = 'choice'
OPTION_TEXT = 'string'
OPTION_MULTI = 'multi'
OPTION_MULTI_SELECT = 'multi_select'
OPTION_INFO = 'info'
OPTION_TEXT_LARGE = 'textarea'
OPTION_TEXT_JSON = 'json'
OPTION_DATE = 'date'
OPTION_DATERANGE = 'daterange'
OPTION_DIVIDER = 'divider'
OPTION_FILE = 'file'
OPTION_HUE = 'hue'
OPTION_DATASOURCES = 'datasources'
OPTION_DATASOURCES_TABLE = 'datasources_table'
OPTION_ANNOTATION = 'annotation'
OPTION_ANNOTATIONS = 'annotations'
OPTIONS_COSMETIC = ('info', 'divider')
@staticmethod
def parse_all(options, input, silently_correct=True):
 45    @staticmethod
 46    def parse_all(options, input, silently_correct=True):
 47        """
 48        Parse form input for the provided options
 49
 50        Ignores all input not belonging to any of the defined options: parses
 51        and sanitises the rest, and returns a dictionary with the sanitised
 52        options. If an option is *not* present in the input, the default value
 53        is used, and if that is absent, `None`.
 54
 55        In other words, this ensures a dictionary with 1) only white-listed
 56        keys, 2) a value of an expected type for each key.
 57
 58        :param dict options:  Options, as a name -> settings dictionary
 59        :param dict input:  Input, as a form field -> value dictionary
 60        :param bool silently_correct:  If true, replace invalid values with the
 61        given default value; else, raise a QueryParametersException if a value
 62        is invalid.
 63
 64        :return dict:  Sanitised form input
 65        """
 66
 67        from common.lib.helpers import convert_to_int
 68        parsed_input = {}
 69
 70        if type(input) is not dict and type(input) is not ImmutableMultiDict:
 71            raise TypeError("input must be a dictionary or ImmutableMultiDict")
 72
 73        if type(input) is ImmutableMultiDict:
 74            # we are not using to_dict, because that messes up multi-selects
 75            input = {key: input.getlist(key) for key in input}
 76            for key, value in input.items():
 77                if type(value) is list and len(value) == 1:
 78                    input[key] = value[0]
 79
 80        # all parameters are submitted as option-[parameter ID], this is an 
 81        # artifact of how the web interface works and we can simply remove the
 82        # prefix
 83        input = {re.sub(r"^option-", "", field): input[field] for field in input}
 84
 85        # re-order input so that the fields relying on the value of other
 86        # fields are parsed last
 87        options = {k: options[k] for k in sorted(options, key=lambda k: options[k].get("requires") is not None)}
 88
 89        for option, settings in options.items():
 90            if settings.get("indirect"):
 91                # these are settings that are derived from and set by other
 92                # settings
 93                continue
 94
 95            if settings.get("type") in UserInput.OPTIONS_COSMETIC:
 96                # these are structural form elements and never have a value
 97                continue
 98
 99            elif settings.get("type") == UserInput.OPTION_DATERANGE:
100                # special case, since it combines two inputs
101                option_min = option + "-min"
102                option_max = option + "-max"
103
104                # normally this is taken care of client-side, but in case this
105                # didn't work, try to salvage it server-side
106                if option_min not in input or input.get(option_min) == "-1":
107                    option_min += "_proxy"
108
109                if option_max not in input or input.get(option_max) == "-1":
110                    option_max += "_proxy"
111
112                # save as a tuple of unix timestamps (or None)
113                try:
114                    after, before = (UserInput.parse_value(settings, input.get(option_min), parsed_input, silently_correct), UserInput.parse_value(settings, input.get(option_max), parsed_input, silently_correct))
115
116                    if before and after and after > before:
117                        if not silently_correct:
118                            raise QueryParametersException("End of date range must be after beginning of date range.")
119                        else:
120                            before = after
121
122                    parsed_input[option] = (after, before)
123                except RequirementsNotMetException:
124                    pass
125
126            elif settings.get("type") in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
127                # special case too, since if a checkbox is unchecked, it simply
128                # does not show up in the input
129                try:
130                    if option in input:
131                        # Toggle needs to be parsed
132                        parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
133                    else:
134                        # Toggle was left blank
135                        parsed_input[option] = False
136                except RequirementsNotMetException:
137                    pass
138
139            elif settings.get("type") == UserInput.OPTION_DATASOURCES:
140                # special case, because this combines multiple inputs to
141                # configure data source availability and expiration
142                datasources = {datasource: {
143                    "enabled": f"{option}-enable-{datasource}" in input,
144                    "allow_optout": f"{option}-optout-{datasource}" in input,
145                    "timeout": convert_to_int(input[f"{option}-timeout-{datasource}"], 0)
146                } for datasource in input[option].split(",")}
147
148                parsed_input[option] = [datasource for datasource, v in datasources.items() if v["enabled"]]
149                parsed_input[option.split(".")[0] + ".expiration"] = datasources
150
151            elif settings.get("type") == UserInput.OPTION_DATASOURCES_TABLE:
152                # special case, parse table values to generate a dict
153                columns = list(settings["columns"].keys())
154                table_input = {}
155
156                for datasource in list(settings["default"].keys()):
157                    table_input[datasource] = {}
158                    for column in columns:
159
160                        choice = input.get(option + "-" + datasource + "-" + column, False)
161                        column_settings = settings["columns"][column]  # sub-settings per column
162                        table_input[datasource][column] = UserInput.parse_value(column_settings, choice, table_input, silently_correct=True)
163
164                parsed_input[option] = table_input
165
166            elif option not in input:
167                # not provided? use default
168                parsed_input[option] = settings.get("default", None)
169
170            else:
171                # normal parsing and sanitisation
172                try:
173                    parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
174                except RequirementsNotMetException:
175                    pass
176
177        return parsed_input

Parse form input for the provided options

Ignores all input not belonging to any of the defined options: parses and sanitises the rest, and returns a dictionary with the sanitised options. If an option is not present in the input, the default value is used, and if that is absent, None.

In other words, this ensures a dictionary with 1) only white-listed keys, 2) a value of an expected type for each key.

Parameters
  • dict options: Options, as a name -> settings dictionary
  • dict input: Input, as a form field -> value dictionary
  • bool silently_correct: If true, replace invalid values with the given default value; else, raise a QueryParametersException if a value is invalid.
Returns

Sanitised form input

@staticmethod
def parse_value(settings, choice, other_input=None, silently_correct=True):
179    @staticmethod
180    def parse_value(settings, choice, other_input=None, silently_correct=True):
181        """
182        Filter user input
183
184        Makes sure user input for post-processors is valid and within the
185        parameters specified by the post-processor
186
187        :param obj settings:  Settings, including defaults and valid options
188        :param choice:  The chosen option, to be parsed
189        :param dict other_input:  Other input, as parsed so far
190        :param bool silently_correct:  If true, replace invalid values with the
191        given default value; else, raise a QueryParametersException if a value
192        is invalid.
193
194        :return:  Validated and parsed input
195        """
196        # short-circuit if there is a requirement for the field to be parsed
197        # and the requirement isn't met
198        if settings.get("requires"):
199            try:
200                field, operator, value = re.findall(r"([a-zA-Z0-9_-]+)([!=$~^]+)(.*)", settings.get("requires"))[0]
201            except IndexError:
202                # invalid condition, interpret as 'does the field with this name have a value'
203                field, operator, value = (choice, "!=", "")
204
205            if field not in other_input:
206                raise RequirementsNotMetException()
207
208            other_value = other_input.get(field)
209            if type(other_value) is bool:
210                # evalues to a boolean, i.e. checkboxes etc
211                if operator == "!=":
212                    if (other_value and value in ("", "false")) or (not other_value and value in ("true", "checked")):
213                        raise RequirementsNotMetException()
214                else:
215                    if (other_value and value not in ("true", "checked")) or (not other_value and value not in ("", "false")):
216                        raise RequirementsNotMetException()
217
218            else:
219                if type(other_value) in (tuple, list):
220                # iterables are a bit special
221                    if len(other_value) == 1:
222                        # treat one-item lists as "normal" values
223                        other_value = other_value[0]
224                    elif operator == "~=":  # interpret as 'is in list?'
225                        if value not in other_value:
226                            raise RequirementsNotMetException()
227                    else:
228                        # condition doesn't make sense for a list, so assume it's not True
229                        raise RequirementsNotMetException()
230
231                if operator == "^=" and not str(other_value).startswith(value):
232                    raise RequirementsNotMetException()
233                elif operator == "$=" and not str(other_value).endswith(value):
234                    raise RequirementsNotMetException()
235                elif operator == "~=" and value not in str(other_value):
236                    raise RequirementsNotMetException()
237                elif operator == "!=" and value == other_value:
238                    raise RequirementsNotMetException()
239                elif operator in ("==", "=") and value != other_value:
240                    raise RequirementsNotMetException()
241
242        input_type = settings.get("type", "")
243        if input_type in UserInput.OPTIONS_COSMETIC:
244            # these are structural form elements and can never return a value
245            return None
246
247        elif input_type in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
248            # simple boolean toggle
249            if type(choice) is bool:
250                return choice
251            elif choice in ['false', 'False']:
252                # Sanitized options passed back to Flask can be converted to strings as 'false'
253                return False
254            elif choice in ['true', 'True', 'on']:
255                # Toggle will have value 'on', but may also becomes a string 'true'
256                return True
257            else:
258                raise QueryParametersException("Toggle invalid input")
259
260        elif input_type in (UserInput.OPTION_DATE, UserInput.OPTION_DATERANGE):
261            # parse either integers (unix timestamps) or try to guess the date
262            # format (the latter may be used for input if JavaScript is turned
263            # off in the front-end and the input comes from there)
264            value = None
265            try:
266                value = int(choice)
267            except ValueError:
268                parsed_choice = parse_datetime(choice)
269                value = int(parsed_choice.timestamp())
270            finally:
271                return value
272
273        elif input_type in (UserInput.OPTION_MULTI, UserInput.OPTION_ANNOTATIONS):
274            # any number of values out of a list of possible values
275            # comma-separated during input, returned as a list of valid options
276            if not choice:
277                return settings.get("default", [])
278
279            chosen = choice.split(",")
280            return [item for item in chosen if item in settings.get("options", [])]
281
282        elif input_type == UserInput.OPTION_MULTI_SELECT:
283            # multiple number of values out of a dropdown list of possible values
284            # comma-separated during input, returned as a list of valid options
285            if not choice:
286                return settings.get("default", [])
287
288            if type(choice) is str:
289                # should be a list if the form control was actually a multiselect
290                # but we have some client side UI helpers that may produce a string
291                # instead
292                choice = choice.split(",")
293
294            return [item for item in choice if item in settings.get("options", [])]
295
296        elif input_type == UserInput.OPTION_CHOICE:
297            # select box
298            # one out of multiple options
299            # return option if valid, or default
300            if choice not in settings.get("options"):
301                if not silently_correct:
302                    raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(settings.get('options', {}).keys())}. {settings}")
303                else:
304                    return settings.get("default", "")
305            else:
306                return choice
307
308        elif input_type == UserInput.OPTION_TEXT_JSON:
309            # verify that this is actually json
310            try:
311                json.dumps(json.loads(choice))
312            except json.JSONDecodeError:
313                raise QueryParametersException("Invalid JSON value '%s'" % choice)
314
315            return json.loads(choice)
316
317        elif input_type in (UserInput.OPTION_TEXT, UserInput.OPTION_TEXT_LARGE, UserInput.OPTION_HUE):
318            # text string
319            # optionally clamp it as an integer; return default if not a valid
320            # integer (or float; inferred from default or made explicit via the
321            # coerce_type setting)
322            if settings.get("coerce_type"):
323                value_type = settings["coerce_type"]
324            else:
325                value_type = type(settings.get("default"))
326                if value_type not in (int, float):
327                    value_type = int
328
329            if "max" in settings:
330                try:
331                    choice = min(settings["max"], value_type(choice))
332                except (ValueError, TypeError):
333                    if not silently_correct:
334                        raise QueryParametersException("Provide a value of %s or lower." % str(settings["max"]))
335
336                    choice = settings.get("default")
337
338            if "min" in settings:
339                try:
340                    choice = max(settings["min"], value_type(choice))
341                except (ValueError, TypeError):
342                    if not silently_correct:
343                        raise QueryParametersException("Provide a value of %s or more." % str(settings["min"]))
344
345                    choice = settings.get("default")
346
347            if choice is None or choice == "":
348                choice = settings.get("default")
349
350            if choice is None:
351                choice = 0 if "min" in settings or "max" in settings else ""
352
353            if settings.get("coerce_type"):
354                try:
355                    return value_type(choice)
356                except (ValueError, TypeError):
357                    return settings.get("default")
358            else:
359                return choice
360
361        else:
362            # no filtering
363            return choice

Filter user input

Makes sure user input for post-processors is valid and within the parameters specified by the post-processor

Parameters
  • obj settings: Settings, including defaults and valid options
  • choice: The chosen option, to be parsed
  • dict other_input: Other input, as parsed so far
  • bool silently_correct: If true, replace invalid values with the given default value; else, raise a QueryParametersException if a value is invalid.
Returns

Validated and parsed input