Edit on GitHub

common.lib.user_input

  1from dateutil.parser import parse as parse_datetime
  2from common.lib.exceptions import QueryParametersException
  3from werkzeug.datastructures import ImmutableMultiDict
  4import json
  5
  6import re
  7
  8class RequirementsNotMetException(Exception):
  9    """
 10    If this is raised while parsing, that option is not included in the parsed
 11    output. Used with the "requires" option setting.
 12    """
 13    pass
 14
 15class UserInput:
 16    """
 17    Class for handling user input
 18
 19    It is important to sanitise user input, as carelessly entered parameters
 20    may in e.g. requesting far more data than needed, or lead to undefined
 21    behaviour. This class offers a set of pre-defined value types that can be
 22    consistently rendered as form elements in an interface and parsed.
 23    """
 24    OPTION_TOGGLE = "toggle"  # boolean toggle (checkbox)
 25    OPTION_CHOICE = "choice"  # one choice out of a list (select)
 26    OPTION_TEXT = "string"  # simple string or integer (input text)
 27    OPTION_MULTI = "multi"  # multiple values out of a list (select multiple)
 28    OPTION_MULTI_SELECT = "multi_select"  # multiple values out of a dropdown list (select multiple)
 29    OPTION_INFO = "info"  # just a bit of text, not actual input
 30    OPTION_TEXT_LARGE = "textarea"  # longer text
 31    OPTION_TEXT_JSON = "json"  # text, but should be valid JSON
 32    OPTION_DATE = "date"  # a single date
 33    OPTION_DATERANGE = "daterange"  # a beginning and end date
 34    OPTION_DIVIDER = "divider"  # meta-option, divides related sets of options
 35    OPTION_FILE = "file"  # file upload
 36    OPTION_HUE = "hue"  # colour hue
 37    OPTION_DATASOURCES = "datasources"  # data source toggling
 38
 39    OPTIONS_COSMETIC = (OPTION_INFO, OPTION_DIVIDER)
 40
 41    @staticmethod
 42    def parse_all(options, input, silently_correct=True):
 43        """
 44        Parse form input for the provided options
 45
 46        Ignores all input not belonging to any of the defined options: parses
 47        and sanitises the rest, and returns a dictionary with the sanitised
 48        options. If an option is *not* present in the input, the default value
 49        is used, and if that is absent, `None`.
 50
 51        In other words, this ensures a dictionary with 1) only white-listed
 52        keys, 2) a value of an expected type for each key.
 53
 54        :param dict options:  Options, as a name -> settings dictionary
 55        :param dict input:  Input, as a form field -> value dictionary
 56        :param bool silently_correct:  If true, replace invalid values with the
 57        given default value; else, raise a QueryParametersException if a value
 58        is invalid.
 59
 60        :return dict:  Sanitised form input
 61        """
 62        from common.lib.helpers import convert_to_int
 63        parsed_input = {}
 64
 65        if type(input) is not dict and type(input) is not ImmutableMultiDict:
 66            raise TypeError("input must be a dictionary or ImmutableMultiDict")
 67
 68        if type(input) is ImmutableMultiDict:
 69            # we are not using to_dict, because that messes up multi-selects
 70            input = {key: input.getlist(key) for key in input}
 71            for key, value in input.items():
 72                if type(value) is list and len(value) == 1:
 73                    input[key] = value[0]
 74
 75        # all parameters are submitted as option-[parameter ID], this is an 
 76        # artifact of how the web interface works and we can simply remove the
 77        # prefix
 78        input = {re.sub(r"^option-", "", field): input[field] for field in input}
 79
 80        # re-order input so that the fields relying on the value of other
 81        # fields are parsed last
 82        options = {k: options[k] for k in sorted(options, key=lambda k: options[k].get("requires") is not None)}
 83
 84        for option, settings in options.items():
 85            if settings.get("indirect"):
 86                # these are settings that are derived from and set by other
 87                # settings
 88                continue
 89
 90            if settings.get("type") in UserInput.OPTIONS_COSMETIC:
 91                # these are structural form elements and never have a value
 92                continue
 93
 94            elif settings.get("type") == UserInput.OPTION_DATERANGE:
 95                # special case, since it combines two inputs
 96                option_min = option + "-min"
 97                option_max = option + "-max"
 98
 99                # normally this is taken care of client-side, but in case this
100                # didn't work, try to salvage it server-side
101                if option_min not in input or input.get(option_min) == "-1":
102                    option_min += "_proxy"
103
104                if option_max not in input or input.get(option_max) == "-1":
105                    option_max += "_proxy"
106
107                # save as a tuple of unix timestamps (or None)
108                try:
109                    after, before = (UserInput.parse_value(settings, input.get(option_min), parsed_input, silently_correct), UserInput.parse_value(settings, input.get(option_max), parsed_input, silently_correct))
110
111                    if before and after and after > before:
112                        if not silently_correct:
113                            raise QueryParametersException("End of date range must be after beginning of date range.")
114                        else:
115                            before = after
116
117                    parsed_input[option] = (after, before)
118                except RequirementsNotMetException:
119                    pass
120
121            elif settings.get("type") == UserInput.OPTION_TOGGLE:
122                # special case too, since if a checkbox is unchecked, it simply
123                # does not show up in the input
124                try:
125                    if option in input:
126                        # Toggle needs to be parsed
127                        parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
128                    else:
129                        # Toggle was left blank
130                        parsed_input[option] = False
131                except RequirementsNotMetException:
132                    pass
133
134            elif settings.get("type") == UserInput.OPTION_DATASOURCES:
135                # special case, because this combines multiple inputs to
136                # configure data source availability and expiration
137                datasources = {datasource: {
138                    "enabled": f"{option}-enable-{datasource}" in input,
139                    "allow_optout": f"{option}-optout-{datasource}" in input,
140                    "timeout": convert_to_int(input[f"{option}-timeout-{datasource}"], 0)
141                } for datasource in input[option].split(",")}
142
143                parsed_input[option] = [datasource for datasource, v in datasources.items() if v["enabled"]]
144                parsed_input[option.split(".")[0] + ".expiration"] = datasources
145
146            elif option not in input:
147                # not provided? use default
148                parsed_input[option] = settings.get("default", None)
149
150            else:
151                # normal parsing and sanitisation
152                try:
153                    parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
154                except RequirementsNotMetException:
155                    pass
156
157        return parsed_input
158
159    @staticmethod
160    def parse_value(settings, choice, other_input=None, silently_correct=True):
161        """
162        Filter user input
163
164        Makes sure user input for post-processors is valid and within the
165        parameters specified by the post-processor
166
167        :param obj settings:  Settings, including defaults and valid options
168        :param choice:  The chosen option, to be parsed
169        :param dict other_input:  Other input, as parsed so far
170        :param bool silently_correct:  If true, replace invalid values with the
171        given default value; else, raise a QueryParametersException if a value
172        is invalid.
173
174        :return:  Validated and parsed input
175        """
176        # short-circuit if there is a requirement for the field to be parsed
177        # and the requirement isn't met
178        if settings.get("requires"):
179            try:
180                field, operator, value = re.findall(r"([a-zA-Z0-9_-]+)([!=$~^]+)(.*)", settings.get("requires"))[0]
181            except IndexError:
182                # invalid condition, interpret as 'does the field with this name have a value'
183                field, operator, value = (choice, "!=", "")
184
185            if field not in other_input:
186                raise RequirementsNotMetException()
187
188            other_value = other_input.get(field)
189            if type(other_value) is bool:
190                # evalues to a boolean, i.e. checkboxes etc
191                if operator == "!=":
192                    if (other_value and value in ("", "false")) or (not other_value and value in ("true", "checked")):
193                        raise RequirementsNotMetException()
194                else:
195                    if (other_value and value not in ("true", "checked")) or (not other_value and value not in ("", "false")):
196                        raise RequirementsNotMetException()
197
198            else:
199                if type(other_value) in (tuple, list):
200                # iterables are a bit special
201                    if len(other_value) == 1:
202                        # treat one-item lists as "normal" values
203                        other_value = other_value[0]
204                    elif operator == "~=":  # interpret as 'is in list?'
205                        if value not in other_value:
206                            raise RequirementsNotMetException()
207                    else:
208                        # condition doesn't make sense for a list, so assume it's not True
209                        raise RequirementsNotMetException()
210
211                if operator == "^=" and not str(other_value).startswith(value):
212                    raise RequirementsNotMetException()
213                elif operator == "$=" and not str(other_value).endswith(value):
214                    raise RequirementsNotMetException()
215                elif operator == "~=" and value not in str(other_value):
216                    raise RequirementsNotMetException()
217                elif operator == "!=" and value == other_value:
218                    raise RequirementsNotMetException()
219                elif operator in ("==", "=") and value != other_value:
220                    raise RequirementsNotMetException()
221
222        input_type = settings.get("type", "")
223        if input_type in UserInput.OPTIONS_COSMETIC:
224            # these are structural form elements and can never return a value
225            return None
226
227        elif input_type == UserInput.OPTION_TOGGLE:
228            # simple boolean toggle
229            if type(choice) == bool:
230                return choice
231            elif choice in ['false', 'False']:
232                # Sanitized options passed back to Flask can be converted to strings as 'false'
233                return False
234            elif choice in ['true', 'True', 'on']:
235                # Toggle will have value 'on', but may also becomes a string 'true'
236                return True
237            else:
238                raise QueryParametersException("Toggle invalid input")
239
240        elif input_type in (UserInput.OPTION_DATE, UserInput.OPTION_DATERANGE):
241            # parse either integers (unix timestamps) or try to guess the date
242            # format (the latter may be used for input if JavaScript is turned
243            # off in the front-end and the input comes from there)
244            value = None
245            try:
246                value = int(choice)
247            except ValueError:
248                parsed_choice = parse_datetime(choice)
249                value = int(parsed_choice.timestamp())
250            finally:
251                return value
252
253        elif input_type == UserInput.OPTION_MULTI:
254            # any number of values out of a list of possible values
255            # comma-separated during input, returned as a list of valid options
256            if not choice:
257                return settings.get("default", [])
258
259            chosen = choice.split(",")
260            return [item for item in chosen if item in settings.get("options", [])]
261
262        elif input_type == UserInput.OPTION_MULTI_SELECT:
263            # multiple number of values out of a dropdown list of possible values
264            # comma-separated during input, returned as a list of valid options
265            if not choice:
266                return settings.get("default", [])
267
268            if type(choice) is str:
269                # should be a list if the form control was actually a multiselect
270                # but we have some client side UI helpers that may produce a string
271                # instead
272                choice = choice.split(",")
273
274            return [item for item in choice if item in settings.get("options", [])]
275
276        elif input_type == UserInput.OPTION_CHOICE:
277            # select box
278            # one out of multiple options
279            # return option if valid, or default
280            if choice not in settings.get("options"):
281                if not silently_correct:
282                    raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(settings.get('options', {}).keys())}. {settings}")
283                else:
284                    return settings.get("default", "")
285            else:
286                return choice
287
288        elif input_type == UserInput.OPTION_TEXT_JSON:
289            # verify that this is actually json
290            try:
291                redumped_value = json.dumps(json.loads(choice))
292            except json.JSONDecodeError:
293                raise QueryParametersException("Invalid JSON value '%s'" % choice)
294
295            return json.loads(choice)
296
297        elif input_type in (UserInput.OPTION_TEXT, UserInput.OPTION_TEXT_LARGE, UserInput.OPTION_HUE):
298            # text string
299            # optionally clamp it as an integer; return default if not a valid
300            # integer (or float; inferred from default or made explicit via the
301            # coerce_type setting)
302            if settings.get("coerce_type"):
303                value_type = settings["coerce_type"]
304            else:
305                value_type = type(settings.get("default"))
306                if value_type not in (int, float):
307                    value_type = int
308
309            if "max" in settings:
310                try:
311                    choice = min(settings["max"], value_type(choice))
312                except (ValueError, TypeError) as e:
313                    if not silently_correct:
314                        raise QueryParametersException("Provide a value of %s or lower." % str(settings["max"]))
315
316                    choice = settings.get("default")
317
318            if "min" in settings:
319                try:
320                    choice = max(settings["min"], value_type(choice))
321                except (ValueError, TypeError) as e:
322                    if not silently_correct:
323                        raise QueryParametersException("Provide a value of %s or more." % str(settings["min"]))
324
325                    choice = settings.get("default")
326
327            if choice is None or choice == "":
328                choice = settings.get("default")
329
330            if choice is None:
331                choice = 0 if "min" in settings or "max" in settings else ""
332
333            if settings.get("coerce_type"):
334                try:
335                    return value_type(choice)
336                except (ValueError, TypeError):
337                    return settings.get("default")
338            else:
339                return choice
340
341        else:
342            # no filtering
343            return choice
class RequirementsNotMetException(builtins.Exception):
 9class RequirementsNotMetException(Exception):
10    """
11    If this is raised while parsing, that option is not included in the parsed
12    output. Used with the "requires" option setting.
13    """
14    pass

If this is raised while parsing, that option is not included in the parsed output. Used with the "requires" option setting.

class UserInput:
 16class UserInput:
 17    """
 18    Class for handling user input
 19
 20    It is important to sanitise user input, as carelessly entered parameters
 21    may in e.g. requesting far more data than needed, or lead to undefined
 22    behaviour. This class offers a set of pre-defined value types that can be
 23    consistently rendered as form elements in an interface and parsed.
 24    """
 25    OPTION_TOGGLE = "toggle"  # boolean toggle (checkbox)
 26    OPTION_CHOICE = "choice"  # one choice out of a list (select)
 27    OPTION_TEXT = "string"  # simple string or integer (input text)
 28    OPTION_MULTI = "multi"  # multiple values out of a list (select multiple)
 29    OPTION_MULTI_SELECT = "multi_select"  # multiple values out of a dropdown list (select multiple)
 30    OPTION_INFO = "info"  # just a bit of text, not actual input
 31    OPTION_TEXT_LARGE = "textarea"  # longer text
 32    OPTION_TEXT_JSON = "json"  # text, but should be valid JSON
 33    OPTION_DATE = "date"  # a single date
 34    OPTION_DATERANGE = "daterange"  # a beginning and end date
 35    OPTION_DIVIDER = "divider"  # meta-option, divides related sets of options
 36    OPTION_FILE = "file"  # file upload
 37    OPTION_HUE = "hue"  # colour hue
 38    OPTION_DATASOURCES = "datasources"  # data source toggling
 39
 40    OPTIONS_COSMETIC = (OPTION_INFO, OPTION_DIVIDER)
 41
 42    @staticmethod
 43    def parse_all(options, input, silently_correct=True):
 44        """
 45        Parse form input for the provided options
 46
 47        Ignores all input not belonging to any of the defined options: parses
 48        and sanitises the rest, and returns a dictionary with the sanitised
 49        options. If an option is *not* present in the input, the default value
 50        is used, and if that is absent, `None`.
 51
 52        In other words, this ensures a dictionary with 1) only white-listed
 53        keys, 2) a value of an expected type for each key.
 54
 55        :param dict options:  Options, as a name -> settings dictionary
 56        :param dict input:  Input, as a form field -> value dictionary
 57        :param bool silently_correct:  If true, replace invalid values with the
 58        given default value; else, raise a QueryParametersException if a value
 59        is invalid.
 60
 61        :return dict:  Sanitised form input
 62        """
 63        from common.lib.helpers import convert_to_int
 64        parsed_input = {}
 65
 66        if type(input) is not dict and type(input) is not ImmutableMultiDict:
 67            raise TypeError("input must be a dictionary or ImmutableMultiDict")
 68
 69        if type(input) is ImmutableMultiDict:
 70            # we are not using to_dict, because that messes up multi-selects
 71            input = {key: input.getlist(key) for key in input}
 72            for key, value in input.items():
 73                if type(value) is list and len(value) == 1:
 74                    input[key] = value[0]
 75
 76        # all parameters are submitted as option-[parameter ID], this is an 
 77        # artifact of how the web interface works and we can simply remove the
 78        # prefix
 79        input = {re.sub(r"^option-", "", field): input[field] for field in input}
 80
 81        # re-order input so that the fields relying on the value of other
 82        # fields are parsed last
 83        options = {k: options[k] for k in sorted(options, key=lambda k: options[k].get("requires") is not None)}
 84
 85        for option, settings in options.items():
 86            if settings.get("indirect"):
 87                # these are settings that are derived from and set by other
 88                # settings
 89                continue
 90
 91            if settings.get("type") in UserInput.OPTIONS_COSMETIC:
 92                # these are structural form elements and never have a value
 93                continue
 94
 95            elif settings.get("type") == UserInput.OPTION_DATERANGE:
 96                # special case, since it combines two inputs
 97                option_min = option + "-min"
 98                option_max = option + "-max"
 99
100                # normally this is taken care of client-side, but in case this
101                # didn't work, try to salvage it server-side
102                if option_min not in input or input.get(option_min) == "-1":
103                    option_min += "_proxy"
104
105                if option_max not in input or input.get(option_max) == "-1":
106                    option_max += "_proxy"
107
108                # save as a tuple of unix timestamps (or None)
109                try:
110                    after, before = (UserInput.parse_value(settings, input.get(option_min), parsed_input, silently_correct), UserInput.parse_value(settings, input.get(option_max), parsed_input, silently_correct))
111
112                    if before and after and after > before:
113                        if not silently_correct:
114                            raise QueryParametersException("End of date range must be after beginning of date range.")
115                        else:
116                            before = after
117
118                    parsed_input[option] = (after, before)
119                except RequirementsNotMetException:
120                    pass
121
122            elif settings.get("type") == UserInput.OPTION_TOGGLE:
123                # special case too, since if a checkbox is unchecked, it simply
124                # does not show up in the input
125                try:
126                    if option in input:
127                        # Toggle needs to be parsed
128                        parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
129                    else:
130                        # Toggle was left blank
131                        parsed_input[option] = False
132                except RequirementsNotMetException:
133                    pass
134
135            elif settings.get("type") == UserInput.OPTION_DATASOURCES:
136                # special case, because this combines multiple inputs to
137                # configure data source availability and expiration
138                datasources = {datasource: {
139                    "enabled": f"{option}-enable-{datasource}" in input,
140                    "allow_optout": f"{option}-optout-{datasource}" in input,
141                    "timeout": convert_to_int(input[f"{option}-timeout-{datasource}"], 0)
142                } for datasource in input[option].split(",")}
143
144                parsed_input[option] = [datasource for datasource, v in datasources.items() if v["enabled"]]
145                parsed_input[option.split(".")[0] + ".expiration"] = datasources
146
147            elif option not in input:
148                # not provided? use default
149                parsed_input[option] = settings.get("default", None)
150
151            else:
152                # normal parsing and sanitisation
153                try:
154                    parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
155                except RequirementsNotMetException:
156                    pass
157
158        return parsed_input
159
160    @staticmethod
161    def parse_value(settings, choice, other_input=None, silently_correct=True):
162        """
163        Filter user input
164
165        Makes sure user input for post-processors is valid and within the
166        parameters specified by the post-processor
167
168        :param obj settings:  Settings, including defaults and valid options
169        :param choice:  The chosen option, to be parsed
170        :param dict other_input:  Other input, as parsed so far
171        :param bool silently_correct:  If true, replace invalid values with the
172        given default value; else, raise a QueryParametersException if a value
173        is invalid.
174
175        :return:  Validated and parsed input
176        """
177        # short-circuit if there is a requirement for the field to be parsed
178        # and the requirement isn't met
179        if settings.get("requires"):
180            try:
181                field, operator, value = re.findall(r"([a-zA-Z0-9_-]+)([!=$~^]+)(.*)", settings.get("requires"))[0]
182            except IndexError:
183                # invalid condition, interpret as 'does the field with this name have a value'
184                field, operator, value = (choice, "!=", "")
185
186            if field not in other_input:
187                raise RequirementsNotMetException()
188
189            other_value = other_input.get(field)
190            if type(other_value) is bool:
191                # evalues to a boolean, i.e. checkboxes etc
192                if operator == "!=":
193                    if (other_value and value in ("", "false")) or (not other_value and value in ("true", "checked")):
194                        raise RequirementsNotMetException()
195                else:
196                    if (other_value and value not in ("true", "checked")) or (not other_value and value not in ("", "false")):
197                        raise RequirementsNotMetException()
198
199            else:
200                if type(other_value) in (tuple, list):
201                # iterables are a bit special
202                    if len(other_value) == 1:
203                        # treat one-item lists as "normal" values
204                        other_value = other_value[0]
205                    elif operator == "~=":  # interpret as 'is in list?'
206                        if value not in other_value:
207                            raise RequirementsNotMetException()
208                    else:
209                        # condition doesn't make sense for a list, so assume it's not True
210                        raise RequirementsNotMetException()
211
212                if operator == "^=" and not str(other_value).startswith(value):
213                    raise RequirementsNotMetException()
214                elif operator == "$=" and not str(other_value).endswith(value):
215                    raise RequirementsNotMetException()
216                elif operator == "~=" and value not in str(other_value):
217                    raise RequirementsNotMetException()
218                elif operator == "!=" and value == other_value:
219                    raise RequirementsNotMetException()
220                elif operator in ("==", "=") and value != other_value:
221                    raise RequirementsNotMetException()
222
223        input_type = settings.get("type", "")
224        if input_type in UserInput.OPTIONS_COSMETIC:
225            # these are structural form elements and can never return a value
226            return None
227
228        elif input_type == UserInput.OPTION_TOGGLE:
229            # simple boolean toggle
230            if type(choice) == bool:
231                return choice
232            elif choice in ['false', 'False']:
233                # Sanitized options passed back to Flask can be converted to strings as 'false'
234                return False
235            elif choice in ['true', 'True', 'on']:
236                # Toggle will have value 'on', but may also becomes a string 'true'
237                return True
238            else:
239                raise QueryParametersException("Toggle invalid input")
240
241        elif input_type in (UserInput.OPTION_DATE, UserInput.OPTION_DATERANGE):
242            # parse either integers (unix timestamps) or try to guess the date
243            # format (the latter may be used for input if JavaScript is turned
244            # off in the front-end and the input comes from there)
245            value = None
246            try:
247                value = int(choice)
248            except ValueError:
249                parsed_choice = parse_datetime(choice)
250                value = int(parsed_choice.timestamp())
251            finally:
252                return value
253
254        elif input_type == UserInput.OPTION_MULTI:
255            # any number of values out of a list of possible values
256            # comma-separated during input, returned as a list of valid options
257            if not choice:
258                return settings.get("default", [])
259
260            chosen = choice.split(",")
261            return [item for item in chosen if item in settings.get("options", [])]
262
263        elif input_type == UserInput.OPTION_MULTI_SELECT:
264            # multiple number of values out of a dropdown list of possible values
265            # comma-separated during input, returned as a list of valid options
266            if not choice:
267                return settings.get("default", [])
268
269            if type(choice) is str:
270                # should be a list if the form control was actually a multiselect
271                # but we have some client side UI helpers that may produce a string
272                # instead
273                choice = choice.split(",")
274
275            return [item for item in choice if item in settings.get("options", [])]
276
277        elif input_type == UserInput.OPTION_CHOICE:
278            # select box
279            # one out of multiple options
280            # return option if valid, or default
281            if choice not in settings.get("options"):
282                if not silently_correct:
283                    raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(settings.get('options', {}).keys())}. {settings}")
284                else:
285                    return settings.get("default", "")
286            else:
287                return choice
288
289        elif input_type == UserInput.OPTION_TEXT_JSON:
290            # verify that this is actually json
291            try:
292                redumped_value = json.dumps(json.loads(choice))
293            except json.JSONDecodeError:
294                raise QueryParametersException("Invalid JSON value '%s'" % choice)
295
296            return json.loads(choice)
297
298        elif input_type in (UserInput.OPTION_TEXT, UserInput.OPTION_TEXT_LARGE, UserInput.OPTION_HUE):
299            # text string
300            # optionally clamp it as an integer; return default if not a valid
301            # integer (or float; inferred from default or made explicit via the
302            # coerce_type setting)
303            if settings.get("coerce_type"):
304                value_type = settings["coerce_type"]
305            else:
306                value_type = type(settings.get("default"))
307                if value_type not in (int, float):
308                    value_type = int
309
310            if "max" in settings:
311                try:
312                    choice = min(settings["max"], value_type(choice))
313                except (ValueError, TypeError) as e:
314                    if not silently_correct:
315                        raise QueryParametersException("Provide a value of %s or lower." % str(settings["max"]))
316
317                    choice = settings.get("default")
318
319            if "min" in settings:
320                try:
321                    choice = max(settings["min"], value_type(choice))
322                except (ValueError, TypeError) as e:
323                    if not silently_correct:
324                        raise QueryParametersException("Provide a value of %s or more." % str(settings["min"]))
325
326                    choice = settings.get("default")
327
328            if choice is None or choice == "":
329                choice = settings.get("default")
330
331            if choice is None:
332                choice = 0 if "min" in settings or "max" in settings else ""
333
334            if settings.get("coerce_type"):
335                try:
336                    return value_type(choice)
337                except (ValueError, TypeError):
338                    return settings.get("default")
339            else:
340                return choice
341
342        else:
343            # no filtering
344            return choice

Class for handling user input

It is important to sanitise user input, as carelessly entered parameters may result in e.g. requesting far more data than needed, or lead to undefined behaviour. This class offers a set of pre-defined value types that can be consistently rendered as form elements in an interface and parsed.

OPTION_TOGGLE = 'toggle'
OPTION_CHOICE = 'choice'
OPTION_TEXT = 'string'
OPTION_MULTI = 'multi'
OPTION_MULTI_SELECT = 'multi_select'
OPTION_INFO = 'info'
OPTION_TEXT_LARGE = 'textarea'
OPTION_TEXT_JSON = 'json'
OPTION_DATE = 'date'
OPTION_DATERANGE = 'daterange'
OPTION_DIVIDER = 'divider'
OPTION_FILE = 'file'
OPTION_HUE = 'hue'
OPTION_DATASOURCES = 'datasources'
OPTIONS_COSMETIC = ('info', 'divider')
@staticmethod
def parse_all(options, input, silently_correct=True):
 42    @staticmethod
 43    def parse_all(options, input, silently_correct=True):
 44        """
 45        Parse form input for the provided options
 46
 47        Ignores all input not belonging to any of the defined options: parses
 48        and sanitises the rest, and returns a dictionary with the sanitised
 49        options. If an option is *not* present in the input, the default value
 50        is used, and if that is absent, `None`.
 51
 52        In other words, this ensures a dictionary with 1) only white-listed
 53        keys, 2) a value of an expected type for each key.
 54
 55        :param dict options:  Options, as a name -> settings dictionary
 56        :param dict input:  Input, as a form field -> value dictionary
 57        :param bool silently_correct:  If true, replace invalid values with the
 58        given default value; else, raise a QueryParametersException if a value
 59        is invalid.
 60
 61        :return dict:  Sanitised form input
 62        """
 63        from common.lib.helpers import convert_to_int
 64        parsed_input = {}
 65
 66        if type(input) is not dict and type(input) is not ImmutableMultiDict:
 67            raise TypeError("input must be a dictionary or ImmutableMultiDict")
 68
 69        if type(input) is ImmutableMultiDict:
 70            # we are not using to_dict, because that messes up multi-selects
 71            input = {key: input.getlist(key) for key in input}
 72            for key, value in input.items():
 73                if type(value) is list and len(value) == 1:
 74                    input[key] = value[0]
 75
 76        # all parameters are submitted as option-[parameter ID], this is an 
 77        # artifact of how the web interface works and we can simply remove the
 78        # prefix
 79        input = {re.sub(r"^option-", "", field): input[field] for field in input}
 80
 81        # re-order input so that the fields relying on the value of other
 82        # fields are parsed last
 83        options = {k: options[k] for k in sorted(options, key=lambda k: options[k].get("requires") is not None)}
 84
 85        for option, settings in options.items():
 86            if settings.get("indirect"):
 87                # these are settings that are derived from and set by other
 88                # settings
 89                continue
 90
 91            if settings.get("type") in UserInput.OPTIONS_COSMETIC:
 92                # these are structural form elements and never have a value
 93                continue
 94
 95            elif settings.get("type") == UserInput.OPTION_DATERANGE:
 96                # special case, since it combines two inputs
 97                option_min = option + "-min"
 98                option_max = option + "-max"
 99
100                # normally this is taken care of client-side, but in case this
101                # didn't work, try to salvage it server-side
102                if option_min not in input or input.get(option_min) == "-1":
103                    option_min += "_proxy"
104
105                if option_max not in input or input.get(option_max) == "-1":
106                    option_max += "_proxy"
107
108                # save as a tuple of unix timestamps (or None)
109                try:
110                    after, before = (UserInput.parse_value(settings, input.get(option_min), parsed_input, silently_correct), UserInput.parse_value(settings, input.get(option_max), parsed_input, silently_correct))
111
112                    if before and after and after > before:
113                        if not silently_correct:
114                            raise QueryParametersException("End of date range must be after beginning of date range.")
115                        else:
116                            before = after
117
118                    parsed_input[option] = (after, before)
119                except RequirementsNotMetException:
120                    pass
121
122            elif settings.get("type") == UserInput.OPTION_TOGGLE:
123                # special case too, since if a checkbox is unchecked, it simply
124                # does not show up in the input
125                try:
126                    if option in input:
127                        # Toggle needs to be parsed
128                        parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
129                    else:
130                        # Toggle was left blank
131                        parsed_input[option] = False
132                except RequirementsNotMetException:
133                    pass
134
135            elif settings.get("type") == UserInput.OPTION_DATASOURCES:
136                # special case, because this combines multiple inputs to
137                # configure data source availability and expiration
138                datasources = {datasource: {
139                    "enabled": f"{option}-enable-{datasource}" in input,
140                    "allow_optout": f"{option}-optout-{datasource}" in input,
141                    "timeout": convert_to_int(input[f"{option}-timeout-{datasource}"], 0)
142                } for datasource in input[option].split(",")}
143
144                parsed_input[option] = [datasource for datasource, v in datasources.items() if v["enabled"]]
145                parsed_input[option.split(".")[0] + ".expiration"] = datasources
146
147            elif option not in input:
148                # not provided? use default
149                parsed_input[option] = settings.get("default", None)
150
151            else:
152                # normal parsing and sanitisation
153                try:
154                    parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
155                except RequirementsNotMetException:
156                    pass
157
158        return parsed_input

Parse form input for the provided options

Ignores all input not belonging to any of the defined options: parses and sanitises the rest, and returns a dictionary with the sanitised options. If an option is not present in the input, the default value is used, and if that is absent, None.

In other words, this ensures a dictionary with 1) only white-listed keys, 2) a value of an expected type for each key.

Parameters
  • dict options: Options, as a name -> settings dictionary
  • dict input: Input, as a form field -> value dictionary
  • bool silently_correct: If true, replace invalid values with the given default value; else, raise a QueryParametersException if a value is invalid.
Returns

Sanitised form input

@staticmethod
def parse_value(settings, choice, other_input=None, silently_correct=True):
160    @staticmethod
161    def parse_value(settings, choice, other_input=None, silently_correct=True):
162        """
163        Filter user input
164
165        Makes sure user input for post-processors is valid and within the
166        parameters specified by the post-processor
167
168        :param obj settings:  Settings, including defaults and valid options
169        :param choice:  The chosen option, to be parsed
170        :param dict other_input:  Other input, as parsed so far
171        :param bool silently_correct:  If true, replace invalid values with the
172        given default value; else, raise a QueryParametersException if a value
173        is invalid.
174
175        :return:  Validated and parsed input
176        """
177        # short-circuit if there is a requirement for the field to be parsed
178        # and the requirement isn't met
179        if settings.get("requires"):
180            try:
181                field, operator, value = re.findall(r"([a-zA-Z0-9_-]+)([!=$~^]+)(.*)", settings.get("requires"))[0]
182            except IndexError:
183                # invalid condition, interpret as 'does the field with this name have a value'
184                field, operator, value = (choice, "!=", "")
185
186            if field not in other_input:
187                raise RequirementsNotMetException()
188
189            other_value = other_input.get(field)
190            if type(other_value) is bool:
191                # evalues to a boolean, i.e. checkboxes etc
192                if operator == "!=":
193                    if (other_value and value in ("", "false")) or (not other_value and value in ("true", "checked")):
194                        raise RequirementsNotMetException()
195                else:
196                    if (other_value and value not in ("true", "checked")) or (not other_value and value not in ("", "false")):
197                        raise RequirementsNotMetException()
198
199            else:
200                if type(other_value) in (tuple, list):
201                # iterables are a bit special
202                    if len(other_value) == 1:
203                        # treat one-item lists as "normal" values
204                        other_value = other_value[0]
205                    elif operator == "~=":  # interpret as 'is in list?'
206                        if value not in other_value:
207                            raise RequirementsNotMetException()
208                    else:
209                        # condition doesn't make sense for a list, so assume it's not True
210                        raise RequirementsNotMetException()
211
212                if operator == "^=" and not str(other_value).startswith(value):
213                    raise RequirementsNotMetException()
214                elif operator == "$=" and not str(other_value).endswith(value):
215                    raise RequirementsNotMetException()
216                elif operator == "~=" and value not in str(other_value):
217                    raise RequirementsNotMetException()
218                elif operator == "!=" and value == other_value:
219                    raise RequirementsNotMetException()
220                elif operator in ("==", "=") and value != other_value:
221                    raise RequirementsNotMetException()
222
223        input_type = settings.get("type", "")
224        if input_type in UserInput.OPTIONS_COSMETIC:
225            # these are structural form elements and can never return a value
226            return None
227
228        elif input_type == UserInput.OPTION_TOGGLE:
229            # simple boolean toggle
230            if type(choice) == bool:
231                return choice
232            elif choice in ['false', 'False']:
233                # Sanitized options passed back to Flask can be converted to strings as 'false'
234                return False
235            elif choice in ['true', 'True', 'on']:
236                # Toggle will have value 'on', but may also becomes a string 'true'
237                return True
238            else:
239                raise QueryParametersException("Toggle invalid input")
240
241        elif input_type in (UserInput.OPTION_DATE, UserInput.OPTION_DATERANGE):
242            # parse either integers (unix timestamps) or try to guess the date
243            # format (the latter may be used for input if JavaScript is turned
244            # off in the front-end and the input comes from there)
245            value = None
246            try:
247                value = int(choice)
248            except ValueError:
249                parsed_choice = parse_datetime(choice)
250                value = int(parsed_choice.timestamp())
251            finally:
252                return value
253
254        elif input_type == UserInput.OPTION_MULTI:
255            # any number of values out of a list of possible values
256            # comma-separated during input, returned as a list of valid options
257            if not choice:
258                return settings.get("default", [])
259
260            chosen = choice.split(",")
261            return [item for item in chosen if item in settings.get("options", [])]
262
263        elif input_type == UserInput.OPTION_MULTI_SELECT:
264            # multiple number of values out of a dropdown list of possible values
265            # comma-separated during input, returned as a list of valid options
266            if not choice:
267                return settings.get("default", [])
268
269            if type(choice) is str:
270                # should be a list if the form control was actually a multiselect
271                # but we have some client side UI helpers that may produce a string
272                # instead
273                choice = choice.split(",")
274
275            return [item for item in choice if item in settings.get("options", [])]
276
277        elif input_type == UserInput.OPTION_CHOICE:
278            # select box
279            # one out of multiple options
280            # return option if valid, or default
281            if choice not in settings.get("options"):
282                if not silently_correct:
283                    raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(settings.get('options', {}).keys())}. {settings}")
284                else:
285                    return settings.get("default", "")
286            else:
287                return choice
288
289        elif input_type == UserInput.OPTION_TEXT_JSON:
290            # verify that this is actually json
291            try:
292                redumped_value = json.dumps(json.loads(choice))
293            except json.JSONDecodeError:
294                raise QueryParametersException("Invalid JSON value '%s'" % choice)
295
296            return json.loads(choice)
297
298        elif input_type in (UserInput.OPTION_TEXT, UserInput.OPTION_TEXT_LARGE, UserInput.OPTION_HUE):
299            # text string
300            # optionally clamp it as an integer; return default if not a valid
301            # integer (or float; inferred from default or made explicit via the
302            # coerce_type setting)
303            if settings.get("coerce_type"):
304                value_type = settings["coerce_type"]
305            else:
306                value_type = type(settings.get("default"))
307                if value_type not in (int, float):
308                    value_type = int
309
310            if "max" in settings:
311                try:
312                    choice = min(settings["max"], value_type(choice))
313                except (ValueError, TypeError) as e:
314                    if not silently_correct:
315                        raise QueryParametersException("Provide a value of %s or lower." % str(settings["max"]))
316
317                    choice = settings.get("default")
318
319            if "min" in settings:
320                try:
321                    choice = max(settings["min"], value_type(choice))
322                except (ValueError, TypeError) as e:
323                    if not silently_correct:
324                        raise QueryParametersException("Provide a value of %s or more." % str(settings["min"]))
325
326                    choice = settings.get("default")
327
328            if choice is None or choice == "":
329                choice = settings.get("default")
330
331            if choice is None:
332                choice = 0 if "min" in settings or "max" in settings else ""
333
334            if settings.get("coerce_type"):
335                try:
336                    return value_type(choice)
337                except (ValueError, TypeError):
338                    return settings.get("default")
339            else:
340                return choice
341
342        else:
343            # no filtering
344            return choice

Filter user input

Makes sure user input for post-processors is valid and within the parameters specified by the post-processor

Parameters
  • obj settings: Settings, including defaults and valid options
  • choice: The chosen option, to be parsed
  • dict other_input: Other input, as parsed so far
  • bool silently_correct: If true, replace invalid values with the given default value; else, raise a QueryParametersException if a value is invalid.
Returns

Validated and parsed input