common.lib.user_input
import json
import re

from dateutil.parser import parse as parse_datetime
from werkzeug.datastructures import ImmutableMultiDict

from common.lib.exceptions import QueryParametersException


class RequirementsNotMetException(Exception):
    """
    If this is raised while parsing, that option is not included in the parsed
    output. Used with the "requires" option setting.
    """
    pass


class UserInput:
    """
    Class for handling user input

    It is important to sanitise user input, as carelessly entered parameters
    may result in e.g. requesting far more data than needed, or lead to
    undefined behaviour. This class offers a set of pre-defined value types
    that can be consistently rendered as form elements in an interface and
    parsed.
    """
    OPTION_TOGGLE = "toggle"  # boolean toggle (checkbox)
    OPTION_CHOICE = "choice"  # one choice out of a list (select)
    OPTION_TEXT = "string"  # simple string or integer (input text)
    OPTION_MULTI = "multi"  # multiple values out of a list (select multiple)
    OPTION_MULTI_SELECT = "multi_select"  # multiple values out of a dropdown list (select multiple)
    OPTION_INFO = "info"  # just a bit of text, not actual input
    OPTION_TEXT_LARGE = "textarea"  # longer text
    OPTION_TEXT_JSON = "json"  # text, but should be valid JSON
    OPTION_DATE = "date"  # a single date
    OPTION_DATERANGE = "daterange"  # a beginning and end date
    OPTION_DIVIDER = "divider"  # meta-option, divides related sets of options
    OPTION_FILE = "file"  # file upload
    OPTION_HUE = "hue"  # colour hue
    OPTION_DATASOURCES = "datasources"  # data source toggling
    OPTION_EXTENSIONS = "extensions"  # extension toggling
    OPTION_DATASOURCES_TABLE = "datasources_table"  # a table with settings per data source
    OPTION_ANNOTATION = "annotation"  # checkbox for whether to write an annotation
    OPTION_ANNOTATIONS = "annotations"  # table for whether to write multiple annotations

    OPTIONS_COSMETIC = (OPTION_INFO, OPTION_DIVIDER)

    @staticmethod
    def parse_all(options, input, silently_correct=True):
        """
        Parse form input for the provided options

        Ignores all input not belonging to any of the defined options: parses
        and sanitises the rest, and returns a dictionary with the sanitised
        options. If an option is *not* present in the input, the default value
        is used, and if that is absent, `None`. Unchecked toggles are parsed
        as `False`; options whose "requires" condition is not met are left
        out of the result.

        In other words, this ensures a dictionary with 1) only white-listed
        keys (plus a derived `[prefix].expiration` key for data source
        options), 2) a value of an expected type for each key.

        :param dict options:  Options, as a name -> settings dictionary
        :param dict input:  Input, as a form field -> value dictionary
        :param bool silently_correct:  If true, replace invalid values with the
        given default value; else, raise a QueryParametersException if a value
        is invalid.

        :return dict:  Sanitised form input
        :raises TypeError:  If `input` is not a dict or ImmutableMultiDict
        :raises QueryParametersException:  If a value is invalid and cannot be
        (or may not be) corrected
        """
        # imported here rather than at module level, as in the original code
        from common.lib.helpers import convert_to_int

        if type(input) is not dict and type(input) is not ImmutableMultiDict:
            raise TypeError("input must be a dictionary or ImmutableMultiDict")

        if type(input) is ImmutableMultiDict:
            # we are not using to_dict, because that messes up multi-selects;
            # build a new dict instead of assigning to one while iterating it
            unwrapped = {}
            for key in input:
                values = input.getlist(key)
                unwrapped[key] = values[0] if len(values) == 1 else values
            input = unwrapped

        # all parameters are submitted as option-[parameter ID], this is an
        # artifact of how the web interface works and we can simply remove the
        # prefix
        input = {re.sub(r"^option-", "", field): value for field, value in input.items()}

        # re-order input so that the fields relying on the value of other
        # fields are parsed last (sorted() is stable, so the relative order is
        # otherwise kept)
        options = {k: options[k] for k in sorted(options, key=lambda k: options[k].get("requires") is not None)}

        parsed_input = {}
        for option, settings in options.items():
            option_type = settings.get("type")

            if settings.get("indirect"):
                # these are settings that are derived from and set by other
                # settings
                continue

            if option_type in UserInput.OPTIONS_COSMETIC:
                # these are structural form elements and never have a value
                continue

            elif option_type == UserInput.OPTION_DATERANGE:
                # special case, since it combines two inputs
                option_min = option + "-min"
                option_max = option + "-max"

                # normally this is taken care of client-side, but in case this
                # didn't work, try to salvage it server-side
                if option_min not in input or input.get(option_min) == "-1":
                    option_min += "_proxy"

                if option_max not in input or input.get(option_max) == "-1":
                    option_max += "_proxy"

                # save as a tuple of unix timestamps (or None)
                try:
                    after = UserInput.parse_value(settings, input.get(option_min), parsed_input, silently_correct)
                    before = UserInput.parse_value(settings, input.get(option_max), parsed_input, silently_correct)
                except RequirementsNotMetException:
                    continue

                if before and after and after > before:
                    if not silently_correct:
                        raise QueryParametersException("End of date range must be after beginning of date range.")
                    before = after

                parsed_input[option] = (after, before)

            elif option_type in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
                # special case too, since if a checkbox is unchecked, it simply
                # does not show up in the input
                if option not in input:
                    # Toggle was left blank
                    parsed_input[option] = False
                else:
                    # Toggle needs to be parsed
                    try:
                        parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
                    except RequirementsNotMetException:
                        pass

            elif option_type == UserInput.OPTION_DATASOURCES and option in input:
                # special case, because this combines multiple inputs to
                # configure data source availability and expiration. If the
                # field itself is missing we fall through to the default
                # below instead of failing with a KeyError.
                datasources = {
                    datasource: {
                        "enabled": f"{option}-enable-{datasource}" in input,
                        "allow_optout": f"{option}-optout-{datasource}" in input,
                        # a missing timeout field counts as 0
                        "timeout": convert_to_int(input.get(f"{option}-timeout-{datasource}", 0), 0),
                    }
                    for datasource in input[option].split(",")
                }

                parsed_input[option] = [datasource for datasource, v in datasources.items() if v["enabled"]]
                parsed_input[option.split(".")[0] + ".expiration"] = datasources

            elif option_type == UserInput.OPTION_EXTENSIONS and option in input:
                # also a special case
                parsed_input[option] = {
                    extension: {"enabled": f"{option}-enable-{extension}" in input}
                    for extension in input[option].split(",")
                }

            elif option_type == UserInput.OPTION_DATASOURCES_TABLE:
                # special case, parse table values to generate a dict
                table_input = {}

                for datasource in settings["default"]:
                    table_input[datasource] = {}
                    for column, column_settings in settings["columns"].items():
                        choice = input.get(option + "-" + datasource + "-" + column, False)
                        # table cells are always silently corrected, and their
                        # "requires" conditions refer to the table itself
                        table_input[datasource][column] = UserInput.parse_value(column_settings, choice, table_input, silently_correct=True)

                parsed_input[option] = table_input

            elif option not in input:
                # not provided? use default
                parsed_input[option] = settings.get("default", None)

            else:
                # normal parsing and sanitisation
                try:
                    parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
                except RequirementsNotMetException:
                    pass

        return parsed_input

    @staticmethod
    def parse_value(settings, choice, other_input=None, silently_correct=True):
        """
        Filter user input

        Makes sure user input for post-processors is valid and within the
        parameters specified by the post-processor

        :param obj settings:  Settings, including defaults and valid options
        :param choice:  The chosen option, to be parsed
        :param dict other_input:  Other input, as parsed so far. Used to
        evaluate the "requires" setting; `None` is treated as no other input.
        :param bool silently_correct:  If true, replace invalid values with the
        given default value; else, raise a QueryParametersException if a value
        is invalid.

        :return:  Validated and parsed input
        :raises RequirementsNotMetException:  If the option has a "requires"
        condition that is not satisfied by `other_input`
        :raises QueryParametersException:  If the value is invalid and is not
        silently corrected
        """
        if other_input is None:
            other_input = {}

        # short-circuit if there is a requirement for the field to be parsed
        # and the requirement isn't met
        requirement = settings.get("requires")
        if requirement:
            condition = re.findall(r"([a-zA-Z0-9_-]+)([!=$~^]+)(.*)", requirement)
            if condition:
                field, operator, value = condition[0]
            else:
                # invalid condition, interpret as 'does the field with this
                # name have a value'
                field, operator, value = (requirement, "!=", "")

            if field not in other_input:
                raise RequirementsNotMetException()

            other_value = other_input.get(field)
            if type(other_value) is bool:
                # evaluates to a boolean, i.e. checkboxes etc
                # NOTE(review): the "!=" case rejects e.g. `True != false`,
                # which looks inverted - kept as-is, confirm against the
                # front-end before changing
                if operator == "!=":
                    if (other_value and value in ("", "false")) or (not other_value and value in ("true", "checked")):
                        raise RequirementsNotMetException()
                else:
                    if (other_value and value not in ("true", "checked")) or (not other_value and value not in ("", "false")):
                        raise RequirementsNotMetException()

            elif isinstance(other_value, (tuple, list)) and len(other_value) != 1:
                # iterables are a bit special: the only condition that makes
                # sense is 'is in list?', anything else is assumed to be unmet
                if operator != "~=" or value not in other_value:
                    raise RequirementsNotMetException()

            else:
                if isinstance(other_value, (tuple, list)):
                    # treat one-item lists as "normal" values
                    other_value = other_value[0]

                # the condition value is always a string, so compare against
                # the string form of the other value for every operator
                comparable = str(other_value)
                unmet = (
                    (operator == "^=" and not comparable.startswith(value))
                    or (operator == "$=" and not comparable.endswith(value))
                    or (operator == "~=" and value not in comparable)
                    or (operator == "!=" and value == comparable)
                    or (operator in ("==", "=") and value != comparable)
                )
                if unmet:
                    raise RequirementsNotMetException()

        input_type = settings.get("type", "")
        if input_type in UserInput.OPTIONS_COSMETIC:
            # these are structural form elements and can never return a value
            return None

        elif input_type in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
            # simple boolean toggle
            if type(choice) is bool:
                return choice
            elif choice in ("false", "False"):
                # Sanitized options passed back to Flask can be converted to strings as 'false'
                return False
            elif choice in ("true", "True", "on"):
                # Toggle will have value 'on', but may also become a string 'true'
                return True
            else:
                raise QueryParametersException("Toggle invalid input")

        elif input_type in (UserInput.OPTION_DATE, UserInput.OPTION_DATERANGE):
            # parse either integers (unix timestamps) or try to guess the date
            # format (the latter may be used for input if JavaScript is turned
            # off in the front-end and the input comes from there). Values
            # that cannot be interpreted yield None; this is handled with
            # explicit excepts rather than a `return` in `finally`, which
            # would also swallow unrelated exceptions.
            try:
                return int(choice)
            except (TypeError, OverflowError):
                # e.g. a missing field (None)
                return None
            except ValueError:
                pass

            try:
                return int(parse_datetime(choice).timestamp())
            except (ValueError, TypeError, OverflowError, OSError):
                return None

        elif input_type in (UserInput.OPTION_MULTI, UserInput.OPTION_ANNOTATIONS, UserInput.OPTION_MULTI_SELECT):
            # any number of values out of a list of possible values
            # comma-separated during input, returned as a list of valid options
            if not choice:
                return settings.get("default", [])

            # should be a list if the form control was actually a multiselect
            # but we have some client side UI helpers that may produce a string
            # instead
            chosen = choice.split(",") if isinstance(choice, str) else choice
            valid_options = settings.get("options", [])
            return [item for item in chosen if item in valid_options]

        elif input_type == UserInput.OPTION_CHOICE:
            # select box
            # one out of multiple options
            # return option if valid, or default
            valid_options = settings.get("options") or {}
            try:
                is_valid = choice in valid_options
            except TypeError:
                # unhashable input (e.g. a list of values) is never valid
                is_valid = False

            if is_valid:
                return choice

            if not silently_correct:
                raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(str(valid) for valid in valid_options)}. {settings}")

            return settings.get("default", "")

        elif input_type == UserInput.OPTION_TEXT_JSON:
            # verify that this is actually json
            try:
                return json.loads(choice)
            except (json.JSONDecodeError, TypeError) as e:
                raise QueryParametersException(f"Invalid JSON value '{choice}'") from e

        elif input_type in (UserInput.OPTION_TEXT, UserInput.OPTION_TEXT_LARGE, UserInput.OPTION_HUE):
            # text string
            # optionally clamp it as an integer; return default if not a valid
            # integer (or float; inferred from default or made explicit via the
            # coerce_type setting)
            if settings.get("coerce_type"):
                value_type = settings["coerce_type"]
            else:
                value_type = type(settings.get("default"))
                if value_type not in (int, float):
                    value_type = int

            if "max" in settings:
                try:
                    choice = min(settings["max"], value_type(choice))
                except (ValueError, TypeError):
                    if not silently_correct:
                        raise QueryParametersException("Provide a value of %s or lower." % str(settings["max"]))

                    choice = settings.get("default")

            if "min" in settings:
                try:
                    choice = max(settings["min"], value_type(choice))
                except (ValueError, TypeError):
                    if not silently_correct:
                        raise QueryParametersException("Provide a value of %s or more." % str(settings["min"]))

                    choice = settings.get("default")

            if choice is None or choice == "":
                choice = settings.get("default")

            if choice is None:
                choice = 0 if "min" in settings or "max" in settings else ""

            if settings.get("coerce_type"):
                try:
                    return value_type(choice)
                except (ValueError, TypeError):
                    return settings.get("default")
            else:
                return choice

        else:
            # no filtering
            return choice
class RequirementsNotMetException(Exception):
    """
    Signals that an option's "requires" condition is not satisfied

    When this is raised while parsing, the option in question is left out of
    the parsed output.
    """
If this is raised while parsing, that option is not included in the parsed output. Used with the "requires" option setting.
class UserInput:
    """
    Class for handling user input

    It is important to sanitise user input, as carelessly entered parameters
    may result in e.g. requesting far more data than needed, or lead to
    undefined behaviour. This class offers a set of pre-defined value types
    that can be consistently rendered as form elements in an interface and
    parsed.
    """
    OPTION_TOGGLE = "toggle"  # boolean toggle (checkbox)
    OPTION_CHOICE = "choice"  # one choice out of a list (select)
    OPTION_TEXT = "string"  # simple string or integer (input text)
    OPTION_MULTI = "multi"  # multiple values out of a list (select multiple)
    OPTION_MULTI_SELECT = "multi_select"  # multiple values out of a dropdown list (select multiple)
    OPTION_INFO = "info"  # just a bit of text, not actual input
    OPTION_TEXT_LARGE = "textarea"  # longer text
    OPTION_TEXT_JSON = "json"  # text, but should be valid JSON
    OPTION_DATE = "date"  # a single date
    OPTION_DATERANGE = "daterange"  # a beginning and end date
    OPTION_DIVIDER = "divider"  # meta-option, divides related sets of options
    OPTION_FILE = "file"  # file upload
    OPTION_HUE = "hue"  # colour hue
    OPTION_DATASOURCES = "datasources"  # data source toggling
    OPTION_EXTENSIONS = "extensions"  # extension toggling
    OPTION_DATASOURCES_TABLE = "datasources_table"  # a table with settings per data source
    OPTION_ANNOTATION = "annotation"  # checkbox for whether to write an annotation
    OPTION_ANNOTATIONS = "annotations"  # table for whether to write multiple annotations

    OPTIONS_COSMETIC = (OPTION_INFO, OPTION_DIVIDER)

    @staticmethod
    def parse_all(options, input, silently_correct=True):
        """
        Parse form input for the provided options

        Ignores all input not belonging to any of the defined options: parses
        and sanitises the rest, and returns a dictionary with the sanitised
        options. If an option is *not* present in the input, the default value
        is used, and if that is absent, `None`. Unchecked toggles are parsed
        as `False`; options whose "requires" condition is not met are left
        out of the result.

        In other words, this ensures a dictionary with 1) only white-listed
        keys (plus a derived `[prefix].expiration` key for data source
        options), 2) a value of an expected type for each key.

        :param dict options:  Options, as a name -> settings dictionary
        :param dict input:  Input, as a form field -> value dictionary
        :param bool silently_correct:  If true, replace invalid values with the
        given default value; else, raise a QueryParametersException if a value
        is invalid.

        :return dict:  Sanitised form input
        :raises TypeError:  If `input` is not a dict or ImmutableMultiDict
        :raises QueryParametersException:  If a value is invalid and cannot be
        (or may not be) corrected
        """
        # imported here rather than at module level, as in the original code
        from common.lib.helpers import convert_to_int

        if type(input) is not dict and type(input) is not ImmutableMultiDict:
            raise TypeError("input must be a dictionary or ImmutableMultiDict")

        if type(input) is ImmutableMultiDict:
            # we are not using to_dict, because that messes up multi-selects;
            # build a new dict instead of assigning to one while iterating it
            unwrapped = {}
            for key in input:
                values = input.getlist(key)
                unwrapped[key] = values[0] if len(values) == 1 else values
            input = unwrapped

        # all parameters are submitted as option-[parameter ID], this is an
        # artifact of how the web interface works and we can simply remove the
        # prefix
        input = {re.sub(r"^option-", "", field): value for field, value in input.items()}

        # re-order input so that the fields relying on the value of other
        # fields are parsed last (sorted() is stable, so the relative order is
        # otherwise kept)
        options = {k: options[k] for k in sorted(options, key=lambda k: options[k].get("requires") is not None)}

        parsed_input = {}
        for option, settings in options.items():
            option_type = settings.get("type")

            if settings.get("indirect"):
                # these are settings that are derived from and set by other
                # settings
                continue

            if option_type in UserInput.OPTIONS_COSMETIC:
                # these are structural form elements and never have a value
                continue

            elif option_type == UserInput.OPTION_DATERANGE:
                # special case, since it combines two inputs
                option_min = option + "-min"
                option_max = option + "-max"

                # normally this is taken care of client-side, but in case this
                # didn't work, try to salvage it server-side
                if option_min not in input or input.get(option_min) == "-1":
                    option_min += "_proxy"

                if option_max not in input or input.get(option_max) == "-1":
                    option_max += "_proxy"

                # save as a tuple of unix timestamps (or None)
                try:
                    after = UserInput.parse_value(settings, input.get(option_min), parsed_input, silently_correct)
                    before = UserInput.parse_value(settings, input.get(option_max), parsed_input, silently_correct)
                except RequirementsNotMetException:
                    continue

                if before and after and after > before:
                    if not silently_correct:
                        raise QueryParametersException("End of date range must be after beginning of date range.")
                    before = after

                parsed_input[option] = (after, before)

            elif option_type in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
                # special case too, since if a checkbox is unchecked, it simply
                # does not show up in the input
                if option not in input:
                    # Toggle was left blank
                    parsed_input[option] = False
                else:
                    # Toggle needs to be parsed
                    try:
                        parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
                    except RequirementsNotMetException:
                        pass

            elif option_type == UserInput.OPTION_DATASOURCES and option in input:
                # special case, because this combines multiple inputs to
                # configure data source availability and expiration. If the
                # field itself is missing we fall through to the default
                # below instead of failing with a KeyError.
                datasources = {
                    datasource: {
                        "enabled": f"{option}-enable-{datasource}" in input,
                        "allow_optout": f"{option}-optout-{datasource}" in input,
                        # a missing timeout field counts as 0
                        "timeout": convert_to_int(input.get(f"{option}-timeout-{datasource}", 0), 0),
                    }
                    for datasource in input[option].split(",")
                }

                parsed_input[option] = [datasource for datasource, v in datasources.items() if v["enabled"]]
                parsed_input[option.split(".")[0] + ".expiration"] = datasources

            elif option_type == UserInput.OPTION_EXTENSIONS and option in input:
                # also a special case
                parsed_input[option] = {
                    extension: {"enabled": f"{option}-enable-{extension}" in input}
                    for extension in input[option].split(",")
                }

            elif option_type == UserInput.OPTION_DATASOURCES_TABLE:
                # special case, parse table values to generate a dict
                table_input = {}

                for datasource in settings["default"]:
                    table_input[datasource] = {}
                    for column, column_settings in settings["columns"].items():
                        choice = input.get(option + "-" + datasource + "-" + column, False)
                        # table cells are always silently corrected, and their
                        # "requires" conditions refer to the table itself
                        table_input[datasource][column] = UserInput.parse_value(column_settings, choice, table_input, silently_correct=True)

                parsed_input[option] = table_input

            elif option not in input:
                # not provided? use default
                parsed_input[option] = settings.get("default", None)

            else:
                # normal parsing and sanitisation
                try:
                    parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
                except RequirementsNotMetException:
                    pass

        return parsed_input

    @staticmethod
    def parse_value(settings, choice, other_input=None, silently_correct=True):
        """
        Filter user input

        Makes sure user input for post-processors is valid and within the
        parameters specified by the post-processor

        :param obj settings:  Settings, including defaults and valid options
        :param choice:  The chosen option, to be parsed
        :param dict other_input:  Other input, as parsed so far. Used to
        evaluate the "requires" setting; `None` is treated as no other input.
        :param bool silently_correct:  If true, replace invalid values with the
        given default value; else, raise a QueryParametersException if a value
        is invalid.

        :return:  Validated and parsed input
        :raises RequirementsNotMetException:  If the option has a "requires"
        condition that is not satisfied by `other_input`
        :raises QueryParametersException:  If the value is invalid and is not
        silently corrected
        """
        if other_input is None:
            other_input = {}

        # short-circuit if there is a requirement for the field to be parsed
        # and the requirement isn't met
        requirement = settings.get("requires")
        if requirement:
            condition = re.findall(r"([a-zA-Z0-9_-]+)([!=$~^]+)(.*)", requirement)
            if condition:
                field, operator, value = condition[0]
            else:
                # invalid condition, interpret as 'does the field with this
                # name have a value'
                field, operator, value = (requirement, "!=", "")

            if field not in other_input:
                raise RequirementsNotMetException()

            other_value = other_input.get(field)
            if type(other_value) is bool:
                # evaluates to a boolean, i.e. checkboxes etc
                # NOTE(review): the "!=" case rejects e.g. `True != false`,
                # which looks inverted - kept as-is, confirm against the
                # front-end before changing
                if operator == "!=":
                    if (other_value and value in ("", "false")) or (not other_value and value in ("true", "checked")):
                        raise RequirementsNotMetException()
                else:
                    if (other_value and value not in ("true", "checked")) or (not other_value and value not in ("", "false")):
                        raise RequirementsNotMetException()

            elif isinstance(other_value, (tuple, list)) and len(other_value) != 1:
                # iterables are a bit special: the only condition that makes
                # sense is 'is in list?', anything else is assumed to be unmet
                if operator != "~=" or value not in other_value:
                    raise RequirementsNotMetException()

            else:
                if isinstance(other_value, (tuple, list)):
                    # treat one-item lists as "normal" values
                    other_value = other_value[0]

                # the condition value is always a string, so compare against
                # the string form of the other value for every operator
                comparable = str(other_value)
                unmet = (
                    (operator == "^=" and not comparable.startswith(value))
                    or (operator == "$=" and not comparable.endswith(value))
                    or (operator == "~=" and value not in comparable)
                    or (operator == "!=" and value == comparable)
                    or (operator in ("==", "=") and value != comparable)
                )
                if unmet:
                    raise RequirementsNotMetException()

        input_type = settings.get("type", "")
        if input_type in UserInput.OPTIONS_COSMETIC:
            # these are structural form elements and can never return a value
            return None

        elif input_type in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
            # simple boolean toggle
            if type(choice) is bool:
                return choice
            elif choice in ("false", "False"):
                # Sanitized options passed back to Flask can be converted to strings as 'false'
                return False
            elif choice in ("true", "True", "on"):
                # Toggle will have value 'on', but may also become a string 'true'
                return True
            else:
                raise QueryParametersException("Toggle invalid input")

        elif input_type in (UserInput.OPTION_DATE, UserInput.OPTION_DATERANGE):
            # parse either integers (unix timestamps) or try to guess the date
            # format (the latter may be used for input if JavaScript is turned
            # off in the front-end and the input comes from there). Values
            # that cannot be interpreted yield None; this is handled with
            # explicit excepts rather than a `return` in `finally`, which
            # would also swallow unrelated exceptions.
            try:
                return int(choice)
            except (TypeError, OverflowError):
                # e.g. a missing field (None)
                return None
            except ValueError:
                pass

            try:
                return int(parse_datetime(choice).timestamp())
            except (ValueError, TypeError, OverflowError, OSError):
                return None

        elif input_type in (UserInput.OPTION_MULTI, UserInput.OPTION_ANNOTATIONS, UserInput.OPTION_MULTI_SELECT):
            # any number of values out of a list of possible values
            # comma-separated during input, returned as a list of valid options
            if not choice:
                return settings.get("default", [])

            # should be a list if the form control was actually a multiselect
            # but we have some client side UI helpers that may produce a string
            # instead
            chosen = choice.split(",") if isinstance(choice, str) else choice
            valid_options = settings.get("options", [])
            return [item for item in chosen if item in valid_options]

        elif input_type == UserInput.OPTION_CHOICE:
            # select box
            # one out of multiple options
            # return option if valid, or default
            valid_options = settings.get("options") or {}
            try:
                is_valid = choice in valid_options
            except TypeError:
                # unhashable input (e.g. a list of values) is never valid
                is_valid = False

            if is_valid:
                return choice

            if not silently_correct:
                raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(str(valid) for valid in valid_options)}. {settings}")

            return settings.get("default", "")

        elif input_type == UserInput.OPTION_TEXT_JSON:
            # verify that this is actually json
            try:
                return json.loads(choice)
            except (json.JSONDecodeError, TypeError) as e:
                raise QueryParametersException(f"Invalid JSON value '{choice}'") from e

        elif input_type in (UserInput.OPTION_TEXT, UserInput.OPTION_TEXT_LARGE, UserInput.OPTION_HUE):
            # text string
            # optionally clamp it as an integer; return default if not a valid
            # integer (or float; inferred from default or made explicit via the
            # coerce_type setting)
            if settings.get("coerce_type"):
                value_type = settings["coerce_type"]
            else:
                value_type = type(settings.get("default"))
                if value_type not in (int, float):
                    value_type = int

            if "max" in settings:
                try:
                    choice = min(settings["max"], value_type(choice))
                except (ValueError, TypeError):
                    if not silently_correct:
                        raise QueryParametersException("Provide a value of %s or lower." % str(settings["max"]))

                    choice = settings.get("default")

            if "min" in settings:
                try:
                    choice = max(settings["min"], value_type(choice))
                except (ValueError, TypeError):
                    if not silently_correct:
                        raise QueryParametersException("Provide a value of %s or more." % str(settings["min"]))

                    choice = settings.get("default")

            if choice is None or choice == "":
                choice = settings.get("default")

            if choice is None:
                choice = 0 if "min" in settings or "max" in settings else ""

            if settings.get("coerce_type"):
                try:
                    return value_type(choice)
                except (ValueError, TypeError):
                    return settings.get("default")
            else:
                return choice

        else:
            # no filtering
            return choice
Class for handling user input
It is important to sanitise user input, as carelessly entered parameters may result in e.g. requests for far more data than needed, or lead to undefined behaviour. This class offers a set of pre-defined value types that can be consistently rendered as form elements in an interface and parsed.
@staticmethod
def parse_all(options, input, silently_correct=True):
    """
    Parse form input for the provided options

    Ignores all input not belonging to any of the defined options: parses
    and sanitises the rest, and returns a dictionary with the sanitised
    options. If an option is *not* present in the input, the default value
    is used, and if that is absent, `None`. Unchecked toggles are parsed
    as `False`; options whose "requires" condition is not met are left
    out of the result.

    In other words, this ensures a dictionary with 1) only white-listed
    keys (plus a derived `[prefix].expiration` key for data source
    options), 2) a value of an expected type for each key.

    :param dict options:  Options, as a name -> settings dictionary
    :param dict input:  Input, as a form field -> value dictionary
    :param bool silently_correct:  If true, replace invalid values with the
    given default value; else, raise a QueryParametersException if a value
    is invalid.

    :return dict:  Sanitised form input
    :raises TypeError:  If `input` is not a dict or ImmutableMultiDict
    :raises QueryParametersException:  If a value is invalid and cannot be
    (or may not be) corrected
    """
    # imported here rather than at module level, as in the original code
    from common.lib.helpers import convert_to_int

    if type(input) is not dict and type(input) is not ImmutableMultiDict:
        raise TypeError("input must be a dictionary or ImmutableMultiDict")

    if type(input) is ImmutableMultiDict:
        # we are not using to_dict, because that messes up multi-selects;
        # build a new dict instead of assigning to one while iterating it
        unwrapped = {}
        for key in input:
            values = input.getlist(key)
            unwrapped[key] = values[0] if len(values) == 1 else values
        input = unwrapped

    # all parameters are submitted as option-[parameter ID], this is an
    # artifact of how the web interface works and we can simply remove the
    # prefix
    input = {re.sub(r"^option-", "", field): value for field, value in input.items()}

    # re-order input so that the fields relying on the value of other
    # fields are parsed last (sorted() is stable, so the relative order is
    # otherwise kept)
    options = {k: options[k] for k in sorted(options, key=lambda k: options[k].get("requires") is not None)}

    parsed_input = {}
    for option, settings in options.items():
        option_type = settings.get("type")

        if settings.get("indirect"):
            # these are settings that are derived from and set by other
            # settings
            continue

        if option_type in UserInput.OPTIONS_COSMETIC:
            # these are structural form elements and never have a value
            continue

        elif option_type == UserInput.OPTION_DATERANGE:
            # special case, since it combines two inputs
            option_min = option + "-min"
            option_max = option + "-max"

            # normally this is taken care of client-side, but in case this
            # didn't work, try to salvage it server-side
            if option_min not in input or input.get(option_min) == "-1":
                option_min += "_proxy"

            if option_max not in input or input.get(option_max) == "-1":
                option_max += "_proxy"

            # save as a tuple of unix timestamps (or None)
            try:
                after = UserInput.parse_value(settings, input.get(option_min), parsed_input, silently_correct)
                before = UserInput.parse_value(settings, input.get(option_max), parsed_input, silently_correct)
            except RequirementsNotMetException:
                continue

            if before and after and after > before:
                if not silently_correct:
                    raise QueryParametersException("End of date range must be after beginning of date range.")
                before = after

            parsed_input[option] = (after, before)

        elif option_type in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
            # special case too, since if a checkbox is unchecked, it simply
            # does not show up in the input
            if option not in input:
                # Toggle was left blank
                parsed_input[option] = False
            else:
                # Toggle needs to be parsed
                try:
                    parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
                except RequirementsNotMetException:
                    pass

        elif option_type == UserInput.OPTION_DATASOURCES and option in input:
            # special case, because this combines multiple inputs to
            # configure data source availability and expiration. If the
            # field itself is missing we fall through to the default
            # below instead of failing with a KeyError.
            datasources = {
                datasource: {
                    "enabled": f"{option}-enable-{datasource}" in input,
                    "allow_optout": f"{option}-optout-{datasource}" in input,
                    # a missing timeout field counts as 0
                    "timeout": convert_to_int(input.get(f"{option}-timeout-{datasource}", 0), 0),
                }
                for datasource in input[option].split(",")
            }

            parsed_input[option] = [datasource for datasource, v in datasources.items() if v["enabled"]]
            parsed_input[option.split(".")[0] + ".expiration"] = datasources

        elif option_type == UserInput.OPTION_EXTENSIONS and option in input:
            # also a special case
            parsed_input[option] = {
                extension: {"enabled": f"{option}-enable-{extension}" in input}
                for extension in input[option].split(",")
            }

        elif option_type == UserInput.OPTION_DATASOURCES_TABLE:
            # special case, parse table values to generate a dict
            table_input = {}

            for datasource in settings["default"]:
                table_input[datasource] = {}
                for column, column_settings in settings["columns"].items():
                    choice = input.get(option + "-" + datasource + "-" + column, False)
                    # table cells are always silently corrected, and their
                    # "requires" conditions refer to the table itself
                    table_input[datasource][column] = UserInput.parse_value(column_settings, choice, table_input, silently_correct=True)

            parsed_input[option] = table_input

        elif option not in input:
            # not provided? use default
            parsed_input[option] = settings.get("default", None)

        else:
            # normal parsing and sanitisation
            try:
                parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
            except RequirementsNotMetException:
                pass

    return parsed_input
Parse form input for the provided options
Ignores all input not belonging to any of the defined options: parses
and sanitises the rest, and returns a dictionary with the sanitised
options. If an option is not present in the input, the default value
is used, and if that is absent, `None`.
In other words, this ensures a dictionary with 1) only white-listed keys, 2) a value of an expected type for each key.
Parameters
- dict options: Options, as a name -> settings dictionary
- dict input: Input, as a form field -> value dictionary
- bool silently_correct: If true, replace invalid values with the given default value; else, raise a QueryParametersException if a value is invalid.
Returns
Sanitised form input
@staticmethod
def parse_value(settings, choice, other_input=None, silently_correct=True):
    """
    Filter user input

    Makes sure user input for post-processors is valid and within the
    parameters specified by the post-processor

    If the settings contain a "requires" condition (`field`, an operator
    made up of the characters `!=$~^`, and a value), it is evaluated
    against `other_input` first. A condition without an operator is
    interpreted as 'the field with this name has a non-empty value'.

    :param obj settings:  Settings, including defaults and valid options
    :param choice:  The chosen option, to be parsed
    :param dict other_input:  Other input, as parsed so far. Used to
    evaluate the "requires" condition; `None` is treated as no other input.
    :param bool silently_correct:  If true, replace invalid values with the
    given default value; else, raise a QueryParametersException if a value
    is invalid.

    :return:  Validated and parsed input
    :raises RequirementsNotMetException:  If the "requires" condition of
    the option is not met
    :raises QueryParametersException:  If the value is invalid and is not
    silently corrected. Invalid toggle and JSON values always raise.
    """
    if other_input is None:
        # no other input means no requirement can be satisfied, but it
        # should not crash the membership test below
        other_input = {}

    # short-circuit if there is a requirement for the field to be parsed
    # and the requirement isn't met
    requirement = settings.get("requires")
    if requirement:
        conditions = re.findall(r"([a-zA-Z0-9_-]+)([!=$~^]+)(.*)", requirement)
        if conditions:
            field, operator, value = conditions[0]
        else:
            # no operator found: interpret as 'does the field with this
            # name have a value'. The field name is the requirement itself,
            # not the submitted value.
            field, operator, value = (requirement, "!=", "")

        if field not in other_input:
            raise RequirementsNotMetException()

        other_value = other_input.get(field)
        if type(other_value) is bool:
            # evaluates to a boolean, i.e. checkboxes etc; these are the
            # condition values considered equal to the checkbox state
            equivalent = ("true", "checked") if other_value else ("", "false")
            if operator == "!=":
                # 'must differ' fails when the value matches the state
                if value in equivalent:
                    raise RequirementsNotMetException()
            else:
                # any other operator is read as 'must equal'
                if value not in equivalent:
                    raise RequirementsNotMetException()

        else:
            if type(other_value) in (tuple, list):
                # iterables are a bit special
                if len(other_value) == 1:
                    # treat one-item lists as "normal" values
                    other_value = other_value[0]
                elif operator == "~=":  # interpret as 'is in list?'
                    if value not in other_value:
                        raise RequirementsNotMetException()
                else:
                    # condition doesn't make sense for a list, so assume it's not True
                    raise RequirementsNotMetException()

            if operator == "^=" and not str(other_value).startswith(value):
                raise RequirementsNotMetException()
            elif operator == "$=" and not str(other_value).endswith(value):
                raise RequirementsNotMetException()
            elif operator == "~=" and value not in str(other_value):
                raise RequirementsNotMetException()
            elif operator == "!=" and value == other_value:
                raise RequirementsNotMetException()
            elif operator in ("==", "=") and value != other_value:
                raise RequirementsNotMetException()

    input_type = settings.get("type", "")
    if input_type in UserInput.OPTIONS_COSMETIC:
        # these are structural form elements and can never return a value
        return None

    elif input_type in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
        # simple boolean toggle
        if type(choice) is bool:
            return choice
        elif choice in ['false', 'False']:
            # Sanitized options passed back to Flask can be converted to strings as 'false'
            return False
        elif choice in ['true', 'True', 'on']:
            # Toggle will have value 'on', but may also become a string 'true'
            return True
        else:
            raise QueryParametersException("Toggle invalid input")

    elif input_type in (UserInput.OPTION_DATE, UserInput.OPTION_DATERANGE):
        # parse either integers (unix timestamps) or try to guess the date
        # format (the latter may be used for input if JavaScript is turned
        # off in the front-end and the input comes from there)
        try:
            return int(choice)
        except (ValueError, TypeError, OverflowError):
            pass

        # anything that cannot be interpreted as a date becomes None; the
        # errors are caught explicitly instead of via `return` in a
        # `finally` clause, which would also swallow e.g. KeyboardInterrupt
        try:
            return int(parse_datetime(choice).timestamp())
        except (ValueError, TypeError, OverflowError, OSError):
            return None

    elif input_type in (UserInput.OPTION_MULTI, UserInput.OPTION_ANNOTATIONS, UserInput.OPTION_MULTI_SELECT):
        # any number of values out of a list of possible values
        # comma-separated during input, returned as a list of valid options
        if not choice:
            return settings.get("default", [])

        if type(choice) is str:
            # a multiselect form control submits a list, but some client
            # side UI helpers may produce a comma-separated string instead
            choice = choice.split(",")

        valid_options = settings.get("options", [])
        return [item for item in choice if item in valid_options]

    elif input_type == UserInput.OPTION_CHOICE:
        # select box
        # one out of multiple options
        # return option if valid, or default
        valid_options = settings.get("options") or {}
        try:
            is_valid = choice in valid_options
        except TypeError:
            # unhashable value, e.g. a list because the field was
            # submitted more than once
            is_valid = False

        if is_valid:
            return choice

        if not silently_correct:
            raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(map(str, valid_options))}. {settings}")

        return settings.get("default", "")

    elif input_type == UserInput.OPTION_TEXT_JSON:
        # verify that this is actually json, and return the decoded value
        try:
            return json.loads(choice)
        except (json.JSONDecodeError, TypeError) as e:
            raise QueryParametersException("Invalid JSON value '%s'" % choice) from e

    elif input_type in (UserInput.OPTION_TEXT, UserInput.OPTION_TEXT_LARGE, UserInput.OPTION_HUE):
        # text string
        # optionally clamp it as an integer; return default if not a valid
        # integer (or float; inferred from default or made explicit via the
        # coerce_type setting)
        if settings.get("coerce_type"):
            value_type = settings["coerce_type"]
        else:
            value_type = type(settings.get("default"))
            if value_type not in (int, float):
                value_type = int

        if "max" in settings:
            try:
                choice = min(settings["max"], value_type(choice))
            except (ValueError, TypeError):
                if not silently_correct:
                    raise QueryParametersException("Provide a value of %s or lower." % str(settings["max"]))

                choice = settings.get("default")

        if "min" in settings:
            try:
                choice = max(settings["min"], value_type(choice))
            except (ValueError, TypeError):
                if not silently_correct:
                    raise QueryParametersException("Provide a value of %s or more." % str(settings["min"]))

                choice = settings.get("default")

        if choice is None or choice == "":
            choice = settings.get("default")

        if choice is None:
            # no default either: numeric fields fall back to 0, text to ""
            choice = 0 if "min" in settings or "max" in settings else ""

        if settings.get("coerce_type"):
            try:
                return value_type(choice)
            except (ValueError, TypeError):
                return settings.get("default")
        else:
            return choice

    else:
        # no filtering
        return choice
Filter user input
Makes sure user input for post-processors is valid and within the parameters specified by the post-processor
Parameters
- obj settings: Settings, including defaults and valid options
- choice: The chosen option, to be parsed
- dict other_input: Other input, as parsed so far
- bool silently_correct: If true, replace invalid values with the given default value; else, raise a QueryParametersException if a value is invalid.
Returns
Validated and parsed input