common.lib.user_input
import json
import re

from dateutil.parser import parse as parse_datetime
from werkzeug.datastructures import ImmutableMultiDict

from common.lib.exceptions import QueryParametersException


class RequirementsNotMetException(Exception):
    """
    If this is raised while parsing, that option is not included in the parsed
    output. Used with the "requires" option setting.
    """
    pass


class UserInput:
    """
    Class for handling user input

    It is important to sanitise user input, as carelessly entered parameters
    may result in e.g. requesting far more data than needed, or lead to
    undefined behaviour. This class offers a set of pre-defined value types
    that can be consistently rendered as form elements in an interface and
    parsed.
    """
    OPTION_TOGGLE = "toggle"  # boolean toggle (checkbox)
    OPTION_CHOICE = "choice"  # one choice out of a list (select)
    OPTION_TEXT = "string"  # simple string or integer (input text)
    OPTION_MULTI = "multi"  # multiple values out of a list (select multiple)
    OPTION_MULTI_SELECT = "multi_select"  # multiple values out of a dropdown list (select multiple)
    OPTION_INFO = "info"  # just a bit of text, not actual input
    OPTION_TEXT_LARGE = "textarea"  # longer text
    OPTION_TEXT_JSON = "json"  # text, but should be valid JSON
    OPTION_DATE = "date"  # a single date
    OPTION_DATERANGE = "daterange"  # a beginning and end date
    OPTION_DIVIDER = "divider"  # meta-option, divides related sets of options
    OPTION_FILE = "file"  # file upload
    OPTION_HUE = "hue"  # colour hue
    OPTION_DATASOURCES = "datasources"  # data source toggling
    OPTION_DATASOURCES_TABLE = "datasources_table"  # a table with settings per data source
    OPTION_ANNOTATION = "annotation"  # checkbox for whether to write an annotation
    OPTION_ANNOTATIONS = "annotations"  # table for whether to write multiple annotations

    OPTIONS_COSMETIC = (OPTION_INFO, OPTION_DIVIDER)

    @staticmethod
    def parse_all(options, input, silently_correct=True):
        """
        Parse form input for the provided options

        Ignores all input not belonging to any of the defined options: parses
        and sanitises the rest, and returns a dictionary with the sanitised
        options. If an option is *not* present in the input, the default value
        is used, and if that is absent, `None`.

        In other words, this ensures a dictionary with 1) only white-listed
        keys, 2) a value of an expected type for each key. Cosmetic and
        "indirect" options are skipped, options whose "requires" condition is
        not met are left out, and a "datasources" option additionally writes
        a `<prefix>.expiration` key.

        :param dict options:  Options, as a name -> settings dictionary
        :param dict input:  Input, as a form field -> value dictionary. Any
        `dict` (sub)class is accepted; if the object offers `getlist()` (as
        werkzeug's multi-dicts do) all values of multi-value fields are kept.
        :param bool silently_correct:  If true, replace invalid values with the
        given default value; else, raise a QueryParametersException if a value
        is invalid.

        :return dict:  Sanitised form input
        :raises TypeError:  If `input` is not a dictionary
        :raises QueryParametersException:  If a value is invalid and cannot
        be (or may not be) corrected
        """
        parsed_input = {}

        # isinstance rather than an exact type comparison, so that dict
        # subclasses (including werkzeug's ImmutableMultiDict) are accepted
        if not isinstance(input, dict):
            raise TypeError("input must be a dictionary or ImmutableMultiDict")

        if hasattr(input, "getlist"):
            # we are not using to_dict, because that messes up multi-selects;
            # fields with exactly one value are unwrapped to that value
            unwrapped = {}
            for key in input:
                values = input.getlist(key)
                unwrapped[key] = values[0] if len(values) == 1 else values
            input = unwrapped

        # all parameters are submitted as option-[parameter ID], this is an
        # artifact of how the web interface works and we can simply remove the
        # prefix (this also copies the input, so the caller's dict is untouched)
        input = {re.sub(r"^option-", "", field): value for field, value in input.items()}

        # re-order input so that the fields relying on the value of other
        # fields are parsed last (sorted() is stable, so relative order within
        # both groups is kept)
        options = {k: options[k] for k in sorted(options, key=lambda k: options[k].get("requires") is not None)}

        for option, settings in options.items():
            option_type = settings.get("type")

            if settings.get("indirect"):
                # these are settings that are derived from and set by other
                # settings
                continue

            if option_type in UserInput.OPTIONS_COSMETIC:
                # these are structural form elements and never have a value
                continue

            elif option_type == UserInput.OPTION_DATERANGE:
                # special case, since it combines two inputs
                option_min = option + "-min"
                option_max = option + "-max"

                # normally this is taken care of client-side, but in case this
                # didn't work, try to salvage it server-side
                if option_min not in input or input.get(option_min) == "-1":
                    option_min += "_proxy"

                if option_max not in input or input.get(option_max) == "-1":
                    option_max += "_proxy"

                # save as a tuple of unix timestamps (or None)
                try:
                    after = UserInput.parse_value(settings, input.get(option_min), parsed_input, silently_correct)
                    before = UserInput.parse_value(settings, input.get(option_max), parsed_input, silently_correct)

                    if before and after and after > before:
                        if not silently_correct:
                            raise QueryParametersException("End of date range must be after beginning of date range.")
                        else:
                            before = after

                    parsed_input[option] = (after, before)
                except RequirementsNotMetException:
                    pass

            elif option_type in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
                # special case too, since if a checkbox is unchecked, it simply
                # does not show up in the input
                try:
                    if option in input:
                        # Toggle needs to be parsed
                        parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
                    else:
                        # Toggle was left blank
                        parsed_input[option] = False
                except RequirementsNotMetException:
                    pass

            elif option_type == UserInput.OPTION_DATASOURCES:
                # special case, because this combines multiple inputs to
                # configure data source availability and expiration
                if option not in input:
                    # not provided? use default, like any other option
                    parsed_input[option] = settings.get("default", None)
                    continue

                # imported here rather than at module level, as in the
                # original; only this branch needs it
                from common.lib.helpers import convert_to_int

                datasources = {
                    datasource: {
                        "enabled": f"{option}-enable-{datasource}" in input,
                        "allow_optout": f"{option}-optout-{datasource}" in input,
                        # a missing timeout field counts as 0 instead of
                        # raising a KeyError
                        "timeout": convert_to_int(input.get(f"{option}-timeout-{datasource}", 0), 0),
                    }
                    for datasource in input[option].split(",")
                }

                parsed_input[option] = [datasource for datasource, v in datasources.items() if v["enabled"]]
                parsed_input[option.split(".")[0] + ".expiration"] = datasources

            elif option_type == UserInput.OPTION_DATASOURCES_TABLE:
                # special case, parse table values to generate a dict
                columns = list(settings["columns"].keys())
                table_input = {}

                for datasource in list(settings["default"].keys()):
                    table_input[datasource] = {}
                    for column in columns:
                        choice = input.get(option + "-" + datasource + "-" + column, False)
                        column_settings = settings["columns"][column]  # sub-settings per column
                        # table cells are always silently corrected
                        table_input[datasource][column] = UserInput.parse_value(column_settings, choice, table_input, silently_correct=True)

                parsed_input[option] = table_input

            elif option not in input:
                # not provided? use default
                parsed_input[option] = settings.get("default", None)

            else:
                # normal parsing and sanitisation
                try:
                    parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
                except RequirementsNotMetException:
                    pass

        return parsed_input

    @staticmethod
    def parse_value(settings, choice, other_input=None, silently_correct=True):
        """
        Filter user input

        Makes sure user input for post-processors is valid and within the
        parameters specified by the post-processor

        :param obj settings:  Settings, including defaults and valid options
        :param choice:  The chosen option, to be parsed
        :param dict other_input:  Other input, as parsed so far. Used to
        evaluate the "requires" setting; `None` is treated as "no other input".
        :param bool silently_correct:  If true, replace invalid values with the
        given default value; else, raise a QueryParametersException if a value
        is invalid.

        :return:  Validated and parsed input
        :raises RequirementsNotMetException:  If the option has a "requires"
        condition that is not satisfied by `other_input`
        :raises QueryParametersException:  If the value is invalid and is not
        silently corrected
        """
        if other_input is None:
            # without this, a "requires" setting combined with the default
            # argument raised a TypeError on the membership test below
            other_input = {}

        # short-circuit if there is a requirement for the field to be parsed
        # and the requirement isn't met
        requirement = settings.get("requires")
        if requirement:
            try:
                field, operator, value = re.findall(r"([a-zA-Z0-9_-]+)([!=$~^]+)(.*)", requirement)[0]
            except IndexError:
                # invalid condition, interpret as 'does the field with this
                # name have a value' - the field name is the requirement
                # itself, not the value the user chose
                field, operator, value = (requirement, "!=", "")

            if field not in other_input:
                raise RequirementsNotMetException()

            other_value = other_input.get(field)
            if type(other_value) is bool:
                # evaluates to a boolean, i.e. checkboxes etc
                # NOTE(review): the "!=" branch rejects e.g. `field!=false`
                # when the field is True, which looks inverted compared to
                # the "==" branch - behaviour kept as is, confirm intent
                if operator == "!=":
                    if (other_value and value in ("", "false")) or (not other_value and value in ("true", "checked")):
                        raise RequirementsNotMetException()
                else:
                    if (other_value and value not in ("true", "checked")) or (not other_value and value not in ("", "false")):
                        raise RequirementsNotMetException()

            else:
                if type(other_value) in (tuple, list):
                    # iterables are a bit special
                    if len(other_value) == 1:
                        # treat one-item lists as "normal" values
                        other_value = other_value[0]
                    elif operator == "~=":  # interpret as 'is in list?'
                        if value not in other_value:
                            raise RequirementsNotMetException()
                    else:
                        # condition doesn't make sense for a list, so assume it's not True
                        raise RequirementsNotMetException()

                if operator == "^=" and not str(other_value).startswith(value):
                    raise RequirementsNotMetException()
                elif operator == "$=" and not str(other_value).endswith(value):
                    raise RequirementsNotMetException()
                elif operator == "~=" and value not in str(other_value):
                    raise RequirementsNotMetException()
                elif operator == "!=" and value == other_value:
                    raise RequirementsNotMetException()
                elif operator in ("==", "=") and value != other_value:
                    raise RequirementsNotMetException()

        input_type = settings.get("type", "")
        if input_type in UserInput.OPTIONS_COSMETIC:
            # these are structural form elements and can never return a value
            return None

        elif input_type in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
            # simple boolean toggle
            if type(choice) is bool:
                return choice
            elif choice in ['false', 'False']:
                # Sanitized options passed back to Flask can be converted to strings as 'false'
                return False
            elif choice in ['true', 'True', 'on']:
                # Toggle will have value 'on', but may also become a string 'true'
                return True
            else:
                raise QueryParametersException("Toggle invalid input")

        elif input_type in (UserInput.OPTION_DATE, UserInput.OPTION_DATERANGE):
            # parse either integers (unix timestamps) or try to guess the date
            # format (the latter may be used for input if JavaScript is turned
            # off in the front-end and the input comes from there). Anything
            # that cannot be interpreted yields None. Exceptions are caught
            # explicitly instead of via `return` in a `finally` clause, which
            # would also swallow e.g. KeyboardInterrupt.
            try:
                return int(choice)
            except ValueError:
                try:
                    return int(parse_datetime(choice).timestamp())
                except (ValueError, TypeError, OverflowError, OSError):
                    return None
            except (TypeError, OverflowError):
                return None

        elif input_type in (UserInput.OPTION_MULTI, UserInput.OPTION_ANNOTATIONS):
            # any number of values out of a list of possible values
            # comma-separated during input, returned as a list of valid options
            if not choice:
                return settings.get("default", [])

            # multi-value form fields arrive as a list rather than a string
            chosen = choice.split(",") if isinstance(choice, str) else choice
            valid_options = settings.get("options", [])
            return [item for item in chosen if item in valid_options]

        elif input_type == UserInput.OPTION_MULTI_SELECT:
            # multiple number of values out of a dropdown list of possible values
            # comma-separated during input, returned as a list of valid options
            if not choice:
                return settings.get("default", [])

            if type(choice) is str:
                # should be a list if the form control was actually a multiselect
                # but we have some client side UI helpers that may produce a string
                # instead
                choice = choice.split(",")

            valid_options = settings.get("options", [])
            return [item for item in choice if item in valid_options]

        elif input_type == UserInput.OPTION_CHOICE:
            # select box
            # one out of multiple options
            # return option if valid, or default
            # (a missing "options" setting means no value is valid)
            valid_options = settings.get("options", {})
            if choice not in valid_options:
                if not silently_correct:
                    raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(str(valid_option) for valid_option in valid_options)}. {settings}")
                else:
                    return settings.get("default", "")
            else:
                return choice

        elif input_type == UserInput.OPTION_TEXT_JSON:
            # verify that this is actually json; a TypeError means the input
            # was not a string at all (e.g. None)
            try:
                return json.loads(choice)
            except (json.JSONDecodeError, TypeError):
                raise QueryParametersException(f"Invalid JSON value '{choice}'")

        elif input_type in (UserInput.OPTION_TEXT, UserInput.OPTION_TEXT_LARGE, UserInput.OPTION_HUE):
            # text string
            # optionally clamp it as an integer; return default if not a valid
            # integer (or float; inferred from default or made explicit via the
            # coerce_type setting)
            if settings.get("coerce_type"):
                value_type = settings["coerce_type"]
            else:
                value_type = type(settings.get("default"))
                if value_type not in (int, float):
                    value_type = int

            if "max" in settings:
                try:
                    choice = min(settings["max"], value_type(choice))
                except (ValueError, TypeError):
                    if not silently_correct:
                        raise QueryParametersException("Provide a value of %s or lower." % str(settings["max"]))

                    choice = settings.get("default")

            if "min" in settings:
                try:
                    choice = max(settings["min"], value_type(choice))
                except (ValueError, TypeError):
                    if not silently_correct:
                        raise QueryParametersException("Provide a value of %s or more." % str(settings["min"]))

                    choice = settings.get("default")

            if choice is None or choice == "":
                choice = settings.get("default")

            if choice is None:
                choice = 0 if "min" in settings or "max" in settings else ""

            if settings.get("coerce_type"):
                try:
                    return value_type(choice)
                except (ValueError, TypeError):
                    return settings.get("default")
            else:
                return choice

        else:
            # no filtering
            return choice
class RequirementsNotMetException(Exception):
    """
    Signals that the "requires" setting of an option was not satisfied

    When this is raised while parsing, the option in question is not
    included in the parsed output.
    """
If this is raised while parsing, that option is not included in the parsed output. Used with the "requires" option setting.
class UserInput:
    """
    Class for handling user input

    It is important to sanitise user input, as carelessly entered parameters
    may result in e.g. requesting far more data than needed, or lead to
    undefined behaviour. This class offers a set of pre-defined value types
    that can be consistently rendered as form elements in an interface and
    parsed.
    """
    OPTION_TOGGLE = "toggle"  # boolean toggle (checkbox)
    OPTION_CHOICE = "choice"  # one choice out of a list (select)
    OPTION_TEXT = "string"  # simple string or integer (input text)
    OPTION_MULTI = "multi"  # multiple values out of a list (select multiple)
    OPTION_MULTI_SELECT = "multi_select"  # multiple values out of a dropdown list (select multiple)
    OPTION_INFO = "info"  # just a bit of text, not actual input
    OPTION_TEXT_LARGE = "textarea"  # longer text
    OPTION_TEXT_JSON = "json"  # text, but should be valid JSON
    OPTION_DATE = "date"  # a single date
    OPTION_DATERANGE = "daterange"  # a beginning and end date
    OPTION_DIVIDER = "divider"  # meta-option, divides related sets of options
    OPTION_FILE = "file"  # file upload
    OPTION_HUE = "hue"  # colour hue
    OPTION_DATASOURCES = "datasources"  # data source toggling
    OPTION_DATASOURCES_TABLE = "datasources_table"  # a table with settings per data source
    OPTION_ANNOTATION = "annotation"  # checkbox for whether to write an annotation
    OPTION_ANNOTATIONS = "annotations"  # table for whether to write multiple annotations

    OPTIONS_COSMETIC = (OPTION_INFO, OPTION_DIVIDER)

    @staticmethod
    def parse_all(options, input, silently_correct=True):
        """
        Parse form input for the provided options

        Ignores all input not belonging to any of the defined options: parses
        and sanitises the rest, and returns a dictionary with the sanitised
        options. If an option is *not* present in the input, the default value
        is used, and if that is absent, `None`.

        In other words, this ensures a dictionary with 1) only white-listed
        keys, 2) a value of an expected type for each key. Cosmetic and
        "indirect" options are skipped, options whose "requires" condition is
        not met are left out, and a "datasources" option additionally writes
        a `<prefix>.expiration` key.

        :param dict options:  Options, as a name -> settings dictionary
        :param dict input:  Input, as a form field -> value dictionary. Any
        `dict` (sub)class is accepted; if the object offers `getlist()` (as
        werkzeug's multi-dicts do) all values of multi-value fields are kept.
        :param bool silently_correct:  If true, replace invalid values with the
        given default value; else, raise a QueryParametersException if a value
        is invalid.

        :return dict:  Sanitised form input
        :raises TypeError:  If `input` is not a dictionary
        :raises QueryParametersException:  If a value is invalid and cannot
        be (or may not be) corrected
        """
        parsed_input = {}

        # isinstance rather than an exact type comparison, so that dict
        # subclasses (including werkzeug's ImmutableMultiDict) are accepted
        if not isinstance(input, dict):
            raise TypeError("input must be a dictionary or ImmutableMultiDict")

        if hasattr(input, "getlist"):
            # we are not using to_dict, because that messes up multi-selects;
            # fields with exactly one value are unwrapped to that value
            unwrapped = {}
            for key in input:
                values = input.getlist(key)
                unwrapped[key] = values[0] if len(values) == 1 else values
            input = unwrapped

        # all parameters are submitted as option-[parameter ID], this is an
        # artifact of how the web interface works and we can simply remove the
        # prefix (this also copies the input, so the caller's dict is untouched)
        input = {re.sub(r"^option-", "", field): value for field, value in input.items()}

        # re-order input so that the fields relying on the value of other
        # fields are parsed last (sorted() is stable, so relative order within
        # both groups is kept)
        options = {k: options[k] for k in sorted(options, key=lambda k: options[k].get("requires") is not None)}

        for option, settings in options.items():
            option_type = settings.get("type")

            if settings.get("indirect"):
                # these are settings that are derived from and set by other
                # settings
                continue

            if option_type in UserInput.OPTIONS_COSMETIC:
                # these are structural form elements and never have a value
                continue

            elif option_type == UserInput.OPTION_DATERANGE:
                # special case, since it combines two inputs
                option_min = option + "-min"
                option_max = option + "-max"

                # normally this is taken care of client-side, but in case this
                # didn't work, try to salvage it server-side
                if option_min not in input or input.get(option_min) == "-1":
                    option_min += "_proxy"

                if option_max not in input or input.get(option_max) == "-1":
                    option_max += "_proxy"

                # save as a tuple of unix timestamps (or None)
                try:
                    after = UserInput.parse_value(settings, input.get(option_min), parsed_input, silently_correct)
                    before = UserInput.parse_value(settings, input.get(option_max), parsed_input, silently_correct)

                    if before and after and after > before:
                        if not silently_correct:
                            raise QueryParametersException("End of date range must be after beginning of date range.")
                        else:
                            before = after

                    parsed_input[option] = (after, before)
                except RequirementsNotMetException:
                    pass

            elif option_type in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
                # special case too, since if a checkbox is unchecked, it simply
                # does not show up in the input
                try:
                    if option in input:
                        # Toggle needs to be parsed
                        parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
                    else:
                        # Toggle was left blank
                        parsed_input[option] = False
                except RequirementsNotMetException:
                    pass

            elif option_type == UserInput.OPTION_DATASOURCES:
                # special case, because this combines multiple inputs to
                # configure data source availability and expiration
                if option not in input:
                    # not provided? use default, like any other option
                    parsed_input[option] = settings.get("default", None)
                    continue

                # imported here rather than at module level, as in the
                # original; only this branch needs it
                from common.lib.helpers import convert_to_int

                datasources = {
                    datasource: {
                        "enabled": f"{option}-enable-{datasource}" in input,
                        "allow_optout": f"{option}-optout-{datasource}" in input,
                        # a missing timeout field counts as 0 instead of
                        # raising a KeyError
                        "timeout": convert_to_int(input.get(f"{option}-timeout-{datasource}", 0), 0),
                    }
                    for datasource in input[option].split(",")
                }

                parsed_input[option] = [datasource for datasource, v in datasources.items() if v["enabled"]]
                parsed_input[option.split(".")[0] + ".expiration"] = datasources

            elif option_type == UserInput.OPTION_DATASOURCES_TABLE:
                # special case, parse table values to generate a dict
                columns = list(settings["columns"].keys())
                table_input = {}

                for datasource in list(settings["default"].keys()):
                    table_input[datasource] = {}
                    for column in columns:
                        choice = input.get(option + "-" + datasource + "-" + column, False)
                        column_settings = settings["columns"][column]  # sub-settings per column
                        # table cells are always silently corrected
                        table_input[datasource][column] = UserInput.parse_value(column_settings, choice, table_input, silently_correct=True)

                parsed_input[option] = table_input

            elif option not in input:
                # not provided? use default
                parsed_input[option] = settings.get("default", None)

            else:
                # normal parsing and sanitisation
                try:
                    parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
                except RequirementsNotMetException:
                    pass

        return parsed_input

    @staticmethod
    def parse_value(settings, choice, other_input=None, silently_correct=True):
        """
        Filter user input

        Makes sure user input for post-processors is valid and within the
        parameters specified by the post-processor

        :param obj settings:  Settings, including defaults and valid options
        :param choice:  The chosen option, to be parsed
        :param dict other_input:  Other input, as parsed so far. Used to
        evaluate the "requires" setting; `None` is treated as "no other input".
        :param bool silently_correct:  If true, replace invalid values with the
        given default value; else, raise a QueryParametersException if a value
        is invalid.

        :return:  Validated and parsed input
        :raises RequirementsNotMetException:  If the option has a "requires"
        condition that is not satisfied by `other_input`
        :raises QueryParametersException:  If the value is invalid and is not
        silently corrected
        """
        if other_input is None:
            # without this, a "requires" setting combined with the default
            # argument raised a TypeError on the membership test below
            other_input = {}

        # short-circuit if there is a requirement for the field to be parsed
        # and the requirement isn't met
        requirement = settings.get("requires")
        if requirement:
            try:
                field, operator, value = re.findall(r"([a-zA-Z0-9_-]+)([!=$~^]+)(.*)", requirement)[0]
            except IndexError:
                # invalid condition, interpret as 'does the field with this
                # name have a value' - the field name is the requirement
                # itself, not the value the user chose
                field, operator, value = (requirement, "!=", "")

            if field not in other_input:
                raise RequirementsNotMetException()

            other_value = other_input.get(field)
            if type(other_value) is bool:
                # evaluates to a boolean, i.e. checkboxes etc
                # NOTE(review): the "!=" branch rejects e.g. `field!=false`
                # when the field is True, which looks inverted compared to
                # the "==" branch - behaviour kept as is, confirm intent
                if operator == "!=":
                    if (other_value and value in ("", "false")) or (not other_value and value in ("true", "checked")):
                        raise RequirementsNotMetException()
                else:
                    if (other_value and value not in ("true", "checked")) or (not other_value and value not in ("", "false")):
                        raise RequirementsNotMetException()

            else:
                if type(other_value) in (tuple, list):
                    # iterables are a bit special
                    if len(other_value) == 1:
                        # treat one-item lists as "normal" values
                        other_value = other_value[0]
                    elif operator == "~=":  # interpret as 'is in list?'
                        if value not in other_value:
                            raise RequirementsNotMetException()
                    else:
                        # condition doesn't make sense for a list, so assume it's not True
                        raise RequirementsNotMetException()

                if operator == "^=" and not str(other_value).startswith(value):
                    raise RequirementsNotMetException()
                elif operator == "$=" and not str(other_value).endswith(value):
                    raise RequirementsNotMetException()
                elif operator == "~=" and value not in str(other_value):
                    raise RequirementsNotMetException()
                elif operator == "!=" and value == other_value:
                    raise RequirementsNotMetException()
                elif operator in ("==", "=") and value != other_value:
                    raise RequirementsNotMetException()

        input_type = settings.get("type", "")
        if input_type in UserInput.OPTIONS_COSMETIC:
            # these are structural form elements and can never return a value
            return None

        elif input_type in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
            # simple boolean toggle
            if type(choice) is bool:
                return choice
            elif choice in ['false', 'False']:
                # Sanitized options passed back to Flask can be converted to strings as 'false'
                return False
            elif choice in ['true', 'True', 'on']:
                # Toggle will have value 'on', but may also become a string 'true'
                return True
            else:
                raise QueryParametersException("Toggle invalid input")

        elif input_type in (UserInput.OPTION_DATE, UserInput.OPTION_DATERANGE):
            # parse either integers (unix timestamps) or try to guess the date
            # format (the latter may be used for input if JavaScript is turned
            # off in the front-end and the input comes from there). Anything
            # that cannot be interpreted yields None. Exceptions are caught
            # explicitly instead of via `return` in a `finally` clause, which
            # would also swallow e.g. KeyboardInterrupt.
            try:
                return int(choice)
            except ValueError:
                try:
                    return int(parse_datetime(choice).timestamp())
                except (ValueError, TypeError, OverflowError, OSError):
                    return None
            except (TypeError, OverflowError):
                return None

        elif input_type in (UserInput.OPTION_MULTI, UserInput.OPTION_ANNOTATIONS):
            # any number of values out of a list of possible values
            # comma-separated during input, returned as a list of valid options
            if not choice:
                return settings.get("default", [])

            # multi-value form fields arrive as a list rather than a string
            chosen = choice.split(",") if isinstance(choice, str) else choice
            valid_options = settings.get("options", [])
            return [item for item in chosen if item in valid_options]

        elif input_type == UserInput.OPTION_MULTI_SELECT:
            # multiple number of values out of a dropdown list of possible values
            # comma-separated during input, returned as a list of valid options
            if not choice:
                return settings.get("default", [])

            if type(choice) is str:
                # should be a list if the form control was actually a multiselect
                # but we have some client side UI helpers that may produce a string
                # instead
                choice = choice.split(",")

            valid_options = settings.get("options", [])
            return [item for item in choice if item in valid_options]

        elif input_type == UserInput.OPTION_CHOICE:
            # select box
            # one out of multiple options
            # return option if valid, or default
            # (a missing "options" setting means no value is valid)
            valid_options = settings.get("options", {})
            if choice not in valid_options:
                if not silently_correct:
                    raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(str(valid_option) for valid_option in valid_options)}. {settings}")
                else:
                    return settings.get("default", "")
            else:
                return choice

        elif input_type == UserInput.OPTION_TEXT_JSON:
            # verify that this is actually json; a TypeError means the input
            # was not a string at all (e.g. None)
            try:
                return json.loads(choice)
            except (json.JSONDecodeError, TypeError):
                raise QueryParametersException(f"Invalid JSON value '{choice}'")

        elif input_type in (UserInput.OPTION_TEXT, UserInput.OPTION_TEXT_LARGE, UserInput.OPTION_HUE):
            # text string
            # optionally clamp it as an integer; return default if not a valid
            # integer (or float; inferred from default or made explicit via the
            # coerce_type setting)
            if settings.get("coerce_type"):
                value_type = settings["coerce_type"]
            else:
                value_type = type(settings.get("default"))
                if value_type not in (int, float):
                    value_type = int

            if "max" in settings:
                try:
                    choice = min(settings["max"], value_type(choice))
                except (ValueError, TypeError):
                    if not silently_correct:
                        raise QueryParametersException("Provide a value of %s or lower." % str(settings["max"]))

                    choice = settings.get("default")

            if "min" in settings:
                try:
                    choice = max(settings["min"], value_type(choice))
                except (ValueError, TypeError):
                    if not silently_correct:
                        raise QueryParametersException("Provide a value of %s or more." % str(settings["min"]))

                    choice = settings.get("default")

            if choice is None or choice == "":
                choice = settings.get("default")

            if choice is None:
                choice = 0 if "min" in settings or "max" in settings else ""

            if settings.get("coerce_type"):
                try:
                    return value_type(choice)
                except (ValueError, TypeError):
                    return settings.get("default")
            else:
                return choice

        else:
            # no filtering
            return choice
Class for handling user input
It is important to sanitise user input, as carelessly entered parameters may result in e.g. requesting far more data than needed, or lead to undefined behaviour. This class offers a set of pre-defined value types that can be consistently rendered as form elements in an interface and parsed.
@staticmethod
def parse_all(options, input, silently_correct=True):
    """
    Parse form input for the provided options

    Ignores all input not belonging to any of the defined options: parses
    and sanitises the rest, and returns a dictionary with the sanitised
    options. If an option is *not* present in the input, the default value
    is used, and if that is absent, `None`.

    In other words, this ensures a dictionary with 1) only white-listed
    keys, 2) a value of an expected type for each key. Cosmetic and
    "indirect" options are skipped, options whose "requires" condition is
    not met are left out, and a "datasources" option additionally writes
    a `<prefix>.expiration` key.

    :param dict options:  Options, as a name -> settings dictionary
    :param dict input:  Input, as a form field -> value dictionary. Any
    `dict` (sub)class is accepted; if the object offers `getlist()` (as
    werkzeug's multi-dicts do) all values of multi-value fields are kept.
    :param bool silently_correct:  If true, replace invalid values with the
    given default value; else, raise a QueryParametersException if a value
    is invalid.

    :return dict:  Sanitised form input
    :raises TypeError:  If `input` is not a dictionary
    :raises QueryParametersException:  If a value is invalid and cannot
    be (or may not be) corrected
    """
    parsed_input = {}

    # isinstance rather than an exact type comparison, so that dict
    # subclasses (including werkzeug's ImmutableMultiDict) are accepted
    if not isinstance(input, dict):
        raise TypeError("input must be a dictionary or ImmutableMultiDict")

    if hasattr(input, "getlist"):
        # we are not using to_dict, because that messes up multi-selects;
        # fields with exactly one value are unwrapped to that value
        unwrapped = {}
        for key in input:
            values = input.getlist(key)
            unwrapped[key] = values[0] if len(values) == 1 else values
        input = unwrapped

    # all parameters are submitted as option-[parameter ID], this is an
    # artifact of how the web interface works and we can simply remove the
    # prefix (this also copies the input, so the caller's dict is untouched)
    input = {re.sub(r"^option-", "", field): value for field, value in input.items()}

    # re-order input so that the fields relying on the value of other
    # fields are parsed last (sorted() is stable, so relative order within
    # both groups is kept)
    options = {k: options[k] for k in sorted(options, key=lambda k: options[k].get("requires") is not None)}

    for option, settings in options.items():
        option_type = settings.get("type")

        if settings.get("indirect"):
            # these are settings that are derived from and set by other
            # settings
            continue

        if option_type in UserInput.OPTIONS_COSMETIC:
            # these are structural form elements and never have a value
            continue

        elif option_type == UserInput.OPTION_DATERANGE:
            # special case, since it combines two inputs
            option_min = option + "-min"
            option_max = option + "-max"

            # normally this is taken care of client-side, but in case this
            # didn't work, try to salvage it server-side
            if option_min not in input or input.get(option_min) == "-1":
                option_min += "_proxy"

            if option_max not in input or input.get(option_max) == "-1":
                option_max += "_proxy"

            # save as a tuple of unix timestamps (or None)
            try:
                after = UserInput.parse_value(settings, input.get(option_min), parsed_input, silently_correct)
                before = UserInput.parse_value(settings, input.get(option_max), parsed_input, silently_correct)

                if before and after and after > before:
                    if not silently_correct:
                        raise QueryParametersException("End of date range must be after beginning of date range.")
                    else:
                        before = after

                parsed_input[option] = (after, before)
            except RequirementsNotMetException:
                pass

        elif option_type in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
            # special case too, since if a checkbox is unchecked, it simply
            # does not show up in the input
            try:
                if option in input:
                    # Toggle needs to be parsed
                    parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
                else:
                    # Toggle was left blank
                    parsed_input[option] = False
            except RequirementsNotMetException:
                pass

        elif option_type == UserInput.OPTION_DATASOURCES:
            # special case, because this combines multiple inputs to
            # configure data source availability and expiration
            if option not in input:
                # not provided? use default, like any other option
                parsed_input[option] = settings.get("default", None)
                continue

            # imported here rather than at module level, as in the
            # original; only this branch needs it
            from common.lib.helpers import convert_to_int

            datasources = {
                datasource: {
                    "enabled": f"{option}-enable-{datasource}" in input,
                    "allow_optout": f"{option}-optout-{datasource}" in input,
                    # a missing timeout field counts as 0 instead of
                    # raising a KeyError
                    "timeout": convert_to_int(input.get(f"{option}-timeout-{datasource}", 0), 0),
                }
                for datasource in input[option].split(",")
            }

            parsed_input[option] = [datasource for datasource, v in datasources.items() if v["enabled"]]
            parsed_input[option.split(".")[0] + ".expiration"] = datasources

        elif option_type == UserInput.OPTION_DATASOURCES_TABLE:
            # special case, parse table values to generate a dict
            columns = list(settings["columns"].keys())
            table_input = {}

            for datasource in list(settings["default"].keys()):
                table_input[datasource] = {}
                for column in columns:
                    choice = input.get(option + "-" + datasource + "-" + column, False)
                    column_settings = settings["columns"][column]  # sub-settings per column
                    # table cells are always silently corrected
                    table_input[datasource][column] = UserInput.parse_value(column_settings, choice, table_input, silently_correct=True)

            parsed_input[option] = table_input

        elif option not in input:
            # not provided? use default
            parsed_input[option] = settings.get("default", None)

        else:
            # normal parsing and sanitisation
            try:
                parsed_input[option] = UserInput.parse_value(settings, input[option], parsed_input, silently_correct)
            except RequirementsNotMetException:
                pass

    return parsed_input
Parse form input for the provided options
Ignores all input not belonging to any of the defined options: parses
and sanitises the rest, and returns a dictionary with the sanitised
options. If an option is not present in the input, the default value
is used, and if that is absent, None.
In other words, this ensures a dictionary with 1) only white-listed keys, 2) a value of an expected type for each key.
Parameters
- dict options: Options, as a name -> settings dictionary
- dict input: Input, as a form field -> value dictionary
- bool silently_correct: If true, replace invalid values with the given default value; else, raise a QueryParametersException if a value is invalid.
Returns
Sanitised form input
@staticmethod
def parse_value(settings, choice, other_input=None, silently_correct=True):
    """
    Filter user input

    Makes sure user input for post-processors is valid and within the
    parameters specified by the post-processor

    :param obj settings:  Settings, including defaults and valid options
    :param choice:  The chosen option, to be parsed
    :param dict other_input:  Other input, as parsed so far; used to
    evaluate the `requires` setting. `None` is treated as "no other input".
    :param bool silently_correct:  If true, replace invalid values with the
    given default value; else, raise a QueryParametersException if a value
    is invalid.

    :return:  Validated and parsed input
    :raises RequirementsNotMetException:  If the option has a `requires`
    condition that the other input does not satisfy
    :raises QueryParametersException:  If the value is invalid and cannot
    be (or may not be) silently corrected
    """
    # short-circuit if there is a requirement for the field to be parsed
    # and the requirement isn't met
    if settings.get("requires"):
        if other_input is None:
            # no other input at all, so no requirement can be satisfied
            other_input = {}

        try:
            field, operator, value = re.findall(r"([a-zA-Z0-9_-]+)([!=$~^]+)(.*)", settings.get("requires"))[0]
        except IndexError:
            # invalid condition, interpret as 'does the field with this name have a value'
            field, operator, value = (settings.get("requires"), "!=", "")

        if field not in other_input:
            raise RequirementsNotMetException()

        other_value = other_input.get(field)
        if type(other_value) is bool:
            # evaluates to a boolean, i.e. checkboxes etc
            if operator == "!=":
                if (other_value and value in ("", "false")) or (not other_value and value in ("true", "checked")):
                    raise RequirementsNotMetException()
            else:
                if (other_value and value not in ("true", "checked")) or (not other_value and value not in ("", "false")):
                    raise RequirementsNotMetException()

        else:
            membership_checked = False
            if type(other_value) in (tuple, list):
                # iterables are a bit special
                if len(other_value) == 1:
                    # treat one-item lists as "normal" values
                    other_value = other_value[0]
                elif operator == "~=":  # interpret as 'is in list?'
                    if value not in other_value:
                        raise RequirementsNotMetException()
                    # the membership test is the whole check; do not re-test
                    # against the list's string representation below
                    membership_checked = True
                else:
                    # condition doesn't make sense for a list, so assume it's not True
                    raise RequirementsNotMetException()

            if membership_checked:
                pass
            elif operator == "^=" and not str(other_value).startswith(value):
                raise RequirementsNotMetException()
            elif operator == "$=" and not str(other_value).endswith(value):
                raise RequirementsNotMetException()
            elif operator == "~=" and value not in str(other_value):
                raise RequirementsNotMetException()
            elif operator == "!=" and value == other_value:
                raise RequirementsNotMetException()
            elif operator in ("==", "=") and value != other_value:
                raise RequirementsNotMetException()

    input_type = settings.get("type", "")
    if input_type in UserInput.OPTIONS_COSMETIC:
        # these are structural form elements and can never return a value
        return None

    elif input_type in (UserInput.OPTION_TOGGLE, UserInput.OPTION_ANNOTATION):
        # simple boolean toggle
        if type(choice) is bool:
            return choice
        elif choice in ['false', 'False']:
            # Sanitized options passed back to Flask can be converted to strings as 'false'
            return False
        elif choice in ['true', 'True', 'on']:
            # Toggle will have value 'on', but may also become a string 'true'
            return True
        else:
            raise QueryParametersException("Toggle invalid input")

    elif input_type in (UserInput.OPTION_DATE, UserInput.OPTION_DATERANGE):
        # parse either integers (unix timestamps) or try to guess the date
        # format (the latter may be used for input if JavaScript is turned
        # off in the front-end and the input comes from there). Anything
        # that cannot be interpreted as a date yields None.
        try:
            return int(choice)
        except ValueError:
            # not a number; fall through to free-form date parsing
            pass
        except (TypeError, OverflowError):
            # not something that can be a timestamp or date string at all
            return None

        try:
            return int(parse_datetime(choice).timestamp())
        except (ValueError, TypeError, OverflowError, OSError):
            return None

    elif input_type in (UserInput.OPTION_MULTI, UserInput.OPTION_ANNOTATIONS, UserInput.OPTION_MULTI_SELECT):
        # any number of values out of a list of possible values
        # comma-separated during input, returned as a list of valid options
        if not choice:
            return settings.get("default", [])

        if type(choice) is str:
            # should be a list if the form control was actually a multiselect
            # but we have some client side UI helpers that may produce a string
            # instead
            choice = choice.split(",")

        valid_options = settings.get("options", [])
        return [item for item in choice if item in valid_options]

    elif input_type == UserInput.OPTION_CHOICE:
        # select box
        # one out of multiple options
        # return option if valid, or default
        valid_options = settings.get("options") or {}
        if choice in valid_options:
            return choice

        if not silently_correct:
            raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(str(name) for name in valid_options)}. {settings}")

        return settings.get("default", "")

    elif input_type == UserInput.OPTION_TEXT_JSON:
        # verify that this is actually json, and return the decoded value
        try:
            return json.loads(choice)
        except (json.JSONDecodeError, TypeError) as e:
            # TypeError covers input that is not a string at all (e.g. None)
            raise QueryParametersException(f"Invalid JSON value '{choice}'") from e

    elif input_type in (UserInput.OPTION_TEXT, UserInput.OPTION_TEXT_LARGE, UserInput.OPTION_HUE):
        # text string
        # optionally clamp it as an integer; return default if not a valid
        # integer (or float; inferred from default or made explicit via the
        # coerce_type setting)
        if settings.get("coerce_type"):
            value_type = settings["coerce_type"]
        else:
            value_type = type(settings.get("default"))
            if value_type not in (int, float):
                value_type = int

        if "max" in settings:
            try:
                choice = min(settings["max"], value_type(choice))
            except (ValueError, TypeError):
                if not silently_correct:
                    raise QueryParametersException("Provide a value of %s or lower." % str(settings["max"]))

                choice = settings.get("default")

        if "min" in settings:
            try:
                choice = max(settings["min"], value_type(choice))
            except (ValueError, TypeError):
                if not silently_correct:
                    raise QueryParametersException("Provide a value of %s or more." % str(settings["min"]))

                choice = settings.get("default")

        if choice is None or choice == "":
            choice = settings.get("default")

        if choice is None:
            # no input and no default: numeric fields get 0, others ""
            choice = 0 if "min" in settings or "max" in settings else ""

        if settings.get("coerce_type"):
            try:
                return value_type(choice)
            except (ValueError, TypeError):
                return settings.get("default")
        else:
            return choice

    else:
        # no filtering
        return choice
Filter user input
Makes sure user input for post-processors is valid and within the parameters specified by the post-processor
Parameters
- obj settings: Settings, including defaults and valid options
- choice: The chosen option, to be parsed
- dict other_input: Other input, as parsed so far
- bool silently_correct: If true, replace invalid values with the given default value; else, raise a QueryParametersException if a value is invalid.
Returns
Validated and parsed input