backend.workers.expire_items
Delete old items
"""
Delete old items
"""
import datetime
import time
import re

from backend.lib.worker import BasicWorker
from common.lib.dataset import DataSet
from common.lib.exceptions import DataSetNotFoundException, WorkerInterruptedException

from common.lib.user import User
from common.config_manager import ConfigWrapper


class ThingExpirer(BasicWorker):
    """
    Delete old items

    Deletes expired datasets. This may be useful for two reasons: to conserve
    disk space and if the user agreement of a particular data source does not
    allow storing scraped or extracted data for longer than a given amount of
    time, as is the case for e.g. Tumblr.

    Also deletes users that have an expiration date that is not zero. Users
    with a close expiration date get a notification.

    Also deletes expired notifications.
    """
    # job type identifier for this worker
    type = "expire-datasets"
    # NOTE(review): presumably limits the number of simultaneous instances -
    # confirm against BasicWorker
    max_workers = 1

    @classmethod
    def ensure_job(cls, config=None):
        """
        Ensure that the expirer is always running

        This is used to ensure that the expirer is always running, and if it is
        not, it will be started by the WorkerManager.

        :param config:  Not used by this worker
        :return:  Job parameters for the worker
        """
        # NOTE(review): interval is presumably in seconds - confirm
        return {"remote_id": "localhost", "interval": 300}

    def work(self):
        """
        Delete datasets, users and notifications

        Runs the three expiration steps in order and then marks the job as
        finished.
        """
        self.expire_datasets()
        self.expire_users()
        self.expire_notifications()

        self.job.finish()

    def expire_datasets(self):
        """
        Delete expired datasets

        Every dataset without a `keep` parameter is a candidate. Whether a
        candidate has actually expired is decided by `DataSet.is_expired()`,
        using a configuration wrapper built for the dataset's creator.

        :raises WorkerInterruptedException:  If the worker is interrupted
        while looping through datasets
        """
        # find candidates
        # todo: make this better - this can be a lot of datasets!
        datasets = self.db.fetchall("""
            SELECT key FROM datasets
             WHERE parameters::json->>'keep' IS NULL
        """)

        for row in datasets:
            if self.interrupted:
                raise WorkerInterruptedException("Interrupted while expiring datasets")

            # the dataset creator's configuration context determines expiration
            try:
                dataset = DataSet(key=row["key"], db=self.db, modules=self.modules)
                wrapper = ConfigWrapper(self.config, user=User.get_by_name(self.db, dataset.creator))
                if dataset.is_expired(config=wrapper):
                    self.log.info(f"Deleting dataset {dataset.key} (expired)")
                    dataset.delete()

            except DataSetNotFoundException:
                # dataset already deleted I guess?
                pass

    def expire_users(self):
        """
        Delete expired users

        Users can have a `delete-after` parameter in their user data which
        indicates a date or time after which the account should be deleted.

        The date can be in YYYY-MM-DD format or a unix (UTC) timestamp. If
        the current date is after the given date the account is deleted. If the
        expiration date is within 7 days a notification is added for the user
        to warn them.

        Values that cannot be parsed as a date are logged and skipped, so one
        malformed value does not stop the other users from being processed.

        :raises WorkerInterruptedException:  If the worker is interrupted
        while looping through users
        """
        expiring_users = self.db.fetchall("SELECT * FROM users WHERE userdata::json->>'delete-after' IS NOT NULL;")
        now = datetime.datetime.now()
        warning_window = datetime.timedelta(days=7)

        for expiring_user in expiring_users:
            if self.interrupted:
                raise WorkerInterruptedException("Interrupted while expiring users")

            user = User.get_by_name(self.db, expiring_user["name"], config=self.config)
            username = user.data["name"]

            # parse expiration date if available
            delete_after = user.get_value("delete-after")
            if not delete_after:
                continue

            raw_value = str(delete_after)
            try:
                if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", raw_value):
                    expires_at = datetime.datetime.strptime(raw_value, "%Y-%m-%d")
                elif re.match(r"^[0-9]+$", raw_value):
                    expires_at = datetime.datetime.fromtimestamp(int(raw_value))
                else:
                    expires_at = None
            except (ValueError, OverflowError, OSError):
                # looks like a date but is not one (e.g. 2024-13-45), or the
                # timestamp is outside the range the platform supports
                expires_at = None

            if expires_at is None:
                self.log.warning(f"User {username} has invalid expiration date {delete_after}")
                continue

            # check if expired...
            if expires_at < now:
                self.log.info(f"User {username} expired - deleting user and datasets")
                user.delete(modules=self.modules)
            elif expires_at - now <= warning_window:
                # the `datetime` attribute of <time> needs a machine-readable
                # value, so use ISO 8601; `.day` avoids the glibc-only `%-d`
                warning_notification = f"WARNING: This account will be deleted at <time datetime=\"{expires_at.isoformat()}\">{expires_at.day} {expires_at.strftime('%B %Y %H:%M')}</time>. Make sure to back up your data before then."
                user.add_notification(warning_notification)

    def expire_notifications(self):
        """
        Delete expired notifications

        Removes all notifications with an expiration timestamp that is earlier
        than the current time. Notifications without an expiration timestamp
        are left alone.
        """
        self.db.execute(f"DELETE FROM users_notifications WHERE timestamp_expires IS NOT NULL AND timestamp_expires < {time.time()}")
class ThingExpirer(BasicWorker):
    """
    Delete old items

    Deletes expired datasets. This may be useful for two reasons: to conserve
    disk space and if the user agreement of a particular data source does not
    allow storing scraped or extracted data for longer than a given amount of
    time, as is the case for e.g. Tumblr.

    Also deletes users that have an expiration date that is not zero. Users
    with a close expiration date get a notification.

    Also deletes expired notifications.
    """
    # job type identifier for this worker
    type = "expire-datasets"
    # NOTE(review): presumably limits the number of simultaneous instances -
    # confirm against BasicWorker
    max_workers = 1

    @classmethod
    def ensure_job(cls, config=None):
        """
        Ensure that the expirer is always running

        This is used to ensure that the expirer is always running, and if it is
        not, it will be started by the WorkerManager.

        :param config:  Not used by this worker
        :return:  Job parameters for the worker
        """
        # NOTE(review): interval is presumably in seconds - confirm
        return {"remote_id": "localhost", "interval": 300}

    def work(self):
        """
        Delete datasets, users and notifications

        Runs the three expiration steps in order and then marks the job as
        finished.
        """
        self.expire_datasets()
        self.expire_users()
        self.expire_notifications()

        self.job.finish()

    def expire_datasets(self):
        """
        Delete expired datasets

        Every dataset without a `keep` parameter is a candidate. Whether a
        candidate has actually expired is decided by `DataSet.is_expired()`,
        using a configuration wrapper built for the dataset's creator.

        :raises WorkerInterruptedException:  If the worker is interrupted
        while looping through datasets
        """
        # find candidates
        # todo: make this better - this can be a lot of datasets!
        datasets = self.db.fetchall("""
            SELECT key FROM datasets
             WHERE parameters::json->>'keep' IS NULL
        """)

        for row in datasets:
            if self.interrupted:
                raise WorkerInterruptedException("Interrupted while expiring datasets")

            # the dataset creator's configuration context determines expiration
            try:
                dataset = DataSet(key=row["key"], db=self.db, modules=self.modules)
                wrapper = ConfigWrapper(self.config, user=User.get_by_name(self.db, dataset.creator))
                if dataset.is_expired(config=wrapper):
                    self.log.info(f"Deleting dataset {dataset.key} (expired)")
                    dataset.delete()

            except DataSetNotFoundException:
                # dataset already deleted I guess?
                pass

    def expire_users(self):
        """
        Delete expired users

        Users can have a `delete-after` parameter in their user data which
        indicates a date or time after which the account should be deleted.

        The date can be in YYYY-MM-DD format or a unix (UTC) timestamp. If
        the current date is after the given date the account is deleted. If the
        expiration date is within 7 days a notification is added for the user
        to warn them.

        Values that cannot be parsed as a date are logged and skipped, so one
        malformed value does not stop the other users from being processed.

        :raises WorkerInterruptedException:  If the worker is interrupted
        while looping through users
        """
        expiring_users = self.db.fetchall("SELECT * FROM users WHERE userdata::json->>'delete-after' IS NOT NULL;")
        now = datetime.datetime.now()
        warning_window = datetime.timedelta(days=7)

        for expiring_user in expiring_users:
            if self.interrupted:
                raise WorkerInterruptedException("Interrupted while expiring users")

            user = User.get_by_name(self.db, expiring_user["name"], config=self.config)
            username = user.data["name"]

            # parse expiration date if available
            delete_after = user.get_value("delete-after")
            if not delete_after:
                continue

            raw_value = str(delete_after)
            try:
                if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", raw_value):
                    expires_at = datetime.datetime.strptime(raw_value, "%Y-%m-%d")
                elif re.match(r"^[0-9]+$", raw_value):
                    expires_at = datetime.datetime.fromtimestamp(int(raw_value))
                else:
                    expires_at = None
            except (ValueError, OverflowError, OSError):
                # looks like a date but is not one (e.g. 2024-13-45), or the
                # timestamp is outside the range the platform supports
                expires_at = None

            if expires_at is None:
                self.log.warning(f"User {username} has invalid expiration date {delete_after}")
                continue

            # check if expired...
            if expires_at < now:
                self.log.info(f"User {username} expired - deleting user and datasets")
                user.delete(modules=self.modules)
            elif expires_at - now <= warning_window:
                # the `datetime` attribute of <time> needs a machine-readable
                # value, so use ISO 8601; `.day` avoids the glibc-only `%-d`
                warning_notification = f"WARNING: This account will be deleted at <time datetime=\"{expires_at.isoformat()}\">{expires_at.day} {expires_at.strftime('%B %Y %H:%M')}</time>. Make sure to back up your data before then."
                user.add_notification(warning_notification)

    def expire_notifications(self):
        """
        Delete expired notifications

        Removes all notifications with an expiration timestamp that is earlier
        than the current time. Notifications without an expiration timestamp
        are left alone.
        """
        self.db.execute(f"DELETE FROM users_notifications WHERE timestamp_expires IS NOT NULL AND timestamp_expires < {time.time()}")
Delete old items
Deletes expired datasets. This may be useful for two reasons: to conserve disk space and if the user agreement of a particular data source does not allow storing scraped or extracted data for longer than a given amount of time, as is the case for e.g. Tumblr.
Also deletes users that have an expiration date that is not zero. Users with a close expiration date get a notification.
Also deletes expired notifications.
@classmethod
def ensure_job(cls, config=None):
    """
    Describe the recurring job that keeps the expirer running

    The WorkerManager uses the returned parameters to (re)start this worker
    if it is not already running.

    :param config:  Not used by this worker
    :return dict:  Job parameters for the worker
    """
    # NOTE(review): interval is presumably in seconds - confirm
    job_parameters = dict(remote_id="localhost", interval=300)
    return job_parameters
Ensure that the expirer is always running
This is used to ensure that the expirer is always running, and if it is not, it will be started by the WorkerManager.
Returns
Job parameters for the worker
def work(self):
    """
    Run all expiration steps, then finish the job

    Datasets are handled first, then users, then notifications. The job is
    only marked as finished after all three steps have run.
    """
    expiration_steps = (
        self.expire_datasets,
        self.expire_users,
        self.expire_notifications,
    )
    for run_step in expiration_steps:
        run_step()

    self.job.finish()
Delete datasets, users and notifications
def expire_datasets(self):
    """
    Delete datasets that have expired

    All datasets without a `keep` parameter are considered. For each of
    them, expiration is checked via `DataSet.is_expired()` using a
    configuration wrapper built for the dataset's creator; expired datasets
    are deleted.

    :raises WorkerInterruptedException:  If the worker is interrupted while
    looping through datasets
    """
    # todo: make this better - this can be a lot of datasets!
    candidates = self.db.fetchall("""
        SELECT key FROM datasets
         WHERE parameters::json->>'keep' IS NULL
    """)

    for candidate in candidates:
        if self.interrupted:
            raise WorkerInterruptedException("Interrupted while expiring datasets")

        try:
            expirable = DataSet(key=candidate["key"], db=self.db, modules=self.modules)

            # expiration depends on the configuration context of the creator
            creator = User.get_by_name(self.db, expirable.creator)
            creator_config = ConfigWrapper(self.config, user=creator)
            if not expirable.is_expired(config=creator_config):
                continue

            self.log.info(f"Deleting dataset {expirable.key} (expired)")
            expirable.delete()

        except DataSetNotFoundException:
            # dataset already deleted I guess?
            continue
Delete expired datasets
def expire_users(self):
    """
    Delete expired users

    Users can have a `delete-after` parameter in their user data which
    indicates a date or time after which the account should be deleted.

    The date can be in YYYY-MM-DD format or a unix (UTC) timestamp. If
    the current date is after the given date the account is deleted. If the
    expiration date is within 7 days a notification is added for the user
    to warn them.

    Values that cannot be parsed as a date are logged and skipped, so one
    malformed value does not stop the other users from being processed.

    :raises WorkerInterruptedException:  If the worker is interrupted while
    looping through users
    """
    expiring_users = self.db.fetchall("SELECT * FROM users WHERE userdata::json->>'delete-after' IS NOT NULL;")
    now = datetime.datetime.now()
    warning_window = datetime.timedelta(days=7)

    for expiring_user in expiring_users:
        if self.interrupted:
            raise WorkerInterruptedException("Interrupted while expiring users")

        user = User.get_by_name(self.db, expiring_user["name"], config=self.config)
        username = user.data["name"]

        # parse expiration date if available
        delete_after = user.get_value("delete-after")
        if not delete_after:
            continue

        raw_value = str(delete_after)
        try:
            if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", raw_value):
                expires_at = datetime.datetime.strptime(raw_value, "%Y-%m-%d")
            elif re.match(r"^[0-9]+$", raw_value):
                expires_at = datetime.datetime.fromtimestamp(int(raw_value))
            else:
                expires_at = None
        except (ValueError, OverflowError, OSError):
            # looks like a date but is not one (e.g. 2024-13-45), or the
            # timestamp is outside the range the platform supports
            expires_at = None

        if expires_at is None:
            self.log.warning(f"User {username} has invalid expiration date {delete_after}")
            continue

        # check if expired...
        if expires_at < now:
            self.log.info(f"User {username} expired - deleting user and datasets")
            user.delete(modules=self.modules)
        elif expires_at - now <= warning_window:
            # the `datetime` attribute of <time> needs a machine-readable
            # value, so use ISO 8601; `.day` avoids the glibc-only `%-d`
            warning_notification = f"WARNING: This account will be deleted at <time datetime=\"{expires_at.isoformat()}\">{expires_at.day} {expires_at.strftime('%B %Y %H:%M')}</time>. Make sure to back up your data before then."
            user.add_notification(warning_notification)
Delete expired users
Users can have a `delete-after` parameter in their user data which indicates a date or time after which the account should be deleted.
The date can be in YYYY-MM-DD format or a unix (UTC) timestamp. If the current date is after the given date the account is deleted. If the expiration date is within 7 days a notification is added for the user to warn them.
def expire_notifications(self):
    """
    Delete notifications that are past their expiration time

    Only notifications that have an expiration timestamp are considered;
    those for which it is earlier than the current time are removed.
    """
    delete_query = f"DELETE FROM users_notifications WHERE timestamp_expires IS NOT NULL AND timestamp_expires < {time.time()}"
    self.db.execute(delete_query)
Delete expired notifications
Pretty simple!