backend.workers.expire_items
Delete old items
"""
Delete old items
"""
import datetime
import time
import re

from backend.lib.worker import BasicWorker
from common.lib.dataset import DataSet
from common.lib.exceptions import DataSetNotFoundException, WorkerInterruptedException

from common.lib.user import User
from common.config_manager import ConfigWrapper


class ThingExpirer(BasicWorker):
    """
    Delete old items

    Deletes expired datasets. This may be useful for two reasons: to conserve
    disk space and if the user agreement of a particular data source does not
    allow storing scraped or extracted data for longer than a given amount of
    time, as is the case for e.g. Tumblr.

    Also deletes users that have an expiration date that is not zero. Users
    with a close expiration date get a notification.

    Also deletes expired notifications.
    """
    type = "expire-datasets"
    max_workers = 1

    # NOTE(review): presumably makes the job recur every 300 seconds - confirm
    # against BasicWorker, which is what interprets this value
    ensure_job = {"remote_id": "localhost", "interval": 300}

    def work(self):
        """
        Delete datasets, users and notifications

        Runs the three clean-up steps in that order and then marks the job
        as finished.
        """
        self.expire_datasets()
        self.expire_users()
        self.expire_notifications()

        self.job.finish()

    def expire_datasets(self):
        """
        Delete expired datasets

        Candidates are all datasets without a `keep` parameter. Whether a
        candidate is expired is decided by `DataSet.is_expired()`, using the
        configuration as it applies to the dataset's creator.

        :raises WorkerInterruptedException: if the worker is interrupted
        while looping through the candidates
        """
        # find candidates
        # todo: make this better - this can be a lot of datasets!
        datasets = self.db.fetchall("""
            SELECT key FROM datasets
             WHERE parameters::json->>'keep' IS NULL
        """)

        for row in datasets:
            if self.interrupted:
                raise WorkerInterruptedException("Interrupted while expiring datasets")

            # the dataset creator's configuration context determines expiration
            try:
                dataset = DataSet(key=row["key"], db=self.db)
                wrapper = ConfigWrapper(self.config, user=User.get_by_name(self.db, dataset.creator))
                if dataset.is_expired(config=wrapper):
                    self.log.info(f"Deleting dataset {dataset.key} (expired)")
                    dataset.delete()

            except DataSetNotFoundException:
                # dataset already deleted I guess?
                pass

    def expire_users(self):
        """
        Delete expired users and warn users that are about to expire

        Users can have a `delete-after` parameter in their user data which
        indicates a date or time after which the account should be deleted.

        The value can be a date in YYYY-MM-DD format or a unix timestamp.
        Both are compared against the current local time. If the expiration
        moment has passed the account is deleted. If it is within 7 days a
        notification is added for the user to warn them. Values that cannot
        be parsed are logged and skipped, so that one malformed value does
        not abort the run for all other users.

        :raises WorkerInterruptedException: if the worker is interrupted
        while looping through the users
        """
        expiring_users = self.db.fetchall("SELECT * FROM users WHERE userdata::json->>'delete-after' IS NOT NULL;")
        now = datetime.datetime.now()
        warn_until = now + datetime.timedelta(days=7)

        for expiring_user in expiring_users:
            if self.interrupted:
                raise WorkerInterruptedException("Interrupted while expiring users")

            user = User.get_by_name(self.db, expiring_user["name"], config=self.config)
            username = user.data["name"]

            # parse expiration date if available
            delete_after = user.get_value("delete-after")
            if not delete_after:
                continue

            # match and parse the same (string) representation of the value
            raw_value = str(delete_after)
            expires_at = None
            try:
                if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", raw_value):
                    expires_at = datetime.datetime.strptime(raw_value, "%Y-%m-%d")
                elif re.match(r"^[0-9]+$", raw_value):
                    expires_at = datetime.datetime.fromtimestamp(int(raw_value))
            except (ValueError, OverflowError, OSError):
                # right shape, impossible value (e.g. 2024-13-45, or a
                # timestamp outside the supported range)
                expires_at = None

            if expires_at is None:
                self.log.warning(f"User {username} has invalid expiration date {delete_after}")
                continue

            # check if expired...
            if expires_at < now:
                self.log.info(f"User {username} expired - deleting user and datasets")
                user.delete()
            elif expires_at <= warn_until:
                # ISO 8601 for the machine-readable attribute; the day number
                # is formatted directly because `%-d` is not portable
                machine_time = expires_at.isoformat()
                readable_time = f"{expires_at.day} {expires_at.strftime('%B %Y %H:%M')}"
                warning_notification = f"WARNING: This account will be deleted at <time datetime=\"{machine_time}\">{readable_time}</time>. Make sure to back up your data before then."
                user.add_notification(warning_notification)

    def expire_notifications(self):
        """
        Delete expired notifications

        Removes all notifications that have an expiration timestamp which is
        earlier than the current time.
        """
        self.db.execute(f"DELETE FROM users_notifications WHERE timestamp_expires IS NOT NULL AND timestamp_expires < {time.time()}")
class ThingExpirer(BasicWorker):
    """
    Delete old items

    Deletes expired datasets. This may be useful for two reasons: to conserve
    disk space and if the user agreement of a particular data source does not
    allow storing scraped or extracted data for longer than a given amount of
    time, as is the case for e.g. Tumblr.

    Also deletes users that have an expiration date that is not zero. Users
    with a close expiration date get a notification.

    Also deletes expired notifications.
    """
    type = "expire-datasets"
    max_workers = 1

    # NOTE(review): presumably makes the job recur every 300 seconds - confirm
    # against BasicWorker, which is what interprets this value
    ensure_job = {"remote_id": "localhost", "interval": 300}

    def work(self):
        """
        Delete datasets, users and notifications

        Runs the three clean-up steps in that order and then marks the job
        as finished.
        """
        self.expire_datasets()
        self.expire_users()
        self.expire_notifications()

        self.job.finish()

    def expire_datasets(self):
        """
        Delete expired datasets

        Candidates are all datasets without a `keep` parameter. Whether a
        candidate is expired is decided by `DataSet.is_expired()`, using the
        configuration as it applies to the dataset's creator.

        :raises WorkerInterruptedException: if the worker is interrupted
        while looping through the candidates
        """
        # find candidates
        # todo: make this better - this can be a lot of datasets!
        datasets = self.db.fetchall("""
            SELECT key FROM datasets
             WHERE parameters::json->>'keep' IS NULL
        """)

        for row in datasets:
            if self.interrupted:
                raise WorkerInterruptedException("Interrupted while expiring datasets")

            # the dataset creator's configuration context determines expiration
            try:
                dataset = DataSet(key=row["key"], db=self.db)
                wrapper = ConfigWrapper(self.config, user=User.get_by_name(self.db, dataset.creator))
                if dataset.is_expired(config=wrapper):
                    self.log.info(f"Deleting dataset {dataset.key} (expired)")
                    dataset.delete()

            except DataSetNotFoundException:
                # dataset already deleted I guess?
                pass

    def expire_users(self):
        """
        Delete expired users and warn users that are about to expire

        Users can have a `delete-after` parameter in their user data which
        indicates a date or time after which the account should be deleted.

        The value can be a date in YYYY-MM-DD format or a unix timestamp.
        Both are compared against the current local time. If the expiration
        moment has passed the account is deleted. If it is within 7 days a
        notification is added for the user to warn them. Values that cannot
        be parsed are logged and skipped, so that one malformed value does
        not abort the run for all other users.

        :raises WorkerInterruptedException: if the worker is interrupted
        while looping through the users
        """
        expiring_users = self.db.fetchall("SELECT * FROM users WHERE userdata::json->>'delete-after' IS NOT NULL;")
        now = datetime.datetime.now()
        warn_until = now + datetime.timedelta(days=7)

        for expiring_user in expiring_users:
            if self.interrupted:
                raise WorkerInterruptedException("Interrupted while expiring users")

            user = User.get_by_name(self.db, expiring_user["name"], config=self.config)
            username = user.data["name"]

            # parse expiration date if available
            delete_after = user.get_value("delete-after")
            if not delete_after:
                continue

            # match and parse the same (string) representation of the value
            raw_value = str(delete_after)
            expires_at = None
            try:
                if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", raw_value):
                    expires_at = datetime.datetime.strptime(raw_value, "%Y-%m-%d")
                elif re.match(r"^[0-9]+$", raw_value):
                    expires_at = datetime.datetime.fromtimestamp(int(raw_value))
            except (ValueError, OverflowError, OSError):
                # right shape, impossible value (e.g. 2024-13-45, or a
                # timestamp outside the supported range)
                expires_at = None

            if expires_at is None:
                self.log.warning(f"User {username} has invalid expiration date {delete_after}")
                continue

            # check if expired...
            if expires_at < now:
                self.log.info(f"User {username} expired - deleting user and datasets")
                user.delete()
            elif expires_at <= warn_until:
                # ISO 8601 for the machine-readable attribute; the day number
                # is formatted directly because `%-d` is not portable
                machine_time = expires_at.isoformat()
                readable_time = f"{expires_at.day} {expires_at.strftime('%B %Y %H:%M')}"
                warning_notification = f"WARNING: This account will be deleted at <time datetime=\"{machine_time}\">{readable_time}</time>. Make sure to back up your data before then."
                user.add_notification(warning_notification)

    def expire_notifications(self):
        """
        Delete expired notifications

        Removes all notifications that have an expiration timestamp which is
        earlier than the current time.
        """
        self.db.execute(f"DELETE FROM users_notifications WHERE timestamp_expires IS NOT NULL AND timestamp_expires < {time.time()}")
Delete old items
Deletes expired datasets. This may be useful for two reasons: to conserve disk space and if the user agreement of a particular data source does not allow storing scraped or extracted data for longer than a given amount of time, as is the case for e.g. Tumblr.
Also deletes users that have an expiration date that is not zero. Users with a close expiration date get a notification.
Also deletes expired notifications.
def work(self):
    """
    Run all clean-up steps, then finish the job

    Datasets are expired first, then users, then notifications. Once all
    three steps have run the job is marked as finished.
    """
    cleanup_steps = (
        self.expire_datasets,
        self.expire_users,
        self.expire_notifications,
    )
    for run_step in cleanup_steps:
        run_step()

    self.job.finish()
Delete datasets, users and notifications
def expire_datasets(self):
    """
    Remove every dataset that has expired

    All datasets without a `keep` parameter are considered. For each of
    them, expiration is checked with the configuration as it applies to the
    user that created the dataset; expired datasets are deleted.

    :raises WorkerInterruptedException: if the worker is interrupted while
    looping through the candidates
    """
    # todo: make this better - this can be a lot of datasets!
    candidates = self.db.fetchall("""
        SELECT key FROM datasets
         WHERE parameters::json->>'keep' IS NULL
    """)

    for row in candidates:
        if self.interrupted:
            raise WorkerInterruptedException("Interrupted while expiring datasets")

        try:
            candidate = DataSet(key=row["key"], db=self.db)

            # expiration depends on the configuration of whoever created
            # the dataset, so evaluate it in that user's context
            creator = User.get_by_name(self.db, candidate.creator)
            creator_config = ConfigWrapper(self.config, user=creator)
            if not candidate.is_expired(config=creator_config):
                continue

            self.log.info(f"Deleting dataset {candidate.key} (expired)")
            candidate.delete()
        except DataSetNotFoundException:
            # nothing to do - the dataset seems to be gone already
            continue
Delete expired datasets
def expire_users(self):
    """
    Delete expired users and warn users that are about to expire

    Users can have a `delete-after` parameter in their user data which
    indicates a date or time after which the account should be deleted.

    The value can be a date in YYYY-MM-DD format or a unix timestamp. Both
    are compared against the current local time. If the expiration moment
    has passed the account is deleted. If it is within 7 days a notification
    is added for the user to warn them. Values that cannot be parsed are
    logged and skipped, so that one malformed value does not abort the run
    for all other users.

    :raises WorkerInterruptedException: if the worker is interrupted while
    looping through the users
    """
    expiring_users = self.db.fetchall("SELECT * FROM users WHERE userdata::json->>'delete-after' IS NOT NULL;")
    now = datetime.datetime.now()
    warn_until = now + datetime.timedelta(days=7)

    for expiring_user in expiring_users:
        if self.interrupted:
            raise WorkerInterruptedException("Interrupted while expiring users")

        user = User.get_by_name(self.db, expiring_user["name"], config=self.config)
        username = user.data["name"]

        # parse expiration date if available
        delete_after = user.get_value("delete-after")
        if not delete_after:
            continue

        # match and parse the same (string) representation of the value
        raw_value = str(delete_after)
        expires_at = None
        try:
            if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", raw_value):
                expires_at = datetime.datetime.strptime(raw_value, "%Y-%m-%d")
            elif re.match(r"^[0-9]+$", raw_value):
                expires_at = datetime.datetime.fromtimestamp(int(raw_value))
        except (ValueError, OverflowError, OSError):
            # right shape, impossible value (e.g. 2024-13-45, or a timestamp
            # outside the supported range)
            expires_at = None

        if expires_at is None:
            self.log.warning(f"User {username} has invalid expiration date {delete_after}")
            continue

        # check if expired...
        if expires_at < now:
            self.log.info(f"User {username} expired - deleting user and datasets")
            user.delete()
        elif expires_at <= warn_until:
            # ISO 8601 for the machine-readable attribute; the day number is
            # formatted directly because `%-d` is not portable
            machine_time = expires_at.isoformat()
            readable_time = f"{expires_at.day} {expires_at.strftime('%B %Y %H:%M')}"
            warning_notification = f"WARNING: This account will be deleted at <time datetime=\"{machine_time}\">{readable_time}</time>. Make sure to back up your data before then."
            user.add_notification(warning_notification)
Delete expired users
Users can have a `delete-after` parameter in their user data which indicates a date or time after which the account should be deleted.
The date can be in YYYY-MM-DD format or a unix (UTC) timestamp. If the current date is after the given date the account is deleted. If the expiration date is within 7 days a notification is added for the user to warn them.
def expire_notifications(self):
    """
    Remove notifications that are past their expiration time

    Issues a single DELETE for all rows in `users_notifications` that have
    a `timestamp_expires` value earlier than the current time.
    """
    cleanup_query = f"DELETE FROM users_notifications WHERE timestamp_expires IS NOT NULL AND timestamp_expires < {time.time()}"
    self.db.execute(cleanup_query)
Delete expired notifications
Pretty simple!