backend.workers.expire_items
Delete old items
"""
Delete old items
"""
import datetime
import time
import json
import re

from backend.lib.worker import BasicWorker
from common.lib.dataset import DataSet
from common.lib.exceptions import DataSetNotFoundException, WorkerInterruptedException

from common.lib.user import User


class ThingExpirer(BasicWorker):
    """
    Delete old items

    Deletes expired datasets. This may be useful for two reasons: to conserve
    disk space and if the user agreement of a particular data source does not
    allow storing scraped or extracted data for longer than a given amount of
    time, as is the case for e.g. Tumblr.

    Also deletes users whose (non-zero) expiration date has passed. Users
    whose expiration date is still in the future get a warning notification.

    Also deletes expired notifications.
    """
    # job type this worker registers under
    type = "expire-datasets"
    max_workers = 1

    # NOTE(review): presumably makes the scheduler re-queue this worker every
    # 300 seconds - confirm against BasicWorker
    ensure_job = {"remote_id": "localhost", "interval": 300}

    def work(self):
        """
        Delete datasets, users and notifications

        Runs the three clean-up passes in a fixed order and then marks the
        job as finished.
        """
        self.expire_datasets()
        self.expire_users()
        self.expire_notifications()

        self.job.finish()

    def expire_datasets(self):
        """
        Delete expired datasets

        Every dataset without a `keep` parameter is loaded and deleted if it
        reports itself as expired.

        :raises WorkerInterruptedException:  If the worker is interrupted
        while looping through the datasets.
        """
        # find candidates
        # todo: make this better - this can be a lot of datasets!
        rows = self.db.fetchall("""
            SELECT key FROM datasets
             WHERE parameters::json->>'keep' IS NULL
        """)

        for row in rows:
            if self.interrupted:
                raise WorkerInterruptedException("Interrupted while expiring datasets")

            try:
                dataset = DataSet(key=row["key"], db=self.db)
                if dataset.is_expired():
                    self.log.info(f"Deleting dataset {dataset.key} (expired)")
                    dataset.delete()

            except DataSetNotFoundException:
                # dataset was removed between the query and now; nothing
                # left to do for it
                pass

    def expire_users(self):
        """
        Delete expired users

        Users can have a `delete-after` parameter in their user data which
        indicates a date or time after which the account should be deleted.

        The date can be in YYYY-MM-DD format or a unix (UTC) timestamp. If
        the current date is after the given date the account is deleted.
        Otherwise a notification is added for the user to warn them of the
        upcoming deletion.

        Values that cannot be interpreted as a date are logged and skipped,
        so one malformed value does not stop the other users from being
        processed.

        NOTE(review): earlier documentation said the warning is only added
        when the expiration date is within 7 days. The code has never applied
        such a window; behaviour is left unchanged here - confirm intent.

        :raises WorkerInterruptedException:  If the worker is interrupted
        while looping through the users.
        """
        expiring_users = self.db.fetchall("SELECT * FROM users WHERE userdata::json->>'delete-after' IS NOT NULL;")
        now = datetime.datetime.now()

        for expiring_user in expiring_users:
            if self.interrupted:
                raise WorkerInterruptedException("Interrupted while expiring users")

            user = User.get_by_name(self.db, expiring_user["name"])
            if user is None:
                # NOTE(review): assumes get_by_name returns None when the
                # user no longer exists - confirm against User
                continue

            username = user.data["name"]

            # parse expiration date if available
            delete_after = user.get_value("delete-after")
            if not delete_after:
                continue

            raw_value = str(delete_after)
            try:
                if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", raw_value):
                    expires_at = datetime.datetime.strptime(raw_value, "%Y-%m-%d")
                elif re.match(r"^[0-9]+$", raw_value):
                    expires_at = datetime.datetime.fromtimestamp(int(raw_value))
                else:
                    expires_at = None
            except (ValueError, OverflowError, OSError):
                # matches the pattern but is not a real date (e.g. month 13)
                # or a timestamp outside the range the platform supports
                expires_at = None

            if expires_at is None:
                self.log.warning(f"User {username} has invalid expiration date {delete_after}")
                continue

            # check if expired...
            if expires_at < now:
                self.log.info(f"User {username} expired - deleting user and datasets")
                user.delete()
            else:
                # the datetime attribute needs a machine-readable value; the
                # day is formatted by hand because '%-d' is not portable
                machine_readable = expires_at.isoformat()
                human_readable = f"{expires_at.day} {expires_at.strftime('%B %Y %H:%M')}"
                warning_notification = f"WARNING: This account will be deleted at <time datetime=\"{machine_readable}\">{human_readable}</time>. Make sure to back up your data before then."
                user.add_notification(warning_notification)

    def expire_notifications(self):
        """
        Delete expired notifications

        Removes every notification whose `timestamp_expires` is set and lies
        before the current time.
        """
        self.db.execute(f"DELETE FROM users_notifications WHERE timestamp_expires IS NOT NULL AND timestamp_expires < {time.time()}")
class ThingExpirer(BasicWorker):
    """
    Delete old items

    Deletes expired datasets. This may be useful for two reasons: to conserve
    disk space and if the user agreement of a particular data source does not
    allow storing scraped or extracted data for longer than a given amount of
    time, as is the case for e.g. Tumblr.

    Also deletes users whose (non-zero) expiration date has passed. Users
    whose expiration date is still in the future get a warning notification.

    Also deletes expired notifications.
    """
    # job type this worker registers under
    type = "expire-datasets"
    max_workers = 1

    # NOTE(review): presumably makes the scheduler re-queue this worker every
    # 300 seconds - confirm against BasicWorker
    ensure_job = {"remote_id": "localhost", "interval": 300}

    def work(self):
        """
        Delete datasets, users and notifications

        Runs the three clean-up passes in a fixed order and then marks the
        job as finished.
        """
        self.expire_datasets()
        self.expire_users()
        self.expire_notifications()

        self.job.finish()

    def expire_datasets(self):
        """
        Delete expired datasets

        Every dataset without a `keep` parameter is loaded and deleted if it
        reports itself as expired.

        :raises WorkerInterruptedException:  If the worker is interrupted
        while looping through the datasets.
        """
        # find candidates
        # todo: make this better - this can be a lot of datasets!
        rows = self.db.fetchall("""
            SELECT key FROM datasets
             WHERE parameters::json->>'keep' IS NULL
        """)

        for row in rows:
            if self.interrupted:
                raise WorkerInterruptedException("Interrupted while expiring datasets")

            try:
                dataset = DataSet(key=row["key"], db=self.db)
                if dataset.is_expired():
                    self.log.info(f"Deleting dataset {dataset.key} (expired)")
                    dataset.delete()

            except DataSetNotFoundException:
                # dataset was removed between the query and now; nothing
                # left to do for it
                pass

    def expire_users(self):
        """
        Delete expired users

        Users can have a `delete-after` parameter in their user data which
        indicates a date or time after which the account should be deleted.

        The date can be in YYYY-MM-DD format or a unix (UTC) timestamp. If
        the current date is after the given date the account is deleted.
        Otherwise a notification is added for the user to warn them of the
        upcoming deletion.

        Values that cannot be interpreted as a date are logged and skipped,
        so one malformed value does not stop the other users from being
        processed.

        NOTE(review): earlier documentation said the warning is only added
        when the expiration date is within 7 days. The code has never applied
        such a window; behaviour is left unchanged here - confirm intent.

        :raises WorkerInterruptedException:  If the worker is interrupted
        while looping through the users.
        """
        expiring_users = self.db.fetchall("SELECT * FROM users WHERE userdata::json->>'delete-after' IS NOT NULL;")
        now = datetime.datetime.now()

        for expiring_user in expiring_users:
            if self.interrupted:
                raise WorkerInterruptedException("Interrupted while expiring users")

            user = User.get_by_name(self.db, expiring_user["name"])
            if user is None:
                # NOTE(review): assumes get_by_name returns None when the
                # user no longer exists - confirm against User
                continue

            username = user.data["name"]

            # parse expiration date if available
            delete_after = user.get_value("delete-after")
            if not delete_after:
                continue

            raw_value = str(delete_after)
            try:
                if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", raw_value):
                    expires_at = datetime.datetime.strptime(raw_value, "%Y-%m-%d")
                elif re.match(r"^[0-9]+$", raw_value):
                    expires_at = datetime.datetime.fromtimestamp(int(raw_value))
                else:
                    expires_at = None
            except (ValueError, OverflowError, OSError):
                # matches the pattern but is not a real date (e.g. month 13)
                # or a timestamp outside the range the platform supports
                expires_at = None

            if expires_at is None:
                self.log.warning(f"User {username} has invalid expiration date {delete_after}")
                continue

            # check if expired...
            if expires_at < now:
                self.log.info(f"User {username} expired - deleting user and datasets")
                user.delete()
            else:
                # the datetime attribute needs a machine-readable value; the
                # day is formatted by hand because '%-d' is not portable
                machine_readable = expires_at.isoformat()
                human_readable = f"{expires_at.day} {expires_at.strftime('%B %Y %H:%M')}"
                warning_notification = f"WARNING: This account will be deleted at <time datetime=\"{machine_readable}\">{human_readable}</time>. Make sure to back up your data before then."
                user.add_notification(warning_notification)

    def expire_notifications(self):
        """
        Delete expired notifications

        Removes every notification whose `timestamp_expires` is set and lies
        before the current time.
        """
        self.db.execute(f"DELETE FROM users_notifications WHERE timestamp_expires IS NOT NULL AND timestamp_expires < {time.time()}")
Delete old items
Deletes expired datasets. This may be useful for two reasons: to conserve disk space and if the user agreement of a particular data source does not allow storing scraped or extracted data for longer than a given amount of time, as is the case for e.g. Tumblr.
Also deletes users that have an expiration date that is not zero. Users with a close expiration date get a notification.
Also deletes expired notifications.
def work(self):
    """
    Delete datasets, users and notifications

    Runs the dataset, user and notification clean-up passes one after the
    other, in that order, and then marks the job as finished.
    """
    cleanup_passes = (
        self.expire_datasets,
        self.expire_users,
        self.expire_notifications,
    )
    for run_pass in cleanup_passes:
        run_pass()

    self.job.finish()
Delete datasets, users and notifications
def expire_datasets(self):
    """
    Delete expired datasets

    Fetches the key of every dataset that has no `keep` parameter, loads
    each one and deletes it when it reports itself as expired. Datasets that
    can no longer be found are silently skipped.

    :raises WorkerInterruptedException:  If the worker is interrupted while
    looping through the datasets.
    """
    # find candidates
    # todo: make this better - this can be a lot of datasets!
    candidate_query = """
        SELECT key FROM datasets
         WHERE parameters::json->>'keep' IS NULL
    """
    candidate_rows = self.db.fetchall(candidate_query)

    for row in candidate_rows:
        if self.interrupted:
            raise WorkerInterruptedException("Interrupted while expiring datasets")

        try:
            dataset = DataSet(key=row["key"], db=self.db)
            if not dataset.is_expired():
                continue

            self.log.info(f"Deleting dataset {dataset.key} (expired)")
            dataset.delete()

        except DataSetNotFoundException:
            # dataset already deleted I guess?
            continue
Delete expired datasets
def expire_users(self):
    """
    Delete expired users

    Users can have a `delete-after` parameter in their user data which
    indicates a date or time after which the account should be deleted.

    The date can be in YYYY-MM-DD format or a unix (UTC) timestamp. If
    the current date is after the given date the account is deleted.
    Otherwise a notification is added for the user to warn them of the
    upcoming deletion.

    Values that cannot be interpreted as a date are logged and skipped, so
    one malformed value does not stop the other users from being processed.

    NOTE(review): earlier documentation said the warning is only added when
    the expiration date is within 7 days. The code has never applied such a
    window; behaviour is left unchanged here - confirm intent.

    :raises WorkerInterruptedException:  If the worker is interrupted while
    looping through the users.
    """
    expiring_users = self.db.fetchall("SELECT * FROM users WHERE userdata::json->>'delete-after' IS NOT NULL;")
    now = datetime.datetime.now()

    for expiring_user in expiring_users:
        if self.interrupted:
            raise WorkerInterruptedException("Interrupted while expiring users")

        user = User.get_by_name(self.db, expiring_user["name"])
        if user is None:
            # NOTE(review): assumes get_by_name returns None when the user
            # no longer exists - confirm against User
            continue

        username = user.data["name"]

        # parse expiration date if available
        delete_after = user.get_value("delete-after")
        if not delete_after:
            continue

        raw_value = str(delete_after)
        try:
            if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", raw_value):
                expires_at = datetime.datetime.strptime(raw_value, "%Y-%m-%d")
            elif re.match(r"^[0-9]+$", raw_value):
                expires_at = datetime.datetime.fromtimestamp(int(raw_value))
            else:
                expires_at = None
        except (ValueError, OverflowError, OSError):
            # matches the pattern but is not a real date (e.g. month 13) or
            # a timestamp outside the range the platform supports
            expires_at = None

        if expires_at is None:
            self.log.warning(f"User {username} has invalid expiration date {delete_after}")
            continue

        # check if expired...
        if expires_at < now:
            self.log.info(f"User {username} expired - deleting user and datasets")
            user.delete()
        else:
            # the datetime attribute needs a machine-readable value; the day
            # is formatted by hand because '%-d' is not portable
            machine_readable = expires_at.isoformat()
            human_readable = f"{expires_at.day} {expires_at.strftime('%B %Y %H:%M')}"
            warning_notification = f"WARNING: This account will be deleted at <time datetime=\"{machine_readable}\">{human_readable}</time>. Make sure to back up your data before then."
            user.add_notification(warning_notification)
Delete expired users
Users can have a `delete-after` parameter in their user data which indicates a date or time after which the account should be deleted.
The date can be in YYYY-MM-DD format or a unix (UTC) timestamp. If the current date is after the given date the account is deleted. If the expiration date is within 7 days a notification is added for the user to warn them.
def expire_notifications(self):
    """
    Delete expired notifications

    Issues a single DELETE for every notification whose `timestamp_expires`
    is set and lies before the current time.
    """
    cutoff = time.time()
    delete_query = f"DELETE FROM users_notifications WHERE timestamp_expires IS NOT NULL AND timestamp_expires < {cutoff}"
    self.db.execute(delete_query)
Delete expired notifications
Pretty simple!