Edit on GitHub

backend.workers.expire_items

Delete old items

  1"""
  2Delete old items
  3"""
  4import datetime
  5import time
  6import re
  7
  8from backend.lib.worker import BasicWorker
  9from common.lib.dataset import DataSet
 10from common.lib.exceptions import DataSetNotFoundException, WorkerInterruptedException
 11
 12from common.lib.user import User
 13from common.config_manager import ConfigWrapper
 14
 15
 16class ThingExpirer(BasicWorker):
 17	"""
 18	Delete old items
 19
 20	Deletes expired datasets. This may be useful for two reasons: to conserve
 21	disk space and if the user agreement of a particular data source does not
 22	allow storing scraped or extracted data for longer than a given amount of
 23	time, as is the case for e.g. Tumblr.
 24
 25	Also deletes users that have an expiration date that is not zero. Users
 26	with a close expiration date get a notification.
 27
 28	Also deletes expired notifications.
 29	"""
 30	type = "expire-datasets"
 31	max_workers = 1
 32
 33	ensure_job = {"remote_id": "localhost", "interval": 300}
 34
 35	def work(self):
 36		"""
 37		Delete datasets, users and notifications
 38		"""
 39
 40		self.expire_datasets()
 41		self.expire_users()
 42		self.expire_notifications()
 43
 44		self.job.finish()
 45
 46	def expire_datasets(self):
 47		"""
 48		Delete expired datasets
 49		"""
 50		# find candidates
 51		# todo: make this better - this can be a lot of datasets!
 52		datasets = self.db.fetchall("""
 53			SELECT key FROM datasets
 54			 WHERE parameters::json->>'keep' IS NULL
 55		""")
 56
 57		for dataset in datasets:
 58			if self.interrupted:
 59				raise WorkerInterruptedException("Interrupted while expiring datasets")
 60
 61			# the dataset creator's configuration context determines expiration
 62			try:
 63				dataset = DataSet(key=dataset["key"], db=self.db)
 64				wrapper = ConfigWrapper(self.config, user=User.get_by_name(self.db, dataset.creator))
 65				if dataset.is_expired(config=wrapper):
 66					self.log.info(f"Deleting dataset {dataset.key} (expired)")
 67					dataset.delete()
 68
 69			except DataSetNotFoundException:
 70				# dataset already deleted I guess?
 71				pass
 72
 73	def expire_users(self):
 74		"""
 75		Delete expired users
 76
 77		Users can have a `delete-after` parameter in their user data which
 78		indicates a date or time after which the account should be deleted.
 79
 80		The date can be in YYYY-MM-DD format or a unix (UTC) timestamp. If
 81		the current date is after the given date the account is deleted. If the
 82		expiration date is within 7 days a notification is added for the user
 83		to warn them.
 84		"""
 85		expiring_users = self.db.fetchall("SELECT * FROM users WHERE userdata::json->>'delete-after' IS NOT NULL;")
 86		now = datetime.datetime.now()
 87
 88		for expiring_user in expiring_users:
 89			if self.interrupted:
 90				raise WorkerInterruptedException("Interrupted while expiring users")
 91
 92			user = User.get_by_name(self.db, expiring_user["name"], config=self.config)
 93			username = user.data["name"]
 94
 95			# parse expiration date if available
 96			delete_after = user.get_value("delete-after")
 97			if not delete_after:
 98				continue
 99
100			if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", str(delete_after)):
101				expires_at = datetime.datetime.strptime(delete_after, "%Y-%m-%d")
102			elif re.match(r"^[0-9]+$", str(delete_after)):
103				expires_at = datetime.datetime.fromtimestamp(int(delete_after))
104			else:
105				self.log.warning(f"User {username} has invalid expiration date {delete_after}")
106				continue
107
108			# check if expired...
109			if expires_at < now:
110				self.log.info(f"User {username} expired - deleting user and datasets")
111				user.delete()
112			else:
113				warning_notification = f"WARNING: This account will be deleted at <time datetime=\"{expires_at.strftime('%C')}\">{expires_at.strftime('%-d %B %Y %H:%M')}</time>. Make sure to back up your data before then."
114				user.add_notification(warning_notification)
115
116	def expire_notifications(self):
117		"""
118		Delete expired notifications
119
120		Pretty simple!
121		"""
122		self.db.execute(f"DELETE FROM users_notifications WHERE timestamp_expires IS NOT NULL AND timestamp_expires < {time.time()}")
class ThingExpirer(backend.lib.worker.BasicWorker):
 17class ThingExpirer(BasicWorker):
 18	"""
 19	Delete old items
 20
 21	Deletes expired datasets. This may be useful for two reasons: to conserve
 22	disk space and if the user agreement of a particular data source does not
 23	allow storing scraped or extracted data for longer than a given amount of
 24	time, as is the case for e.g. Tumblr.
 25
 26	Also deletes users that have an expiration date that is not zero. Users
 27	with a close expiration date get a notification.
 28
 29	Also deletes expired notifications.
 30	"""
 31	type = "expire-datasets"
 32	max_workers = 1
 33
 34	ensure_job = {"remote_id": "localhost", "interval": 300}
 35
 36	def work(self):
 37		"""
 38		Delete datasets, users and notifications
 39		"""
 40
 41		self.expire_datasets()
 42		self.expire_users()
 43		self.expire_notifications()
 44
 45		self.job.finish()
 46
 47	def expire_datasets(self):
 48		"""
 49		Delete expired datasets
 50		"""
 51		# find candidates
 52		# todo: make this better - this can be a lot of datasets!
 53		datasets = self.db.fetchall("""
 54			SELECT key FROM datasets
 55			 WHERE parameters::json->>'keep' IS NULL
 56		""")
 57
 58		for dataset in datasets:
 59			if self.interrupted:
 60				raise WorkerInterruptedException("Interrupted while expiring datasets")
 61
 62			# the dataset creator's configuration context determines expiration
 63			try:
 64				dataset = DataSet(key=dataset["key"], db=self.db)
 65				wrapper = ConfigWrapper(self.config, user=User.get_by_name(self.db, dataset.creator))
 66				if dataset.is_expired(config=wrapper):
 67					self.log.info(f"Deleting dataset {dataset.key} (expired)")
 68					dataset.delete()
 69
 70			except DataSetNotFoundException:
 71				# dataset already deleted I guess?
 72				pass
 73
 74	def expire_users(self):
 75		"""
 76		Delete expired users
 77
 78		Users can have a `delete-after` parameter in their user data which
 79		indicates a date or time after which the account should be deleted.
 80
 81		The date can be in YYYY-MM-DD format or a unix (UTC) timestamp. If
 82		the current date is after the given date the account is deleted. If the
 83		expiration date is within 7 days a notification is added for the user
 84		to warn them.
 85		"""
 86		expiring_users = self.db.fetchall("SELECT * FROM users WHERE userdata::json->>'delete-after' IS NOT NULL;")
 87		now = datetime.datetime.now()
 88
 89		for expiring_user in expiring_users:
 90			if self.interrupted:
 91				raise WorkerInterruptedException("Interrupted while expiring users")
 92
 93			user = User.get_by_name(self.db, expiring_user["name"], config=self.config)
 94			username = user.data["name"]
 95
 96			# parse expiration date if available
 97			delete_after = user.get_value("delete-after")
 98			if not delete_after:
 99				continue
100
101			if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", str(delete_after)):
102				expires_at = datetime.datetime.strptime(delete_after, "%Y-%m-%d")
103			elif re.match(r"^[0-9]+$", str(delete_after)):
104				expires_at = datetime.datetime.fromtimestamp(int(delete_after))
105			else:
106				self.log.warning(f"User {username} has invalid expiration date {delete_after}")
107				continue
108
109			# check if expired...
110			if expires_at < now:
111				self.log.info(f"User {username} expired - deleting user and datasets")
112				user.delete()
113			else:
114				warning_notification = f"WARNING: This account will be deleted at <time datetime=\"{expires_at.strftime('%C')}\">{expires_at.strftime('%-d %B %Y %H:%M')}</time>. Make sure to back up your data before then."
115				user.add_notification(warning_notification)
116
117	def expire_notifications(self):
118		"""
119		Delete expired notifications
120
121		Pretty simple!
122		"""
123		self.db.execute(f"DELETE FROM users_notifications WHERE timestamp_expires IS NOT NULL AND timestamp_expires < {time.time()}")

Delete old items

Deletes expired datasets. This may be useful for two reasons: to conserve disk space and if the user agreement of a particular data source does not allow storing scraped or extracted data for longer than a given amount of time, as is the case for e.g. Tumblr.

Also deletes users that have an expiration date that is not zero. Users with a close expiration date get a notification.

Also deletes expired notifications.

type = 'expire-datasets'
max_workers = 1
ensure_job = {'remote_id': 'localhost', 'interval': 300}
def work(self):
36	def work(self):
37		"""
38		Delete datasets, users and notifications
39		"""
40
41		self.expire_datasets()
42		self.expire_users()
43		self.expire_notifications()
44
45		self.job.finish()

Delete datasets, users and notifications

def expire_datasets(self):
47	def expire_datasets(self):
48		"""
49		Delete expired datasets
50		"""
51		# find candidates
52		# todo: make this better - this can be a lot of datasets!
53		datasets = self.db.fetchall("""
54			SELECT key FROM datasets
55			 WHERE parameters::json->>'keep' IS NULL
56		""")
57
58		for dataset in datasets:
59			if self.interrupted:
60				raise WorkerInterruptedException("Interrupted while expiring datasets")
61
62			# the dataset creator's configuration context determines expiration
63			try:
64				dataset = DataSet(key=dataset["key"], db=self.db)
65				wrapper = ConfigWrapper(self.config, user=User.get_by_name(self.db, dataset.creator))
66				if dataset.is_expired(config=wrapper):
67					self.log.info(f"Deleting dataset {dataset.key} (expired)")
68					dataset.delete()
69
70			except DataSetNotFoundException:
71				# dataset already deleted I guess?
72				pass

Delete expired datasets

def expire_users(self):
	@staticmethod
	def _parse_expiration(value):
		"""
		Parse a `delete-after` value into a naive, local datetime

		:param value:  Either a YYYY-MM-DD date or a unix timestamp consisting
		of digits only
		:return:  The parsed `datetime.datetime`, or `None` if the value has
		an unknown format or does not represent an actual date
		"""
		value = str(value)
		try:
			if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", value):
				return datetime.datetime.strptime(value, "%Y-%m-%d")
			if re.match(r"^[0-9]+$", value):
				return datetime.datetime.fromtimestamp(int(value))
		except (ValueError, OverflowError, OSError):
			# the pattern matched, but this is not a real date (e.g.
			# 2024-02-31) or the timestamp cannot be represented
			return None

		return None

	def expire_users(self):
		"""
		Delete expired users

		Users can have a `delete-after` parameter in their user data which
		indicates a date or time after which the account should be deleted.

		The date can be in YYYY-MM-DD format or a unix timestamp. If the
		current time is after the given moment the account is deleted.
		Otherwise a notification is added for the user to warn them of the
		upcoming deletion. Values that cannot be parsed are logged and the
		user is skipped.

		NOTE(review): this docstring used to mention a 7-day warning window,
		but no such window is applied here - confirm whether it should be.

		:raises WorkerInterruptedException:  If the worker is interrupted
		while looping through the users
		"""
		expiring_users = self.db.fetchall("SELECT * FROM users WHERE userdata::json->>'delete-after' IS NOT NULL;")
		now = datetime.datetime.now()

		for expiring_user in expiring_users:
			if self.interrupted:
				raise WorkerInterruptedException("Interrupted while expiring users")

			user = User.get_by_name(self.db, expiring_user["name"], config=self.config)
			username = user.data["name"]

			# parse expiration date if available
			delete_after = user.get_value("delete-after")
			if not delete_after:
				continue

			expires_at = self._parse_expiration(delete_after)
			if expires_at is None:
				self.log.warning(f"User {username} has invalid expiration date {delete_after}")
				continue

			# check if expired...
			if expires_at < now:
				self.log.info(f"User {username} expired - deleting user and datasets")
				user.delete()
				continue

			# the `datetime` attribute of <time> needs a machine-readable
			# value; the day is formatted without strftime because the
			# non-padded `%-d` directive is not available on all platforms
			machine_date = expires_at.isoformat()
			readable_date = f"{expires_at.day} {expires_at.strftime('%B %Y %H:%M')}"
			warning_notification = f"WARNING: This account will be deleted at <time datetime=\"{machine_date}\">{readable_date}</time>. Make sure to back up your data before then."
			user.add_notification(warning_notification)

Delete expired users

Users can have a delete-after parameter in their user data which indicates a date or time after which the account should be deleted.

The date can be in YYYY-MM-DD format or a unix timestamp. If the current time is after the given date, the account is deleted. Otherwise, a notification is added for the user to warn them of the upcoming deletion.

def expire_notifications(self):
117	def expire_notifications(self):
118		"""
119		Delete expired notifications
120
121		Pretty simple!
122		"""
123		self.db.execute(f"DELETE FROM users_notifications WHERE timestamp_expires IS NOT NULL AND timestamp_expires < {time.time()}")

Delete expired notifications

Pretty simple!