Edit on GitHub

backend.workers.expire_items

Delete old items

  1"""
  2Delete old items
  3"""
  4import datetime
  5import time
  6import json
  7import re
  8
  9from backend.lib.worker import BasicWorker
 10from common.lib.dataset import DataSet
 11from common.lib.exceptions import DataSetNotFoundException, WorkerInterruptedException
 12
 13from common.lib.user import User
 14
 15
 16class ThingExpirer(BasicWorker):
 17	"""
 18	Delete old items
 19
 20	Deletes expired datasets. This may be useful for two reasons: to conserve
 21	disk space and if the user agreement of a particular data source does not
 22	allow storing scraped or extracted data for longer than a given amount of
 23	time, as is the case for e.g. Tumblr.
 24
 25	Also deletes users that have an expiration date that is not zero. Users
 26	with a close expiration date get a notification.
 27
 28	Also deletes expired notifications.
 29	"""
 30	type = "expire-datasets"
 31	max_workers = 1
 32
 33	ensure_job = {"remote_id": "localhost", "interval": 300}
 34
 35	def work(self):
 36		"""
 37		Delete datasets, users and notifications
 38		"""
 39
 40		self.expire_datasets()
 41		self.expire_users()
 42		self.expire_notifications()
 43
 44		self.job.finish()
 45
 46	def expire_datasets(self):
 47		"""
 48		Delete expired datasets
 49		"""
 50		# find candidates
 51		# todo: make this better - this can be a lot of datasets!
 52		datasets = self.db.fetchall("""
 53			SELECT key FROM datasets
 54			 WHERE parameters::json->>'keep' IS NULL
 55		""")
 56
 57		for dataset in datasets:
 58			if self.interrupted:
 59				raise WorkerInterruptedException("Interrupted while expiring datasets")
 60
 61			try:
 62				dataset = DataSet(key=dataset["key"], db=self.db)
 63				if dataset.is_expired():
 64					self.log.info(f"Deleting dataset {dataset.key} (expired)")
 65					dataset.delete()
 66
 67			except DataSetNotFoundException:
 68				# dataset already deleted I guess?
 69				pass
 70
 71	def expire_users(self):
 72		"""
 73		Delete expired users
 74
 75		Users can have a `delete-after` parameter in their user data which
 76		indicates a date or time after which the account should be deleted.
 77
 78		The date can be in YYYY-MM-DD format or a unix (UTC) timestamp. If
 79		the current date is after the given date the account is deleted. If the
 80		expiration date is within 7 days a notification is added for the user
 81		to warn them.
 82		"""
 83		expiring_users = self.db.fetchall("SELECT * FROM users WHERE userdata::json->>'delete-after' IS NOT NULL;")
 84		now = datetime.datetime.now()
 85
 86		for expiring_user in expiring_users:
 87			if self.interrupted:
 88				raise WorkerInterruptedException("Interrupted while expiring users")
 89
 90			user = User.get_by_name(self.db, expiring_user["name"])
 91			username = user.data["name"]
 92
 93			# parse expiration date if available
 94			delete_after = user.get_value("delete-after")
 95			if not delete_after:
 96				continue
 97
 98			if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", str(delete_after)):
 99				expires_at = datetime.datetime.strptime(delete_after, "%Y-%m-%d")
100			elif re.match(r"^[0-9]+$", str(delete_after)):
101				expires_at = datetime.datetime.fromtimestamp(int(delete_after))
102			else:
103				self.log.warning(f"User {username} has invalid expiration date {delete_after}")
104				continue
105
106			# check if expired...
107			if expires_at < now:
108				self.log.info(f"User {username} expired - deleting user and datasets")
109				user.delete()
110			else:
111				warning_notification = f"WARNING: This account will be deleted at <time datetime=\"{expires_at.strftime('%C')}\">{expires_at.strftime('%-d %B %Y %H:%M')}</time>. Make sure to back up your data before then."
112				user.add_notification(warning_notification)
113
114	def expire_notifications(self):
115		"""
116		Delete expired notifications
117
118		Pretty simple!
119		"""
120		self.db.execute(f"DELETE FROM users_notifications WHERE timestamp_expires IS NOT NULL AND timestamp_expires < {time.time()}")
class ThingExpirer(backend.lib.worker.BasicWorker):
 17class ThingExpirer(BasicWorker):
 18	"""
 19	Delete old items
 20
 21	Deletes expired datasets. This may be useful for two reasons: to conserve
 22	disk space and if the user agreement of a particular data source does not
 23	allow storing scraped or extracted data for longer than a given amount of
 24	time, as is the case for e.g. Tumblr.
 25
 26	Also deletes users that have an expiration date that is not zero. Users
 27	with a close expiration date get a notification.
 28
 29	Also deletes expired notifications.
 30	"""
 31	type = "expire-datasets"
 32	max_workers = 1
 33
 34	ensure_job = {"remote_id": "localhost", "interval": 300}
 35
 36	def work(self):
 37		"""
 38		Delete datasets, users and notifications
 39		"""
 40
 41		self.expire_datasets()
 42		self.expire_users()
 43		self.expire_notifications()
 44
 45		self.job.finish()
 46
 47	def expire_datasets(self):
 48		"""
 49		Delete expired datasets
 50		"""
 51		# find candidates
 52		# todo: make this better - this can be a lot of datasets!
 53		datasets = self.db.fetchall("""
 54			SELECT key FROM datasets
 55			 WHERE parameters::json->>'keep' IS NULL
 56		""")
 57
 58		for dataset in datasets:
 59			if self.interrupted:
 60				raise WorkerInterruptedException("Interrupted while expiring datasets")
 61
 62			try:
 63				dataset = DataSet(key=dataset["key"], db=self.db)
 64				if dataset.is_expired():
 65					self.log.info(f"Deleting dataset {dataset.key} (expired)")
 66					dataset.delete()
 67
 68			except DataSetNotFoundException:
 69				# dataset already deleted I guess?
 70				pass
 71
 72	def expire_users(self):
 73		"""
 74		Delete expired users
 75
 76		Users can have a `delete-after` parameter in their user data which
 77		indicates a date or time after which the account should be deleted.
 78
 79		The date can be in YYYY-MM-DD format or a unix (UTC) timestamp. If
 80		the current date is after the given date the account is deleted. If the
 81		expiration date is within 7 days a notification is added for the user
 82		to warn them.
 83		"""
 84		expiring_users = self.db.fetchall("SELECT * FROM users WHERE userdata::json->>'delete-after' IS NOT NULL;")
 85		now = datetime.datetime.now()
 86
 87		for expiring_user in expiring_users:
 88			if self.interrupted:
 89				raise WorkerInterruptedException("Interrupted while expiring users")
 90
 91			user = User.get_by_name(self.db, expiring_user["name"])
 92			username = user.data["name"]
 93
 94			# parse expiration date if available
 95			delete_after = user.get_value("delete-after")
 96			if not delete_after:
 97				continue
 98
 99			if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", str(delete_after)):
100				expires_at = datetime.datetime.strptime(delete_after, "%Y-%m-%d")
101			elif re.match(r"^[0-9]+$", str(delete_after)):
102				expires_at = datetime.datetime.fromtimestamp(int(delete_after))
103			else:
104				self.log.warning(f"User {username} has invalid expiration date {delete_after}")
105				continue
106
107			# check if expired...
108			if expires_at < now:
109				self.log.info(f"User {username} expired - deleting user and datasets")
110				user.delete()
111			else:
112				warning_notification = f"WARNING: This account will be deleted at <time datetime=\"{expires_at.strftime('%C')}\">{expires_at.strftime('%-d %B %Y %H:%M')}</time>. Make sure to back up your data before then."
113				user.add_notification(warning_notification)
114
115	def expire_notifications(self):
116		"""
117		Delete expired notifications
118
119		Pretty simple!
120		"""
121		self.db.execute(f"DELETE FROM users_notifications WHERE timestamp_expires IS NOT NULL AND timestamp_expires < {time.time()}")

Delete old items

Deletes expired datasets. This may be useful for two reasons: to conserve disk space and if the user agreement of a particular data source does not allow storing scraped or extracted data for longer than a given amount of time, as is the case for e.g. Tumblr.

Also deletes users that have an expiration date that is not zero. Users with a close expiration date get a notification.

Also deletes expired notifications.

type = 'expire-datasets'
max_workers = 1
ensure_job = {'remote_id': 'localhost', 'interval': 300}
def work(self):
36	def work(self):
37		"""
38		Delete datasets, users and notifications
39		"""
40
41		self.expire_datasets()
42		self.expire_users()
43		self.expire_notifications()
44
45		self.job.finish()

Delete datasets, users and notifications

def expire_datasets(self):
47	def expire_datasets(self):
48		"""
49		Delete expired datasets
50		"""
51		# find candidates
52		# todo: make this better - this can be a lot of datasets!
53		datasets = self.db.fetchall("""
54			SELECT key FROM datasets
55			 WHERE parameters::json->>'keep' IS NULL
56		""")
57
58		for dataset in datasets:
59			if self.interrupted:
60				raise WorkerInterruptedException("Interrupted while expiring datasets")
61
62			try:
63				dataset = DataSet(key=dataset["key"], db=self.db)
64				if dataset.is_expired():
65					self.log.info(f"Deleting dataset {dataset.key} (expired)")
66					dataset.delete()
67
68			except DataSetNotFoundException:
69				# dataset already deleted I guess?
70				pass

Delete expired datasets

def expire_users(self):
	def expire_users(self):
		"""
		Delete expired users

		Users can have a `delete-after` parameter in their user data which
		indicates a date or time after which the account should be deleted.

		The date can be in YYYY-MM-DD format or a unix timestamp. A
		YYYY-MM-DD date is read as midnight local time; a timestamp is
		converted to local time. If the current local time is after the given
		date the account is deleted. Otherwise a notification is added for
		the user to warn them of the upcoming deletion.

		Values that cannot be interpreted as a date are logged and skipped,
		so one malformed account does not abort the whole run.

		:raises WorkerInterruptedException:  If the worker is interrupted
		while iterating over the users
		"""
		expiring_users = self.db.fetchall("SELECT * FROM users WHERE userdata::json->>'delete-after' IS NOT NULL;")
		now = datetime.datetime.now()

		for expiring_user in expiring_users:
			if self.interrupted:
				raise WorkerInterruptedException("Interrupted while expiring users")

			user = User.get_by_name(self.db, expiring_user["name"])
			if user is None:
				# presumably the account was removed after the query above
				# ran; nothing left to expire
				continue

			username = user.data["name"]

			# parse expiration date if available
			delete_after = user.get_value("delete-after")
			if not delete_after:
				continue

			# the regexes only check the shape of the value; a date like
			# 2023-13-45 or an out-of-range timestamp still fails to convert
			raw_value = str(delete_after)
			expires_at = None
			try:
				if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", raw_value):
					expires_at = datetime.datetime.strptime(raw_value, "%Y-%m-%d")
				elif re.match(r"^[0-9]+$", raw_value):
					expires_at = datetime.datetime.fromtimestamp(int(raw_value))
			except (ValueError, OverflowError, OSError):
				expires_at = None

			if expires_at is None:
				self.log.warning(f"User {username} has invalid expiration date {delete_after}")
				continue

			# check if expired...
			if expires_at < now:
				self.log.info(f"User {username} expired - deleting user and datasets")
				user.delete()
				continue

			# NOTE(review): earlier documentation said only accounts expiring
			# within 7 days are warned, but every not-yet-expired account
			# gets this notification - confirm which is intended
			#
			# the datetime attribute needs a machine-readable value, so use
			# ISO 8601; the day of the month is formatted without strftime
			# because the `%-d` directive is not available on all platforms
			machine_time = expires_at.isoformat()
			readable_time = f"{expires_at.day} {expires_at.strftime('%B %Y %H:%M')}"
			warning_notification = f"WARNING: This account will be deleted at <time datetime=\"{machine_time}\">{readable_time}</time>. Make sure to back up your data before then."
			user.add_notification(warning_notification)

Delete expired users

Users can have a delete-after parameter in their user data which indicates a date or time after which the account should be deleted.

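The date can be in YYYY-MM-DD format or a unix (UTC) timestamp. If the current date is after the given date the account is deleted. If the expiration date is within 7 days a notification is added for the user to warn them. (Note: the code shown above adds this notification for every account that has not yet expired, not only for accounts expiring within 7 days.)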

def expire_notifications(self):
115	def expire_notifications(self):
116		"""
117		Delete expired notifications
118
119		Pretty simple!
120		"""
121		self.db.execute(f"DELETE FROM users_notifications WHERE timestamp_expires IS NOT NULL AND timestamp_expires < {time.time()}")

Delete expired notifications

Pretty simple!