Edit on GitHub

backend.workers.expire_items

Delete old items

  1"""
  2Delete old items
  3"""
  4import datetime
  5import time
  6import re
  7
  8from backend.lib.worker import BasicWorker
  9from common.lib.dataset import DataSet
 10from common.lib.exceptions import DataSetNotFoundException, WorkerInterruptedException
 11
 12from common.lib.user import User
 13from common.config_manager import ConfigWrapper
 14
 15
 16class ThingExpirer(BasicWorker):
 17	"""
 18	Delete old items
 19
 20	Deletes expired datasets. This may be useful for two reasons: to conserve
 21	disk space and if the user agreement of a particular data source does not
 22	allow storing scraped or extracted data for longer than a given amount of
 23	time, as is the case for e.g. Tumblr.
 24
 25	Also deletes users that have an expiration date that is not zero. Users
 26	with a close expiration date get a notification.
 27
 28	Also deletes expired notifications.
 29	"""
 30	type = "expire-datasets"
 31	max_workers = 1
 32
 33	@classmethod
 34	def ensure_job(cls, config=None):
 35		"""
 36		Ensure that the expirer is always running
 37
 38		This is used to ensure that the expirer is always running, and if it is
 39		not, it will be started by the WorkerManager.
 40
 41		:return:  Job parameters for the worker
 42		"""
 43		return {"remote_id": "localhost", "interval": 300}
 44
 45	def work(self):
 46		"""
 47		Delete datasets, users and notifications
 48		"""
 49
 50		self.expire_datasets()
 51		self.expire_users()
 52		self.expire_notifications()
 53
 54		self.job.finish()
 55
 56	def expire_datasets(self):
 57		"""
 58		Delete expired datasets
 59		"""
 60		# find candidates
 61		# todo: make this better - this can be a lot of datasets!
 62		datasets = self.db.fetchall("""
 63			SELECT key FROM datasets
 64			 WHERE parameters::json->>'keep' IS NULL
 65		""")
 66
 67		for dataset in datasets:
 68			if self.interrupted:
 69				raise WorkerInterruptedException("Interrupted while expiring datasets")
 70
 71			# the dataset creator's configuration context determines expiration
 72			try:
 73				dataset = DataSet(key=dataset["key"], db=self.db, modules=self.modules)
 74				wrapper = ConfigWrapper(self.config, user=User.get_by_name(self.db, dataset.creator))
 75				if dataset.is_expired(config=wrapper):
 76					self.log.info(f"Deleting dataset {dataset.key} (expired)")
 77					dataset.delete()
 78
 79			except DataSetNotFoundException:
 80				# dataset already deleted I guess?
 81				pass
 82
 83	def expire_users(self):
 84		"""
 85		Delete expired users
 86
 87		Users can have a `delete-after` parameter in their user data which
 88		indicates a date or time after which the account should be deleted.
 89
 90		The date can be in YYYY-MM-DD format or a unix (UTC) timestamp. If
 91		the current date is after the given date the account is deleted. If the
 92		expiration date is within 7 days a notification is added for the user
 93		to warn them.
 94		"""
 95		expiring_users = self.db.fetchall("SELECT * FROM users WHERE userdata::json->>'delete-after' IS NOT NULL;")
 96		now = datetime.datetime.now()
 97
 98		for expiring_user in expiring_users:
 99			if self.interrupted:
100				raise WorkerInterruptedException("Interrupted while expiring users")
101
102			user = User.get_by_name(self.db, expiring_user["name"], config=self.config)
103			username = user.data["name"]
104
105			# parse expiration date if available
106			delete_after = user.get_value("delete-after")
107			if not delete_after:
108				continue
109
110			if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", str(delete_after)):
111				expires_at = datetime.datetime.strptime(delete_after, "%Y-%m-%d")
112			elif re.match(r"^[0-9]+$", str(delete_after)):
113				expires_at = datetime.datetime.fromtimestamp(int(delete_after))
114			else:
115				self.log.warning(f"User {username} has invalid expiration date {delete_after}")
116				continue
117
118			# check if expired...
119			if expires_at < now:
120				self.log.info(f"User {username} expired - deleting user and datasets")
121				user.delete(modules=self.modules)
122			else:
123				warning_notification = f"WARNING: This account will be deleted at <time datetime=\"{expires_at.strftime('%C')}\">{expires_at.strftime('%-d %B %Y %H:%M')}</time>. Make sure to back up your data before then."
124				user.add_notification(warning_notification)
125
126	def expire_notifications(self):
127		"""
128		Delete expired notifications
129
130		Pretty simple!
131		"""
132		self.db.execute(f"DELETE FROM users_notifications WHERE timestamp_expires IS NOT NULL AND timestamp_expires < {time.time()}")
class ThingExpirer(backend.lib.worker.BasicWorker):
 17class ThingExpirer(BasicWorker):
 18	"""
 19	Delete old items
 20
 21	Deletes expired datasets. This may be useful for two reasons: to conserve
 22	disk space and if the user agreement of a particular data source does not
 23	allow storing scraped or extracted data for longer than a given amount of
 24	time, as is the case for e.g. Tumblr.
 25
 26	Also deletes users that have an expiration date that is not zero. Users
 27	with a close expiration date get a notification.
 28
 29	Also deletes expired notifications.
 30	"""
 31	type = "expire-datasets"
 32	max_workers = 1
 33
 34	@classmethod
 35	def ensure_job(cls, config=None):
 36		"""
 37		Ensure that the expirer is always running
 38
 39		This is used to ensure that the expirer is always running, and if it is
 40		not, it will be started by the WorkerManager.
 41
 42		:return:  Job parameters for the worker
 43		"""
 44		return {"remote_id": "localhost", "interval": 300}
 45
 46	def work(self):
 47		"""
 48		Delete datasets, users and notifications
 49		"""
 50
 51		self.expire_datasets()
 52		self.expire_users()
 53		self.expire_notifications()
 54
 55		self.job.finish()
 56
 57	def expire_datasets(self):
 58		"""
 59		Delete expired datasets
 60		"""
 61		# find candidates
 62		# todo: make this better - this can be a lot of datasets!
 63		datasets = self.db.fetchall("""
 64			SELECT key FROM datasets
 65			 WHERE parameters::json->>'keep' IS NULL
 66		""")
 67
 68		for dataset in datasets:
 69			if self.interrupted:
 70				raise WorkerInterruptedException("Interrupted while expiring datasets")
 71
 72			# the dataset creator's configuration context determines expiration
 73			try:
 74				dataset = DataSet(key=dataset["key"], db=self.db, modules=self.modules)
 75				wrapper = ConfigWrapper(self.config, user=User.get_by_name(self.db, dataset.creator))
 76				if dataset.is_expired(config=wrapper):
 77					self.log.info(f"Deleting dataset {dataset.key} (expired)")
 78					dataset.delete()
 79
 80			except DataSetNotFoundException:
 81				# dataset already deleted I guess?
 82				pass
 83
 84	def expire_users(self):
 85		"""
 86		Delete expired users
 87
 88		Users can have a `delete-after` parameter in their user data which
 89		indicates a date or time after which the account should be deleted.
 90
 91		The date can be in YYYY-MM-DD format or a unix (UTC) timestamp. If
 92		the current date is after the given date the account is deleted. If the
 93		expiration date is within 7 days a notification is added for the user
 94		to warn them.
 95		"""
 96		expiring_users = self.db.fetchall("SELECT * FROM users WHERE userdata::json->>'delete-after' IS NOT NULL;")
 97		now = datetime.datetime.now()
 98
 99		for expiring_user in expiring_users:
100			if self.interrupted:
101				raise WorkerInterruptedException("Interrupted while expiring users")
102
103			user = User.get_by_name(self.db, expiring_user["name"], config=self.config)
104			username = user.data["name"]
105
106			# parse expiration date if available
107			delete_after = user.get_value("delete-after")
108			if not delete_after:
109				continue
110
111			if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", str(delete_after)):
112				expires_at = datetime.datetime.strptime(delete_after, "%Y-%m-%d")
113			elif re.match(r"^[0-9]+$", str(delete_after)):
114				expires_at = datetime.datetime.fromtimestamp(int(delete_after))
115			else:
116				self.log.warning(f"User {username} has invalid expiration date {delete_after}")
117				continue
118
119			# check if expired...
120			if expires_at < now:
121				self.log.info(f"User {username} expired - deleting user and datasets")
122				user.delete(modules=self.modules)
123			else:
124				warning_notification = f"WARNING: This account will be deleted at <time datetime=\"{expires_at.strftime('%C')}\">{expires_at.strftime('%-d %B %Y %H:%M')}</time>. Make sure to back up your data before then."
125				user.add_notification(warning_notification)
126
127	def expire_notifications(self):
128		"""
129		Delete expired notifications
130
131		Pretty simple!
132		"""
133		self.db.execute(f"DELETE FROM users_notifications WHERE timestamp_expires IS NOT NULL AND timestamp_expires < {time.time()}")

Delete old items

Deletes expired datasets. This may be useful for two reasons: to conserve disk space, and to comply with the user agreement of a data source that does not allow storing scraped or extracted data for longer than a given amount of time, as is the case for e.g. Tumblr.

Also deletes users that have an expiration date that is not zero. Users with a close expiration date get a notification.

Also deletes expired notifications.

type = 'expire-datasets'
max_workers = 1
@classmethod
def ensure_job(cls, config=None):
34	@classmethod
35	def ensure_job(cls, config=None):
36		"""
37		Ensure that the expirer is always running
38
39		This is used to ensure that the expirer is always running, and if it is
40		not, it will be started by the WorkerManager.
41
42		:return:  Job parameters for the worker
43		"""
44		return {"remote_id": "localhost", "interval": 300}

Ensure that the expirer is always running

This is used to ensure that the expirer is always running, and if it is not, it will be started by the WorkerManager.

Returns

Job parameters for the worker

def work(self):
46	def work(self):
47		"""
48		Delete datasets, users and notifications
49		"""
50
51		self.expire_datasets()
52		self.expire_users()
53		self.expire_notifications()
54
55		self.job.finish()

Delete datasets, users and notifications

def expire_datasets(self):
57	def expire_datasets(self):
58		"""
59		Delete expired datasets
60		"""
61		# find candidates
62		# todo: make this better - this can be a lot of datasets!
63		datasets = self.db.fetchall("""
64			SELECT key FROM datasets
65			 WHERE parameters::json->>'keep' IS NULL
66		""")
67
68		for dataset in datasets:
69			if self.interrupted:
70				raise WorkerInterruptedException("Interrupted while expiring datasets")
71
72			# the dataset creator's configuration context determines expiration
73			try:
74				dataset = DataSet(key=dataset["key"], db=self.db, modules=self.modules)
75				wrapper = ConfigWrapper(self.config, user=User.get_by_name(self.db, dataset.creator))
76				if dataset.is_expired(config=wrapper):
77					self.log.info(f"Deleting dataset {dataset.key} (expired)")
78					dataset.delete()
79
80			except DataSetNotFoundException:
81				# dataset already deleted I guess?
82				pass

Delete expired datasets

def expire_users(self):
	def expire_users(self):
		"""
		Delete expired users and warn users that are about to expire

		Users can have a `delete-after` parameter in their user data which
		indicates a date or time after which the account should be deleted.

		The date can be in YYYY-MM-DD format or a unix (UTC) timestamp. If
		the current date is after the given date the account is deleted. If the
		expiration date is within 7 days a notification is added for the user
		to warn them. Values that cannot be parsed are logged and skipped, so
		one malformed value does not keep the remaining users from being
		processed.

		:raises WorkerInterruptedException:  If the worker is interrupted while
		looping through the users
		"""
		# users are only warned when deletion is at most this far away
		warning_window = datetime.timedelta(days=7)

		expiring_users = self.db.fetchall("SELECT * FROM users WHERE userdata::json->>'delete-after' IS NOT NULL;")
		now = datetime.datetime.now()

		for expiring_user in expiring_users:
			if self.interrupted:
				raise WorkerInterruptedException("Interrupted while expiring users")

			user = User.get_by_name(self.db, expiring_user["name"], config=self.config)
			username = user.data["name"]

			# parse expiration date if available
			delete_after = user.get_value("delete-after")
			if not delete_after:
				continue

			raw_value = str(delete_after)
			try:
				if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", raw_value):
					expires_at = datetime.datetime.strptime(raw_value, "%Y-%m-%d")
				elif re.match(r"^[0-9]+$", raw_value):
					expires_at = datetime.datetime.fromtimestamp(int(raw_value))
				else:
					raise ValueError("unrecognised expiration date format")
			except (ValueError, OverflowError, OSError):
				# the patterns only check the shape of the value; impossible
				# dates (2024-02-31) or out-of-range timestamps end up here
				self.log.warning(f"User {username} has invalid expiration date {delete_after}")
				continue

			# check if expired...
			if expires_at < now:
				self.log.info(f"User {username} expired - deleting user and datasets")
				user.delete(modules=self.modules)
			elif expires_at - now <= warning_window:
				# the datetime attribute needs a machine-readable value; the
				# day of month is formatted by hand because the `%-d`
				# strftime flag is not available on every platform
				machine_date = expires_at.isoformat()
				readable_date = f"{expires_at.day} {expires_at.strftime('%B %Y %H:%M')}"
				warning_notification = f"WARNING: This account will be deleted at <time datetime=\"{machine_date}\">{readable_date}</time>. Make sure to back up your data before then."
				user.add_notification(warning_notification)

Delete expired users

Users can have a delete-after parameter in their user data which indicates a date or time after which the account should be deleted.

The date can be given in YYYY-MM-DD format or as a Unix (UTC) timestamp. If the current date is after the given date, the account is deleted. If the expiration date is within 7 days, a notification is added to warn the user.

def expire_notifications(self):
127	def expire_notifications(self):
128		"""
129		Delete expired notifications
130
131		Pretty simple!
132		"""
133		self.db.execute(f"DELETE FROM users_notifications WHERE timestamp_expires IS NOT NULL AND timestamp_expires < {time.time()}")

Delete expired notifications

Pretty simple!