Edit on GitHub

backend.workers.manage_extension

Manage a 4CAT extension

  1"""
  2Manage a 4CAT extension
  3"""
  4import datetime
  5import subprocess
  6import requests
  7import logging
  8import zipfile
  9import shutil
 10import shlex
 11import json
 12import ural
 13import os
 14import re
 15
 16from logging.handlers import RotatingFileHandler
 17from pathlib import Path
 18
 19from backend.lib.worker import BasicWorker
 20
 21
 22class ExtensionManipulator(BasicWorker):
 23    """
 24    Manage 4CAT extensions
 25
 26    4CAT extensions are essentially git repositories. This worker can clone the
 27    relevant git repository or delete it and clean up after it.
 28
 29    This is done in a worker instead of in the front-end code because cloning
 30    a large git repository can take some time so it is best to do it
 31    asynchronously. This is also future-proof in that it is easy to add support
 32    for installation code etc here later.
 33
 34    Results are logged to a separate log file that can then be inspected in the
 35    web interface.
 36    """
 37    type = "manage-extension"
 38    max_workers = 1
 39
 40    def work(self):
 41        """
 42        Do something with extensions
 43        """
 44        extension_reference = self.job.data["remote_id"]
 45        task = self.job.details.get("task")
 46
 47        # this worker uses its own log file instead of the main 4CAT log
 48        # this is so that it is easier to monitor error messages about failed
 49        # installations etc and display those separately in e.g. the web
 50        # interface
 51
 52        log_file = self.config.get("PATH_LOGS").joinpath("extensions.log")
 53        logger = logging.getLogger(self.type)
 54        if not logger.handlers:
 55            handler = RotatingFileHandler(log_file, backupCount=1, maxBytes=50000)
 56            handler.level = logging.INFO
 57            handler.setFormatter(logging.Formatter("%(asctime)-15s | %(levelname)s: %(message)s",
 58                                                   "%d-%m-%Y %H:%M:%S"))
 59            logger.addHandler(handler)
 60        logger.level = logging.INFO
 61        self.extension_log = logger
 62
 63        if task == "install":
 64            success = self.install_extension(extension_reference)
 65            if success:
 66                # Add job to restart 4CAT; include upgrade to ensure migrate.py runs which will install any extension updates and new python packages
 67                lock_file = self.config.get("PATH_CONFIG").joinpath("restart.lock")
 68                if lock_file.exists():
 69                    # restart already in progress
 70                    self.extension_log.info("4CAT restart already in progress. Upgrade will be applied on next restart.")
 71                else:
 72                    self.extension_log.info("Adding job to restart 4CAT and apply extension upgrade.")
 73                     # ensure lockfile exists - will be written to later by worker
 74                    lock_file.touch()
 75                    # this log file is used to keep track of the progress, and will also
 76                    # be viewable in the web interface
 77                    restart_log_file = self.config.get("PATH_LOGS").joinpath("restart.log")
 78                    with restart_log_file.open("w") as outfile:
 79                        outfile.write(
 80                            f"Upgrade initiated at server timestamp {datetime.datetime.now().strftime('%c')}\n")
 81                        outfile.write("Telling 4CAT to upgrade via job queue...\n")
 82                    
 83                    # add job to restart 4CAT
 84                    self.queue.add_job(worker_or_type="restart-4cat", details={}, remote_id="upgrade")
 85                
 86        elif task == "uninstall":
 87            self.uninstall_extension(extension_reference)
 88
 89        self.job.finish()
 90
 91    def uninstall_extension(self, extension_name):
 92        """
 93        Remove extension
 94
 95        Currently as simple as deleting the folder, but could add further
 96        cleaning up code later.
 97
 98        While an extension can define configuration settings, we do not
 99        explicitly remove these here. 4CAT has general cleanup code for
100        unreferenced settings and it may be beneficial to keep them in case
101        the extension is re-installed later.
102
103        :param str extension_name:  ID of the extension (i.e. name of the
104        folder it is in)
105        """
106        self.extension_log.info(f"Uninstalling extension {extension_name}.")
107        extensions_root = self.config.get("PATH_EXTENSIONS")
108        target_folder = extensions_root.joinpath(extension_name)
109
110        if not target_folder.exists():
111            return self.extension_log.error(f"Extension {extension_name} does not exist - cannot remove it.")
112        
113        # Collect job types and uninstall function in extension workers
114        extension_jobtypes = []
115        for extension_worker in self.modules.workers.values():
116            if extension_worker.is_extension and extension_worker.extension_name == extension_name:
117                extension_jobtypes.append(extension_worker.type)
118                if hasattr(extension_worker, "uninstall"):
119                    self.extension_log.info(f"Running uninstall function for extension {extension_name}.")
120                    extension_worker.uninstall()
121        
122        # Remove existing jobs for this extension
123        for job in self.queue.get_all_jobs(restrict_claimable=False):
124            if job.data["jobtype"] in extension_jobtypes:
125                self.extension_log.info(f"Removing job {job.data['jobtype']} - {job.data['remote_id']}.")
126                job.finish(delete=True)     
127
128        try:
129            shutil.rmtree(target_folder)
130            self.extension_log.info(f"Finished uninstalling extension {extension_name}.")
131        except OSError as e:
132            self.extension_log.error(f"Could not uninstall extension {extension_name}. There may be an issue with "
133                                     f"file privileges, or the extension is installed via a symbolic link which 4CAT "
134                                     f"cannot manipulate. The system error message was: '{e}'")
135
136    def install_extension(self, repository_reference, overwrite=False):
137        """
138        Install a 4CAT extension
139
140        4CAT extensions can be installed from a git URL or a zip archive. In
141        either case, the files are first put into a temporary folder, after
142        which the manifest in that folder is read to complete installation.
143
144        :param str repository_reference:  Git repository URL, or zip archive
145        path.
146        :param bool overwrite:  Overwrite extension if one exists? Set to
147        `true` to upgrade existing extensions (for example)
148        :return bool:  `True` if installation was successful, `False` otherwise
149        """
150        if self.job.details.get("source") == "remote":
151            extension_folder, extension_name = self.clone_from_url(repository_reference)
152        else:
153            extension_folder, extension_name = self.unpack_from_zip(repository_reference)
154
155        if not extension_name:
156            self.extension_log.error("The 4CAT extension could not be installed.")
157            return False
158
159        # read manifest file
160        manifest_file = extension_folder.joinpath("metadata.json")
161        if not manifest_file.exists():
162            shutil.rmtree(extension_folder)
163            self.extension_log.error(f"Manifest file of newly cloned 4CAT extension {repository_reference} does "
164                                            f"not exist. Cannot install as a 4CAT extension.")
165            return False
166        else:
167            try:
168                with manifest_file.open() as infile:
169                    manifest_data = json.load(infile)
170            except json.JSONDecodeError:
171                shutil.rmtree(extension_folder)
172                self.extension_log.error(f"Manifest file of newly cloned 4CAT extension {repository_reference} "
173                                                f"could not be parsed. Cannot install as a 4CAT extension.")
174                return False
175
176        canonical_name = manifest_data.get("name", extension_name)
177        canonical_id = manifest_data.get("id", extension_name)
178
179        canonical_folder = extension_folder.with_name(canonical_id)
180        existing_name = canonical_id
181        existing_version = "unknown"
182
183        if canonical_folder.exists():
184            if canonical_folder.joinpath("metadata.json").exists():
185                with canonical_folder.joinpath("metadata.json").open() as infile:
186                    try:
187                        existing_manifest = json.load(infile)
188                        existing_name = existing_manifest.get("name", canonical_id)
189                        existing_version = existing_manifest.get("version", "unknown")
190                    except json.JSONDecodeError:
191                        pass
192
193            if overwrite:
194                self.extension_log.warning(f"Uninstalling existing 4CAT extension {existing_name} (version "
195                                           f"{existing_version}.")
196                shutil.rmtree(canonical_folder)
197            else:
198                self.extension_log.error(f"An extension with ID {canonical_id} is already installed "
199                                                f"({extension_name}, version {existing_version}). Cannot install "
200                                                f"another one with the same ID - uninstall it first.")
201                return False
202
203        extension_folder.rename(canonical_folder)
204        self.extension_log.info(f"Finished installing extension {canonical_name} (version {manifest_data.get('version', 'unknown')}) with ID "
205                                f"{canonical_id}.")
206        return True
207
208
209    def unpack_from_zip(self, archive_path):
210        """
211        Unpack extension files from a zip archive
212
213        Pretty straightforward - Make a temporary folder and extract the zip
214        archive's contents into it.
215
216        :param str archive_path: Path to the zip file to extract
217        :return tuple:  Tuple of folder and extension name, or `None, None` on
218        failure.
219        """
220        archive_path = Path(archive_path)
221        if not archive_path.exists():
222            return self.extension_log.error(f"Extension file does not exist at {archive_path} - cannot install."), None
223
224        extension_name = archive_path.stem
225        extensions_root = self.config.get("PATH_EXTENSIONS")
226        temp_name = self.get_temporary_folder(extensions_root)
227        try:
228            with zipfile.ZipFile(archive_path, "r") as archive_file:
229                archive_file.extractall(temp_name)
230        except Exception as e:
231            return self.extension_log.error(f"Could not extract extension zip archive {archive_path.name}: {e}. Cannot "
232                                            f"install."), None
233        finally:
234            archive_path.unlink()
235
236        return temp_name, extension_name
237
238
239    def clone_from_url(self, repository_url):
240        """
241        Clone the extension files from a git repository URL
242
243        :param str repository_url:  Git repository URL to clone extension from
244        :return tuple:  Tuple of folder and extension name, or `None, None` on
245        failure.
246        """
247        # we only know how to install extensions from URLs for now
248        if not ural.is_url(repository_url):
249            return self.extension_log.error(f"Cannot install 4CAT extension - invalid repository url: "
250                                            f"{repository_url}"), None
251
252        # normalize URL and extract name
253        repository_url = repository_url.strip().split("#")[-1]
254        if repository_url.endswith("/"):
255            repository_url = repository_url[:-1]
256        repository_url_name = re.sub(r"\.git$", "", repository_url.split("/")[-1].split("?")[0].lower())
257
258        try:
259            test_url = requests.head(repository_url)
260            if test_url.status_code >= 400:
261                return self.extension_log.error(
262                    f"Cannot install 4CAT extension - the repository URL is unreachable (status code "
263                    f"{test_url.status_code})"), None
264        except requests.RequestException as e:
265            return self.extension_log.error(
266                f"Cannot install 4CAT extension - the repository URL seems invalid or unreachable ({e})"), None
267
268        # ok, we have a valid URL that is reachable - try cloning from it
269        extensions_root = self.config.get("PATH_EXTENSIONS")
270        os.chdir(extensions_root)
271
272        temp_name = self.get_temporary_folder(extensions_root)
273
274        extension_folder = extensions_root.joinpath(temp_name)
275        clone_command = f"git clone {shlex.quote(repository_url)} {temp_name}"
276        clone_outcome = subprocess.run(shlex.split(clone_command), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
277
278        cloned_correctly = True
279        if clone_outcome.returncode != 0:
280            cloned_correctly = False
281            self.extension_log.info(clone_outcome.stdout.decode("utf-8"))
282            self.extension_log.error(f"Could not clone 4CAT extension repository from {repository_url}:\n\n{clone_outcome.stderr}")
283
284        if not cloned_correctly:
285            if extension_folder.exists():
286                shutil.rmtree(extension_folder)
287            return self.extension_log.error(f"4CAT extension {repository_url} was not installed."), None
288
289        return extension_folder, repository_url_name
290
291
292    def get_temporary_folder(self, extensions_root):
293        # clone into a temporary folder, which we will rename as needed
294        # this is because the repository name is not necessarily the extension
295        # name
296        temp_base = "new-extension"
297        temp_name = temp_base
298        temp_index = 0
299        while extensions_root.joinpath(temp_name).exists():
300            temp_index += 1
301            temp_name = f"{temp_base}-{temp_index}"
302
303        return extensions_root.joinpath(temp_name)
class ExtensionManipulator(backend.lib.worker.BasicWorker):
 23class ExtensionManipulator(BasicWorker):
 24    """
 25    Manage 4CAT extensions
 26
 27    4CAT extensions are essentially git repositories. This worker can clone the
 28    relevant git repository or delete it and clean up after it.
 29
 30    This is done in a worker instead of in the front-end code because cloning
 31    a large git repository can take some time so it is best to do it
 32    asynchronously. This is also future-proof in that it is easy to add support
 33    for installation code etc here later.
 34
 35    Results are logged to a separate log file that can then be inspected in the
 36    web interface.
 37    """
 38    type = "manage-extension"
 39    max_workers = 1
 40
 41    def work(self):
 42        """
 43        Do something with extensions
 44        """
 45        extension_reference = self.job.data["remote_id"]
 46        task = self.job.details.get("task")
 47
 48        # this worker uses its own log file instead of the main 4CAT log
 49        # this is so that it is easier to monitor error messages about failed
 50        # installations etc and display those separately in e.g. the web
 51        # interface
 52
 53        log_file = self.config.get("PATH_LOGS").joinpath("extensions.log")
 54        logger = logging.getLogger(self.type)
 55        if not logger.handlers:
 56            handler = RotatingFileHandler(log_file, backupCount=1, maxBytes=50000)
 57            handler.level = logging.INFO
 58            handler.setFormatter(logging.Formatter("%(asctime)-15s | %(levelname)s: %(message)s",
 59                                                   "%d-%m-%Y %H:%M:%S"))
 60            logger.addHandler(handler)
 61        logger.level = logging.INFO
 62        self.extension_log = logger
 63
 64        if task == "install":
 65            success = self.install_extension(extension_reference)
 66            if success:
 67                # Add job to restart 4CAT; include upgrade to ensure migrate.py runs which will install any extension updates and new python packages
 68                lock_file = self.config.get("PATH_CONFIG").joinpath("restart.lock")
 69                if lock_file.exists():
 70                    # restart already in progress
 71                    self.extension_log.info("4CAT restart already in progress. Upgrade will be applied on next restart.")
 72                else:
 73                    self.extension_log.info("Adding job to restart 4CAT and apply extension upgrade.")
 74                     # ensure lockfile exists - will be written to later by worker
 75                    lock_file.touch()
 76                    # this log file is used to keep track of the progress, and will also
 77                    # be viewable in the web interface
 78                    restart_log_file = self.config.get("PATH_LOGS").joinpath("restart.log")
 79                    with restart_log_file.open("w") as outfile:
 80                        outfile.write(
 81                            f"Upgrade initiated at server timestamp {datetime.datetime.now().strftime('%c')}\n")
 82                        outfile.write("Telling 4CAT to upgrade via job queue...\n")
 83                    
 84                    # add job to restart 4CAT
 85                    self.queue.add_job(worker_or_type="restart-4cat", details={}, remote_id="upgrade")
 86                
 87        elif task == "uninstall":
 88            self.uninstall_extension(extension_reference)
 89
 90        self.job.finish()
 91
 92    def uninstall_extension(self, extension_name):
 93        """
 94        Remove extension
 95
 96        Currently as simple as deleting the folder, but could add further
 97        cleaning up code later.
 98
 99        While an extension can define configuration settings, we do not
100        explicitly remove these here. 4CAT has general cleanup code for
101        unreferenced settings and it may be beneficial to keep them in case
102        the extension is re-installed later.
103
104        :param str extension_name:  ID of the extension (i.e. name of the
105        folder it is in)
106        """
107        self.extension_log.info(f"Uninstalling extension {extension_name}.")
108        extensions_root = self.config.get("PATH_EXTENSIONS")
109        target_folder = extensions_root.joinpath(extension_name)
110
111        if not target_folder.exists():
112            return self.extension_log.error(f"Extension {extension_name} does not exist - cannot remove it.")
113        
114        # Collect job types and uninstall function in extension workers
115        extension_jobtypes = []
116        for extension_worker in self.modules.workers.values():
117            if extension_worker.is_extension and extension_worker.extension_name == extension_name:
118                extension_jobtypes.append(extension_worker.type)
119                if hasattr(extension_worker, "uninstall"):
120                    self.extension_log.info(f"Running uninstall function for extension {extension_name}.")
121                    extension_worker.uninstall()
122        
123        # Remove existing jobs for this extension
124        for job in self.queue.get_all_jobs(restrict_claimable=False):
125            if job.data["jobtype"] in extension_jobtypes:
126                self.extension_log.info(f"Removing job {job.data['jobtype']} - {job.data['remote_id']}.")
127                job.finish(delete=True)     
128
129        try:
130            shutil.rmtree(target_folder)
131            self.extension_log.info(f"Finished uninstalling extension {extension_name}.")
132        except OSError as e:
133            self.extension_log.error(f"Could not uninstall extension {extension_name}. There may be an issue with "
134                                     f"file privileges, or the extension is installed via a symbolic link which 4CAT "
135                                     f"cannot manipulate. The system error message was: '{e}'")
136
137    def install_extension(self, repository_reference, overwrite=False):
138        """
139        Install a 4CAT extension
140
141        4CAT extensions can be installed from a git URL or a zip archive. In
142        either case, the files are first put into a temporary folder, after
143        which the manifest in that folder is read to complete installation.
144
145        :param str repository_reference:  Git repository URL, or zip archive
146        path.
147        :param bool overwrite:  Overwrite extension if one exists? Set to
148        `true` to upgrade existing extensions (for example)
149        :return bool:  `True` if installation was successful, `False` otherwise
150        """
151        if self.job.details.get("source") == "remote":
152            extension_folder, extension_name = self.clone_from_url(repository_reference)
153        else:
154            extension_folder, extension_name = self.unpack_from_zip(repository_reference)
155
156        if not extension_name:
157            self.extension_log.error("The 4CAT extension could not be installed.")
158            return False
159
160        # read manifest file
161        manifest_file = extension_folder.joinpath("metadata.json")
162        if not manifest_file.exists():
163            shutil.rmtree(extension_folder)
164            self.extension_log.error(f"Manifest file of newly cloned 4CAT extension {repository_reference} does "
165                                            f"not exist. Cannot install as a 4CAT extension.")
166            return False
167        else:
168            try:
169                with manifest_file.open() as infile:
170                    manifest_data = json.load(infile)
171            except json.JSONDecodeError:
172                shutil.rmtree(extension_folder)
173                self.extension_log.error(f"Manifest file of newly cloned 4CAT extension {repository_reference} "
174                                                f"could not be parsed. Cannot install as a 4CAT extension.")
175                return False
176
177        canonical_name = manifest_data.get("name", extension_name)
178        canonical_id = manifest_data.get("id", extension_name)
179
180        canonical_folder = extension_folder.with_name(canonical_id)
181        existing_name = canonical_id
182        existing_version = "unknown"
183
184        if canonical_folder.exists():
185            if canonical_folder.joinpath("metadata.json").exists():
186                with canonical_folder.joinpath("metadata.json").open() as infile:
187                    try:
188                        existing_manifest = json.load(infile)
189                        existing_name = existing_manifest.get("name", canonical_id)
190                        existing_version = existing_manifest.get("version", "unknown")
191                    except json.JSONDecodeError:
192                        pass
193
194            if overwrite:
195                self.extension_log.warning(f"Uninstalling existing 4CAT extension {existing_name} (version "
196                                           f"{existing_version}.")
197                shutil.rmtree(canonical_folder)
198            else:
199                self.extension_log.error(f"An extension with ID {canonical_id} is already installed "
200                                                f"({extension_name}, version {existing_version}). Cannot install "
201                                                f"another one with the same ID - uninstall it first.")
202                return False
203
204        extension_folder.rename(canonical_folder)
205        self.extension_log.info(f"Finished installing extension {canonical_name} (version {manifest_data.get('version', 'unknown')}) with ID "
206                                f"{canonical_id}.")
207        return True
208
209
210    def unpack_from_zip(self, archive_path):
211        """
212        Unpack extension files from a zip archive
213
214        Pretty straightforward - Make a temporary folder and extract the zip
215        archive's contents into it.
216
217        :param str archive_path: Path to the zip file to extract
218        :return tuple:  Tuple of folder and extension name, or `None, None` on
219        failure.
220        """
221        archive_path = Path(archive_path)
222        if not archive_path.exists():
223            return self.extension_log.error(f"Extension file does not exist at {archive_path} - cannot install."), None
224
225        extension_name = archive_path.stem
226        extensions_root = self.config.get("PATH_EXTENSIONS")
227        temp_name = self.get_temporary_folder(extensions_root)
228        try:
229            with zipfile.ZipFile(archive_path, "r") as archive_file:
230                archive_file.extractall(temp_name)
231        except Exception as e:
232            return self.extension_log.error(f"Could not extract extension zip archive {archive_path.name}: {e}. Cannot "
233                                            f"install."), None
234        finally:
235            archive_path.unlink()
236
237        return temp_name, extension_name
238
239
240    def clone_from_url(self, repository_url):
241        """
242        Clone the extension files from a git repository URL
243
244        :param str repository_url:  Git repository URL to clone extension from
245        :return tuple:  Tuple of folder and extension name, or `None, None` on
246        failure.
247        """
248        # we only know how to install extensions from URLs for now
249        if not ural.is_url(repository_url):
250            return self.extension_log.error(f"Cannot install 4CAT extension - invalid repository url: "
251                                            f"{repository_url}"), None
252
253        # normalize URL and extract name
254        repository_url = repository_url.strip().split("#")[-1]
255        if repository_url.endswith("/"):
256            repository_url = repository_url[:-1]
257        repository_url_name = re.sub(r"\.git$", "", repository_url.split("/")[-1].split("?")[0].lower())
258
259        try:
260            test_url = requests.head(repository_url)
261            if test_url.status_code >= 400:
262                return self.extension_log.error(
263                    f"Cannot install 4CAT extension - the repository URL is unreachable (status code "
264                    f"{test_url.status_code})"), None
265        except requests.RequestException as e:
266            return self.extension_log.error(
267                f"Cannot install 4CAT extension - the repository URL seems invalid or unreachable ({e})"), None
268
269        # ok, we have a valid URL that is reachable - try cloning from it
270        extensions_root = self.config.get("PATH_EXTENSIONS")
271        os.chdir(extensions_root)
272
273        temp_name = self.get_temporary_folder(extensions_root)
274
275        extension_folder = extensions_root.joinpath(temp_name)
276        clone_command = f"git clone {shlex.quote(repository_url)} {temp_name}"
277        clone_outcome = subprocess.run(shlex.split(clone_command), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
278
279        cloned_correctly = True
280        if clone_outcome.returncode != 0:
281            cloned_correctly = False
282            self.extension_log.info(clone_outcome.stdout.decode("utf-8"))
283            self.extension_log.error(f"Could not clone 4CAT extension repository from {repository_url}:\n\n{clone_outcome.stderr}")
284
285        if not cloned_correctly:
286            if extension_folder.exists():
287                shutil.rmtree(extension_folder)
288            return self.extension_log.error(f"4CAT extension {repository_url} was not installed."), None
289
290        return extension_folder, repository_url_name
291
292
293    def get_temporary_folder(self, extensions_root):
294        # clone into a temporary folder, which we will rename as needed
295        # this is because the repository name is not necessarily the extension
296        # name
297        temp_base = "new-extension"
298        temp_name = temp_base
299        temp_index = 0
300        while extensions_root.joinpath(temp_name).exists():
301            temp_index += 1
302            temp_name = f"{temp_base}-{temp_index}"
303
304        return extensions_root.joinpath(temp_name)

Manage 4CAT extensions

4CAT extensions are essentially git repositories. This worker can clone the relevant git repository or delete it and clean up after it.

This is done in a worker instead of in the front-end code because cloning a large git repository can take some time so it is best to do it asynchronously. This is also future-proof in that it is easy to add support for installation code etc here later.

Results are logged to a separate log file that can then be inspected in the web interface.

type = 'manage-extension'
max_workers = 1
def work(self):
41    def work(self):
42        """
43        Do something with extensions
44        """
45        extension_reference = self.job.data["remote_id"]
46        task = self.job.details.get("task")
47
48        # this worker uses its own log file instead of the main 4CAT log
49        # this is so that it is easier to monitor error messages about failed
50        # installations etc and display those separately in e.g. the web
51        # interface
52
53        log_file = self.config.get("PATH_LOGS").joinpath("extensions.log")
54        logger = logging.getLogger(self.type)
55        if not logger.handlers:
56            handler = RotatingFileHandler(log_file, backupCount=1, maxBytes=50000)
57            handler.level = logging.INFO
58            handler.setFormatter(logging.Formatter("%(asctime)-15s | %(levelname)s: %(message)s",
59                                                   "%d-%m-%Y %H:%M:%S"))
60            logger.addHandler(handler)
61        logger.level = logging.INFO
62        self.extension_log = logger
63
64        if task == "install":
65            success = self.install_extension(extension_reference)
66            if success:
67                # Add job to restart 4CAT; include upgrade to ensure migrate.py runs which will install any extension updates and new python packages
68                lock_file = self.config.get("PATH_CONFIG").joinpath("restart.lock")
69                if lock_file.exists():
70                    # restart already in progress
71                    self.extension_log.info("4CAT restart already in progress. Upgrade will be applied on next restart.")
72                else:
73                    self.extension_log.info("Adding job to restart 4CAT and apply extension upgrade.")
74                     # ensure lockfile exists - will be written to later by worker
75                    lock_file.touch()
76                    # this log file is used to keep track of the progress, and will also
77                    # be viewable in the web interface
78                    restart_log_file = self.config.get("PATH_LOGS").joinpath("restart.log")
79                    with restart_log_file.open("w") as outfile:
80                        outfile.write(
81                            f"Upgrade initiated at server timestamp {datetime.datetime.now().strftime('%c')}\n")
82                        outfile.write("Telling 4CAT to upgrade via job queue...\n")
83                    
84                    # add job to restart 4CAT
85                    self.queue.add_job(worker_or_type="restart-4cat", details={}, remote_id="upgrade")
86                
87        elif task == "uninstall":
88            self.uninstall_extension(extension_reference)
89
90        self.job.finish()

Do something with extensions

def uninstall_extension(self, extension_name):
 92    def uninstall_extension(self, extension_name):
 93        """
 94        Remove extension
 95
 96        Currently as simple as deleting the folder, but could add further
 97        cleaning up code later.
 98
 99        While an extension can define configuration settings, we do not
100        explicitly remove these here. 4CAT has general cleanup code for
101        unreferenced settings and it may be beneficial to keep them in case
102        the extension is re-installed later.
103
104        :param str extension_name:  ID of the extension (i.e. name of the
105        folder it is in)
106        """
107        self.extension_log.info(f"Uninstalling extension {extension_name}.")
108        extensions_root = self.config.get("PATH_EXTENSIONS")
109        target_folder = extensions_root.joinpath(extension_name)
110
111        if not target_folder.exists():
112            return self.extension_log.error(f"Extension {extension_name} does not exist - cannot remove it.")
113        
114        # Collect job types and uninstall function in extension workers
115        extension_jobtypes = []
116        for extension_worker in self.modules.workers.values():
117            if extension_worker.is_extension and extension_worker.extension_name == extension_name:
118                extension_jobtypes.append(extension_worker.type)
119                if hasattr(extension_worker, "uninstall"):
120                    self.extension_log.info(f"Running uninstall function for extension {extension_name}.")
121                    extension_worker.uninstall()
122        
123        # Remove existing jobs for this extension
124        for job in self.queue.get_all_jobs(restrict_claimable=False):
125            if job.data["jobtype"] in extension_jobtypes:
126                self.extension_log.info(f"Removing job {job.data['jobtype']} - {job.data['remote_id']}.")
127                job.finish(delete=True)     
128
129        try:
130            shutil.rmtree(target_folder)
131            self.extension_log.info(f"Finished uninstalling extension {extension_name}.")
132        except OSError as e:
133            self.extension_log.error(f"Could not uninstall extension {extension_name}. There may be an issue with "
134                                     f"file privileges, or the extension is installed via a symbolic link which 4CAT "
135                                     f"cannot manipulate. The system error message was: '{e}'")

Remove extension

Currently as simple as deleting the folder, but could add further cleaning up code later.

While an extension can define configuration settings, we do not explicitly remove these here. 4CAT has general cleanup code for unreferenced settings and it may be beneficial to keep them in case the extension is re-installed later.

Parameters
  • str extension_name: ID of the extension (i.e. name of the folder it is in)
def install_extension(self, repository_reference, overwrite=False):
137    def install_extension(self, repository_reference, overwrite=False):
138        """
139        Install a 4CAT extension
140
141        4CAT extensions can be installed from a git URL or a zip archive. In
142        either case, the files are first put into a temporary folder, after
143        which the manifest in that folder is read to complete installation.
144
145        :param str repository_reference:  Git repository URL, or zip archive
146        path.
147        :param bool overwrite:  Overwrite extension if one exists? Set to
148        `true` to upgrade existing extensions (for example)
149        :return bool:  `True` if installation was successful, `False` otherwise
150        """
151        if self.job.details.get("source") == "remote":
152            extension_folder, extension_name = self.clone_from_url(repository_reference)
153        else:
154            extension_folder, extension_name = self.unpack_from_zip(repository_reference)
155
156        if not extension_name:
157            self.extension_log.error("The 4CAT extension could not be installed.")
158            return False
159
160        # read manifest file
161        manifest_file = extension_folder.joinpath("metadata.json")
162        if not manifest_file.exists():
163            shutil.rmtree(extension_folder)
164            self.extension_log.error(f"Manifest file of newly cloned 4CAT extension {repository_reference} does "
165                                            f"not exist. Cannot install as a 4CAT extension.")
166            return False
167        else:
168            try:
169                with manifest_file.open() as infile:
170                    manifest_data = json.load(infile)
171            except json.JSONDecodeError:
172                shutil.rmtree(extension_folder)
173                self.extension_log.error(f"Manifest file of newly cloned 4CAT extension {repository_reference} "
174                                                f"could not be parsed. Cannot install as a 4CAT extension.")
175                return False
176
177        canonical_name = manifest_data.get("name", extension_name)
178        canonical_id = manifest_data.get("id", extension_name)
179
180        canonical_folder = extension_folder.with_name(canonical_id)
181        existing_name = canonical_id
182        existing_version = "unknown"
183
184        if canonical_folder.exists():
185            if canonical_folder.joinpath("metadata.json").exists():
186                with canonical_folder.joinpath("metadata.json").open() as infile:
187                    try:
188                        existing_manifest = json.load(infile)
189                        existing_name = existing_manifest.get("name", canonical_id)
190                        existing_version = existing_manifest.get("version", "unknown")
191                    except json.JSONDecodeError:
192                        pass
193
194            if overwrite:
195                self.extension_log.warning(f"Uninstalling existing 4CAT extension {existing_name} (version "
196                                           f"{existing_version}.")
197                shutil.rmtree(canonical_folder)
198            else:
199                self.extension_log.error(f"An extension with ID {canonical_id} is already installed "
200                                                f"({extension_name}, version {existing_version}). Cannot install "
201                                                f"another one with the same ID - uninstall it first.")
202                return False
203
204        extension_folder.rename(canonical_folder)
205        self.extension_log.info(f"Finished installing extension {canonical_name} (version {manifest_data.get('version', 'unknown')}) with ID "
206                                f"{canonical_id}.")
207        return True

Install a 4CAT extension

4CAT extensions can be installed from a git URL or a zip archive. In either case, the files are first put into a temporary folder, after which the manifest in that folder is read to complete installation.

Parameters
  • str repository_reference: Git repository URL, or zip archive path.
  • bool overwrite: Overwrite extension if one exists? Set to true to upgrade existing extensions (for example)
Returns

True if installation was successful, False otherwise

def unpack_from_zip(self, archive_path):
210    def unpack_from_zip(self, archive_path):
211        """
212        Unpack extension files from a zip archive
213
214        Pretty straightforward - Make a temporary folder and extract the zip
215        archive's contents into it.
216
217        :param str archive_path: Path to the zip file to extract
218        :return tuple:  Tuple of folder and extension name, or `None, None` on
219        failure.
220        """
221        archive_path = Path(archive_path)
222        if not archive_path.exists():
223            return self.extension_log.error(f"Extension file does not exist at {archive_path} - cannot install."), None
224
225        extension_name = archive_path.stem
226        extensions_root = self.config.get("PATH_EXTENSIONS")
227        temp_name = self.get_temporary_folder(extensions_root)
228        try:
229            with zipfile.ZipFile(archive_path, "r") as archive_file:
230                archive_file.extractall(temp_name)
231        except Exception as e:
232            return self.extension_log.error(f"Could not extract extension zip archive {archive_path.name}: {e}. Cannot "
233                                            f"install."), None
234        finally:
235            archive_path.unlink()
236
237        return temp_name, extension_name

Unpack extension files from a zip archive

Pretty straightforward - Make a temporary folder and extract the zip archive's contents into it.

Parameters
  • str archive_path: Path to the zip file to extract
Returns

Tuple of folder and extension name, or None, None on failure.

def clone_from_url(self, repository_url):
240    def clone_from_url(self, repository_url):
241        """
242        Clone the extension files from a git repository URL
243
244        :param str repository_url:  Git repository URL to clone extension from
245        :return tuple:  Tuple of folder and extension name, or `None, None` on
246        failure.
247        """
248        # we only know how to install extensions from URLs for now
249        if not ural.is_url(repository_url):
250            return self.extension_log.error(f"Cannot install 4CAT extension - invalid repository url: "
251                                            f"{repository_url}"), None
252
253        # normalize URL and extract name
254        repository_url = repository_url.strip().split("#")[-1]
255        if repository_url.endswith("/"):
256            repository_url = repository_url[:-1]
257        repository_url_name = re.sub(r"\.git$", "", repository_url.split("/")[-1].split("?")[0].lower())
258
259        try:
260            test_url = requests.head(repository_url)
261            if test_url.status_code >= 400:
262                return self.extension_log.error(
263                    f"Cannot install 4CAT extension - the repository URL is unreachable (status code "
264                    f"{test_url.status_code})"), None
265        except requests.RequestException as e:
266            return self.extension_log.error(
267                f"Cannot install 4CAT extension - the repository URL seems invalid or unreachable ({e})"), None
268
269        # ok, we have a valid URL that is reachable - try cloning from it
270        extensions_root = self.config.get("PATH_EXTENSIONS")
271        os.chdir(extensions_root)
272
273        temp_name = self.get_temporary_folder(extensions_root)
274
275        extension_folder = extensions_root.joinpath(temp_name)
276        clone_command = f"git clone {shlex.quote(repository_url)} {temp_name}"
277        clone_outcome = subprocess.run(shlex.split(clone_command), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
278
279        cloned_correctly = True
280        if clone_outcome.returncode != 0:
281            cloned_correctly = False
282            self.extension_log.info(clone_outcome.stdout.decode("utf-8"))
283            self.extension_log.error(f"Could not clone 4CAT extension repository from {repository_url}:\n\n{clone_outcome.stderr}")
284
285        if not cloned_correctly:
286            if extension_folder.exists():
287                shutil.rmtree(extension_folder)
288            return self.extension_log.error(f"4CAT extension {repository_url} was not installed."), None
289
290        return extension_folder, repository_url_name

Clone the extension files from a git repository URL

Parameters
  • str repository_url: Git repository URL to clone extension from
Returns

Tuple of folder and extension name, or None, None on failure.

def get_temporary_folder(self, extensions_root):
293    def get_temporary_folder(self, extensions_root):
294        # clone into a temporary folder, which we will rename as needed
295        # this is because the repository name is not necessarily the extension
296        # name
297        temp_base = "new-extension"
298        temp_name = temp_base
299        temp_index = 0
300        while extensions_root.joinpath(temp_name).exists():
301            temp_index += 1
302            temp_name = f"{temp_base}-{temp_index}"
303
304        return extensions_root.joinpath(temp_name)