backend.workers.restart_4cat

Restart 4CAT and optionally upgrade it to the latest release

  1"""
  2Restart 4CAT and optionally upgrade it to the latest release
  3"""
  4import subprocess
  5import requests
  6import hashlib
  7import oslex
  8import json
  9import time
 10import uuid
 11import sys
 12
 13from backend.lib.worker import BasicWorker
 14from common.lib.exceptions import WorkerInterruptedException
 15
 16
 17class FourcatRestarterAndUpgrader(BasicWorker):
 18    """
 19    Restart 4CAT and optionally upgrade it to the latest release
 20
 21    Why implement this as a worker? Trying to have 4CAT restart itself leads
 22    to an interesting conundrum: it will not be able to report the outcome of
 23    the restart, because whatever bit of code is keeping track of that will be
 24    interrupted by restarting 4CAT.
 25
 26    Using a worker has the benefit of it restarting after 4CAT restarts, so it
 27    can then figure out that 4CAT was just restarted and report the outcome. It
 28    then uses a log file to keep track of the results. The log file can then be
 29    used by other parts of 4CAT to see if the restart was successful.
 30
 31    It does lead to another conundrum - what if due to some error, 4CAT never
 32    restarts? Then this worker will not be run again to report its own failure.
 33    There seem to be no clean ways around this, so anything watching the
 34    outcome of the worker probably needs to implement some timeout after which
 35    it is assumed that the restart/upgrade process failed catastrophically.
 36    """
 37    type = "restart-4cat"
 38    max_workers = 1
 39
 40    def work(self):
 41        """
 42        Restart 4CAT and optionally upgrade it to the latest release
 43        """
 44        # figure out if we're starting the restart or checking the result
 45        # after 4cat has been restarted
 46        is_resuming = self.job.data["attempts"] > 0
 47
 48        # prevent multiple restarts running at the same time which could blow
 49        # up really fast
 50        lock_file = self.config.get("PATH_CONFIG").joinpath("restart.lock")
 51
 52        # this file has the log of the restart worker itself and is checked by
 53        # the frontend to see how far we are
 54        log_file_restart = self.config.get("PATH_LOGS").joinpath("restart.log")
 55        log_stream_restart = log_file_restart.open("a")
 56
 57        if not is_resuming:
 58            log_stream_restart.write("Initiating 4CAT restart worker\n")
 59            self.log.info("New restart initiated.")
 60
 61            # this lock file will ensure that people don't start two
 62            # simultaneous upgrades or something
 63            with lock_file.open("w") as outfile:
 64                hasher = hashlib.blake2b()
 65                hasher.update(str(uuid.uuid4()).encode("utf-8"))
 66                outfile.write(hasher.hexdigest())
 67
 68            # trigger a restart and/or upgrade
 69            # returns a JSON with a 'status' key and a message, the message
 70            # being the process output
 71
 72            if self.job.data["remote_id"].startswith("upgrade"):
 73                command = sys.executable + " helper-scripts/migrate.py --repository %s --yes --restart" % \
 74                          (oslex.quote(self.config.get("4cat.github_url")))
 75                if self.job.details and self.job.details.get("branch"):
 76                    # migrate to code in specific branch
 77                    command += f" --branch {oslex.quote(self.job.details['branch'])}"
 78                else:
 79                    # migrate to latest release
 80                    command += " --release"
 81
 82            else:
 83                command = sys.executable + " 4cat-daemon.py --no-version-check force-restart"
 84
 85            try:
 86                # flush any writes before the other process starts writing to
 87                # the stream
 88                self.log.info(f"Running command {command}")
 89                log_stream_restart.flush()
 90
 91                # the tricky part is that this command will interrupt the
 92                # daemon, i.e. this worker!
 93                # so we'll never get to actually send a response, if all goes
 94                # well. but the file descriptor that stdout is piped to remains
 95                # open, somehow, so we can use that to keep track of the output
 96                # stdin needs to be /dev/null here because else when 4CAT
 97                # restarts and we re-attempt to make a daemon, it will fail
 98                # when trying to close the stdin file descriptor of the
 99                # subprocess (man, that was a fun bug to hunt down)
100                process = subprocess.Popen(oslex.split(command), cwd=str(self.config.get("PATH_ROOT")),
101                                           stdout=log_stream_restart, stderr=log_stream_restart,
102                                           stdin=subprocess.DEVNULL)
103
104                while not self.interrupted:
105                    # basically wait for either the process to quit or 4CAT to
106                    # be restarted (hopefully the latter)
107                    try:
108                        # now see if the process is finished - if not a
109                        # TimeoutExpired will be raised
110                        process.wait(1)
111                        break
112                    except subprocess.TimeoutExpired:
113                        pass
114
115                if process.returncode is not None:
116                    # if we reach this, 4CAT was never restarted, and so the job failed
117                    log_stream_restart.write(
118                        f"\nUnexpected outcome of restart call ({process.returncode})\n")
119
120                    raise RuntimeError()
121                else:
122                    # interrupted before the process could finish (as it should)
123                    self.log.info("Restart triggered. Restarting 4CAT.\n")
124                    raise WorkerInterruptedException()
125
126            except (RuntimeError, subprocess.CalledProcessError) as e:
127                log_stream_restart.write(str(e))
128                log_stream_restart.write(
129                    "[Worker] Error while restarting 4CAT. The script returned a non-standard error code "
130                    "(see above). You may need to restart 4CAT manually.\n")
131                self.log.error(f"Error restarting 4CAT. See {log_stream_restart.name} for details.")
132                lock_file.unlink()
133                self.job.finish()
134
135            finally:
136                log_stream_restart.close()
137
138        else:
139            # 4CAT back-end was restarted - now check the results and make the
140            # front-end restart or upgrade too
141            self.log.info("Restart worker resumed after restarting 4CAT, restart successful.")
142            log_stream_restart.write("4CAT restarted.\n")
143            with self.config.get("PATH_CONFIG").joinpath(".current-version").open() as infile:
144                log_stream_restart.write(f"4CAT is now running version {infile.readline().strip()}.\n")
145
146            # we're gonna use some specific Flask routes to trigger this, i.e.
147            # we're interacting with the front-end through HTTP
148            api_host = "https://" if self.config.get("flask.https") else "http://"
149            if self.config.get("USING_DOCKER"):
150                import os
151                docker_exposed_port = os.environ['PUBLIC_PORT']
152                api_host += f"host.docker.internal{':' + docker_exposed_port if docker_exposed_port != '80' else ''}"
153            else:
154                api_host += self.config.get("flask.server_name")
155
156            if self.job.data["remote_id"].startswith("upgrade") and self.config.get("USING_DOCKER"):
157                # when using Docker, the front-end needs to update separately
158                log_stream_restart.write("Telling front-end Docker container to upgrade...\n")
159                log_stream_restart.close()  # close, because front-end will be writing to it
160                upgrade_ok = False
161                upgrade_timeout = False
162                upgrade_error_message = False
163                try:
164                    upgrade_url = api_host + "/admin/trigger-frontend-upgrade/"
165                    with lock_file.open() as infile:
166                        frontend_upgrade = requests.post(upgrade_url, data={"token": infile.read()}, timeout=(10 * 60))
167                    upgrade_ok = frontend_upgrade.json()["status"] == "OK"
168                    upgrade_error_message = frontend_upgrade.json().get("message")
169                except requests.RequestException:
170                    pass
171                except TimeoutError:
172                    upgrade_timeout = True
173
174                log_stream_restart = log_file_restart.open("a")
175                if not upgrade_ok:
176                    if upgrade_timeout:
177                        log_stream_restart.write("Upgrade timed out.")
178                    log_stream_restart.write("Error upgrading front-end container. You may need to upgrade and restart "
179                                             "containers manually.\n")
180                    if upgrade_error_message:
181                        log_stream_restart.write(f"Error message: {upgrade_error_message}\n")
182                    self.log.error("Error upgrading front-end container. See %s for details." % log_stream_restart.name)
183                    lock_file.unlink()
184                    return self.job.finish()
185
186            # restart front-end
187            log_stream_restart.write("Asking front-end to restart itself...\n")
188            log_stream_restart.flush()
189            try:
190                restart_url = api_host + "/admin/trigger-frontend-restart/"
191                with lock_file.open() as infile:
192                    response = requests.post(restart_url, data={"token": infile.read()}, timeout=5).json()
193
194                if response.get("message"):
195                    log_stream_restart.write(response.get("message") + "\n")
196            except (json.JSONDecodeError, requests.RequestException):
197                # this may happen because the server restarts and interrupts
198                # the request
199                pass
200
201            # wait for front-end to come online after a restart
202            time.sleep(3)  # give some time for the restart to trigger
203            start_time = time.time()
204            frontend_ok = False
205            while time.time() < start_time + 60:
206                try:
207                    frontend = requests.get(api_host + "/", timeout=5)
208                    if frontend.status_code > 401:
209                        time.sleep(2)
210                        continue
211                    frontend_ok = True
212                    break
213                except requests.RequestException:
214                    time.sleep(1)
215                    continue
216
217            # too bad
218            if not frontend_ok:
219                log_stream_restart.write("Timed out waiting for front-end to restart. You may need to restart it "
220                                         "manually.\n")
221                self.log.error("Front-end did not come back online after restart")
222            else:
223                log_stream_restart.write("Front-end is available. Restart complete.")
224                self.log.info("Front-end is available. Restart complete.")
225
226            log_stream_restart.close()
227            lock_file.unlink()
228
229            self.job.finish()
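
The worker authenticates its HTTP calls to the front-end with the one-off token it writes to restart.lock: a blake2b digest of a random UUID, POSTed as form data to the admin routes. Those front-end routes are not part of this module; the sketch below shows how such a route could validate the token, assuming the front-end can read the same restart.lock file. The route body, lock path, and responses are illustrative, not the actual 4CAT front-end code.

from pathlib import Path
from flask import Flask, request, jsonify

app = Flask(__name__)
LOCK_FILE = Path("config/restart.lock")  # placeholder; must match the worker's PATH_CONFIG

@app.route("/admin/trigger-frontend-restart/", methods=["POST"])
def trigger_frontend_restart():
    # only honour requests carrying the token the worker wrote to the lock
    # file, so that arbitrary visitors cannot trigger restarts
    if not LOCK_FILE.exists():
        return jsonify({"status": "error", "message": "No restart in progress"}), 403
    if request.form.get("token") != LOCK_FILE.read_text():
        return jsonify({"status": "error", "message": "Invalid token"}), 403
    # ... trigger the actual front-end restart here ...
    return jsonify({"status": "OK", "message": "Front-end restarting"})

Because the token is a digest of a random UUID, it is unguessable and valid for exactly one restart cycle: the worker deletes restart.lock when the job finishes, which invalidates the token.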
class FourcatRestarterAndUpgrader(backend.lib.worker.BasicWorker):

Restart 4CAT and optionally upgrade it to the latest release

Why implement this as a worker? Trying to have 4CAT restart itself leads to an interesting conundrum: it will not be able to report the outcome of the restart, because whatever bit of code is keeping track of that will be interrupted by restarting 4CAT.

Using a worker has the benefit of it restarting after 4CAT restarts, so it can then figure out that 4CAT was just restarted and report the outcome. It then uses a log file to keep track of the results. The log file can then be used by other parts of 4CAT to see if the restart was successful.

It does lead to another conundrum - what if due to some error, 4CAT never restarts? Then this worker will not be run again to report its own failure. There seem to be no clean ways around this, so anything watching the outcome of the worker probably needs to implement some timeout after which it is assumed that the restart/upgrade process failed catastrophically.

type = 'restart-4cat'
max_workers = 1
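
As the docstring notes, the worker cannot report its own failure if 4CAT never comes back up, so anything watching the restart needs a timeout of its own. A minimal sketch of such a watcher, polling the restart log for an outcome; the log file name matches the source above, while the helper name and marker strings are illustrative (they mirror messages the worker writes, but treat them as assumptions):

import time
from pathlib import Path

def wait_for_restart(log_path: Path, timeout: float = 300.0) -> bool:
    """Poll the restart log until it reports an outcome, or give up."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        if log_path.exists():
            contents = log_path.read_text()
            if "Restart complete." in contents:
                return True
            if "You may need to restart" in contents:
                # the worker logged a failure message
                return False
        time.sleep(2)
    # no verdict within the timeout: assume the restart/upgrade failed
    # catastrophically, as the docstring recommends
    return False

# e.g. wait_for_restart(Path("logs/restart.log"), timeout=600)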
def work(self):

Restart 4CAT and optionally upgrade it to the latest release
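
The method dispatches on the job's remote_id: any value starting with "upgrade" runs helper-scripts/migrate.py (optionally against a specific branch, via the job's "branch" detail), while any other value only force-restarts the daemon. A sketch of queueing each variant; the add_job call is an assumption about 4CAT's job queue API, but the job type, remote_id prefix, and branch detail match what work() reads:

# hypothetical trigger sketch, assuming `queue` is the back-end's job queue;
# only the job type, remote_id prefix and "branch" detail come from the source

# plain restart: any remote_id that does not start with "upgrade"
queue.add_job("restart-4cat", remote_id="restart")

# upgrade to the latest release, then restart
queue.add_job("restart-4cat", remote_id="upgrade")

# upgrade to the code in a specific branch
queue.add_job("restart-4cat", remote_id="upgrade", details={"branch": "master"})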