# Source code for tbot_contrib.locking

""" Machine Locking implementations for tbot """

import abc
import contextlib
import typing

import tbot
import tbot.tc.shell
from tbot.machine import linux, machine


class LockManagerBase(abc.ABC):
    """
    Abstract "interface" for machine-locking backends.

    Every locking implementation has to provide the three methods declared
    here.
    """

    @abc.abstractmethod
    def request_machine_lock(
        self, name: str, *, expiry: typing.Optional[int] = None
    ) -> bool:
        """
        Try to acquire the lock for the machine called ``name``.

        In most setups this is not called directly but through the
        :py:class:`~tbot_contrib.locking.MachineLock` mixin.

        :param str name: Name of the lock that should be taken.
        :param int expiry: Optional timeout after which the lock counts as
            'expired'.  Later locking requests treat an expired lock as if it
            was unlocked, which guards against testcases that fail without
            ever unlocking.
        :returns: ``True`` when the lock was acquired, ``False`` when it was
            not.
        """
        raise tbot.error.AbstractMethodError()

    @abc.abstractmethod
    def release_machine_lock(self, name: str) -> None:
        """
        Give up the lock for the machine called ``name``.

        :param str name: Name of the lock that should be released.
        """
        raise tbot.error.AbstractMethodError()

    @abc.abstractmethod
    def get_active_machine_locks(self) -> typing.Set[str]:
        """
        Report which machine locks this LockManager instance currently
        manages as active.

        :returns: Set of lock names.
        """
        raise tbot.error.AbstractMethodError()


class MachineLock(machine.PreConnectInitializer):
    """
    This is the initializer that is inherited by the board machine.  It just
    calls the lab-host's locking implementation.

    .. versionadded:: 0.9.1
    """

    lock_expiry: typing.Optional[int] = None
    """
    Timeout after which the lock should be considered expired.

    This provides a safeguard in case a testcase crashes without unlocking a
    lock - After the lock has expired, it will be considered unlocked again
    and a new testcase can acquire it.
    """

    @property
    def lock_name(self) -> str:
        """
        Prefix from which lock file name is derived.

        Defaults to the machine's name: ``self.name``.
        """
        return self.name

    @contextlib.contextmanager
    def _init_pre_connect(self) -> typing.Iterator:
        """
        Acquire this machine's lock from the lab-host before connecting.

        :raises Exception: If the selected lab-host does not implement
            :py:class:`LockManagerBase`, or if the lock could not be acquired.
        """
        with tbot.ctx.request(tbot.role.LabHost, reset_on_error=False) as lab:
            if not isinstance(lab, LockManagerBase):
                raise Exception("selected lab-host is not a lock manager")

            got_lock = lab.request_machine_lock(
                self.lock_name, expiry=self.lock_expiry
            )
            if not got_lock:
                raise Exception("could not acquire the lock")

            tbot.log.message(f"Acquired lock {self.lock_name}")

            # The lock is not released here; the lab-host request stays open
            # for as long as the caller remains inside this context.
            yield None


class PooledMachineLock(machine.PreConnectInitializer):
    """
    A 'replacement' for `MachineLock` which acquires a lock from a pool.

    .. versionadded:: 0.9.1
    """

    # Timeout handed to the lock manager as ``expiry``; ``None`` passes no
    # expiry at all.
    lock_expiry: typing.Optional[int] = None
    # Name of the pool machine whose lock was acquired.  Only assigned once
    # ``_init_pre_connect`` has run.
    selected_machine: typing.Optional[str]

    @property
    @abc.abstractmethod
    def available_machines(self) -> typing.List[str]:
        """
        Abstract property containing names of machines in the pool
        """
        raise tbot.error.AbstractMethodError()

    @contextlib.contextmanager
    def _init_pre_connect(self) -> typing.Iterator:
        """
        Acquire the lock of one machine from the pool before connecting.

        Locks the lab-host reports as active are tried first, then every
        machine in :py:attr:`available_machines` in order.  The name of the
        machine that was locked is stored in ``self.selected_machine``.

        :raises Exception: If the selected lab-host does not implement
            :py:class:`LockManagerBase`, or if no machine of the pool could
            be locked.
        """
        with tbot.ctx.request(tbot.role.LabHost, reset_on_error=False) as labhost:
            if not isinstance(labhost, LockManagerBase):
                raise Exception("selected lab-host is not a lock manager")

            self.selected_machine = None

            # First pass: prefer a pool machine whose lock the lab-host
            # already lists as active.
            for name in labhost.get_active_machine_locks():  # iterate all locks
                if name in self.available_machines:  # if lock is one of ours
                    if labhost.request_machine_lock(name, expiry=self.lock_expiry):
                        self.selected_machine = name
                        break

            # Second pass: take the first pool machine that can be locked.
            if self.selected_machine is None:
                for name in self.available_machines:  # iterate through machines
                    if labhost.request_machine_lock(name, expiry=self.lock_expiry):
                        self.selected_machine = name
                        break

            if self.selected_machine is None:
                raise Exception("Could not get free lock")

            tbot.log.message(f"Acquired lock for {self.selected_machine}")

            yield None


@contextlib.contextmanager
def flock_file_mutex(path: linux.Path, lock_fd: int) -> typing.Iterator[None]:
    """
    A context for holding a flock lock while running mutual exclusive code

    The file at ``path`` is opened on shell file descriptor ``lock_fd`` of
    ``path.host`` and locked with ``flock``.  On exit the lock is dropped and
    the descriptor is closed again.

    :param linux.Path path: Lock file; it is opened for writing on its host.
    :param int lock_fd: Shell file descriptor number used for the lock file.
    """
    host = path.host

    # Opening the descriptor happens outside of any ``try``: if it fails there
    # is nothing to unlock or close, and running the cleanup commands anyway
    # would only fail again and hide the original error.
    host.exec0("exec", linux.Raw(f"{lock_fd}>"), path)
    try:
        host.exec0("flock", str(lock_fd))
        try:
            # Best effort (return code ignored), same as before.
            host.exec("chmod", "0666", path)
            yield None
        finally:
            host.exec0("flock", "-u", str(lock_fd))
    finally:
        # Always close the descriptor, even when locking or unlocking failed,
        # so it does not stay open in the shell.
        host.exec0("exec", linux.Raw(f"{lock_fd}>&-"))


class LockManager(LockManagerBase, machine.PostShellInitializer, linux.LinuxShell):
    """
    Machine locking implementation based on Python, bash and flock(1)

    .. versionadded:: 0.9.1
    """

    lock_checkpid: bool = True
    """
    Make tbot check whether the PID associated with a lockfile is still alive.
    If this check is enabled and the PID is found, the lock will be considered
    active, even if it would otherwise have been assumed expired.
    """

    lock_fd: int = 9  # Default file descriptor in shell for lock file
    _active_locks: typing.Set[str]  # list of active locks

    @property
    def lock_dir(self) -> linux.Path:
        """
        The directory where tbot locks are stored.

        Defaults to ``/tmp/tbot-locks``.  If this directory does not exist, it
        will be created and given ``0777`` access mode to allow all users to
        write lockfiles to it.
        """
        return self.fsroot / "tmp" / "tbot-locks"

    def _lock_try_acquire(self, name: str, expiry: typing.Optional[int]) -> bool:
        """
        Make a single attempt to create the lock file for ``name``.

        A temporary file containing ``"<expiry-timestamp> <pid>"`` is written
        and then hard-linked to the lock file name.

        :param str name: Name of the lock.
        :param int expiry: Lifetime of the lock in seconds; ``None`` or a
            value below 1 stores ``0`` as the timestamp (no expiry).
        :returns: ``True`` if the hard link was created, ``False`` otherwise.
        :raises Exception: If the temporary file could not be created.
        """
        lockfile = self.lock_dir / name
        retval, result = self.exec("mktemp", "-p", self.lock_dir, "lock.XXX")
        if retval != 0:
            raise Exception("Could not create tempfile")
        tempfile = linux.Path(self, result.strip())

        # From here on the temporary file exists, so it is removed in a
        # ``finally`` to avoid leaving it behind when a later step fails.
        try:
            if (expiry is None) or (expiry < 1):  # if lock does not expire
                time_str = "0"
            else:  # else store the Unix time timestamp of expiry
                time_str = str(int(self.exec0("date", "+%s")) + expiry)
            shell_pid = self.env("$") if self.lock_checkpid else "0"
            self.exec0("chmod", "0666", tempfile)
            tempfile.write_text(f"{time_str} {shell_pid}\n")
            # Try linking lockfile, hard-linking is atomic
            return self.test("ln", tempfile, lockfile)
        finally:
            self.exec0("rm", tempfile)

    def _lock_handle_expiry(self, name: str) -> bool:
        """
        Check whether the existing lock ``name`` is stale and delete it if so.

        The check runs while holding a flock on ``lock-lock`` in the lock
        directory.

        :param str name: Name of the lock.
        :returns: ``True`` if another acquisition attempt is worthwhile (the
            lock file could not be read or an expired lock was deleted),
            ``False`` if the lock is still considered held.
        """
        lockfile = self.lock_dir / name
        locklock = self.lock_dir / "lock-lock"
        with flock_file_mutex(locklock, self.lock_fd):
            # Read the lock, perhaps it is stale...
            retval, result = self.exec("cat", lockfile)
            if retval != 0:
                # The lock file could not be read - most likely it was
                # released since the failed attempt.  Retrying is harmless:
                # the hard link simply fails again if the lock still exists.
                return True

            fields = result.split()
            expires_at = int(fields[0])

            # A timestamp of 0 means the lock never expires; otherwise it is
            # only stale once the stored time has passed.
            if (expires_at == 0) or (expires_at >= int(self.exec0("date", "+%s"))):
                return False

            # Check whether the lock PID is still in use by a bash instance
            retval, result = self.exec("ps", "-ocomm=", fields[1])
            if (retval == 0) and (result.strip() == "bash"):
                tbot.log.message(f"Expired lock {lockfile} PID still appears valid!")
                return False

            self.release_machine_lock(name)  # Delete stale Lock
            tbot.log.message(f"Lock {lockfile} expired, deleted.")
            return True  # The stale lock may be available next time round.

    def request_machine_lock(
        self, name: str, *, expiry: typing.Optional[int] = None
    ) -> bool:
        """
        Request lock for machine named ``name``.

        A lock this instance already holds is granted immediately.  If the
        lock is taken by someone else but turns out to be stale, it is removed
        and the acquisition is retried once.

        :param str name: Name of the lock to be acquired.
        :param int expiry: Optional lifetime of the lock in seconds.
        :returns: ``True`` if the lock is held by this instance afterwards,
            ``False`` otherwise.
        """
        if name in self._active_locks:
            return True

        if not self.lock_dir.is_dir():
            # Create directory and mode in one step.  ``-p`` makes this a
            # no-op when another tbot instance created the directory after
            # the check above, instead of failing on "File exists".
            self.exec0("mkdir", "-p", "-m", "0777", self.lock_dir)

        if not self._lock_try_acquire(name, expiry):
            if not self._lock_handle_expiry(name):
                return False  # locked and not expired
            if not self._lock_try_acquire(name, expiry):  # expired, can retry
                return False  # no luck this time

        self._active_locks.add(name)
        return True

    def release_machine_lock(self, name: str) -> None:
        """
        Release lock for machine named ``name``.

        If not explicitly released, the
        :py:class:`~tbot_contrib.locking.LockManager` will automatically unlock
        all locks it is holding when the lab-host machine is deinitialized.
        """
        lockfile = self.lock_dir / name
        self._active_locks.discard(name)
        # Best effort: the return code of ``rm`` is deliberately not checked.
        self.exec("rm", lockfile)

    def get_active_machine_locks(self) -> typing.Set[str]:
        """
        Return the names of the locks currently held by this instance.

        The returned set is the manager's own bookkeeping object, not a copy.
        """
        return self._active_locks

    @contextlib.contextmanager
    def _init_post_shell(self) -> typing.Iterator:
        """
        Set up lock bookkeeping and release all held locks on exit.

        :raises NotImplementedError: If the ``flock`` tool is not available on
            this host.
        """
        self._active_locks = set()

        if not tbot.tc.shell.check_for_tool(self, "flock"):
            raise NotImplementedError(
                "LockManager requires the 'flock' tool on the lab-host"
            )

        try:
            yield None
        finally:
            # Locks are released when the lab-host exits.  Iterate over a
            # copy because releasing a lock removes it from the set.
            for lock in list(self._active_locks):
                self.release_machine_lock(lock)