pyhooks Python package

`pyhooks`

A Python library that lets Vivaria agents interact with Vivaria. pyhooks also contains other code shared between METR agents.

`Actions`

Functions that agents can use to implement actions, e.g. running bash and Python commands.

Source code in pyhooks/pyhooks/__init__.py

class Actions:
    """
    Functions that agents can use to implement actions, e.g. running bash and Python commands.
    """

    def __init__(self, envs: CommonEnvs | None = None):
        self.envs = envs or CommonEnvs.from_env()

    async def run_bash(self, script: str, timeout: float) -> str:
        await self.check_safety(script)
        return await run_bash(script, timeout)

    async def run_python(self, script: str, timeout: float) -> str:
        return await run_python(script, timeout)

    async def check_safety(self, action: str):
        safety_policy_notice = (
            await trpc_server_request(
                "mutation",
                "checkActionSafety",
                {
                    "runId": self.envs.run_id,
                    "agentBranchNumber": self.envs.branch,
                    "action": action,
                },
                envs=self.envs,
            )
        )["notice"]

        if safety_policy_notice:
            raise ActionViolatesSafetyPolicyException(safety_policy_notice)

`Hooks`

Bases: BaseModel

Source code in pyhooks/pyhooks/__init__.py

class Hooks(BaseModel):
    class Config:
        arbitrary_types_allowed = True

    def __init__(
        self,
        task_id: str | None = None,
        envs: CommonEnvs | None = None,
    ):
        super().__init__()
        self._task_id = task_id or env.TASK_ID
        self._envs = envs or CommonEnvs.from_env()

    @property
    def task_id(self) -> str:
        if not self._task_id:
            raise Exception("TASK_ID not set")
        return self._task_id

    def _send_background_request(
        self,
        reqtype: str,
        route: str,
        data: dict,
        session: aiohttp.ClientSession | None = None,
    ):
        try:
            # Try to get the currently running event loop
            loop = asyncio.get_running_loop()
            # If successful, create a task in the running loop
            return loop.create_task(
                self._send_trpc_server_request(reqtype, route, data, session)
            )
        except RuntimeError:
            # No event loop is running, so we create a new one and run the task
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

            async def coro():
                return await self._send_trpc_server_request(
                    reqtype,
                    route,
                    data,
                    session,
                )

            task = loop.run_until_complete(coro())
            loop.close()
            return task

    async def _send_trpc_server_request(
        self,
        reqtype: str,
        route: str,
        data: dict,
        session: aiohttp.ClientSession | None = None,
        record_pause_on_error: bool = True,
    ) -> Any:
        return await trpc_server_request(
            reqtype,
            route,
            data,
            session=session,
            record_pause_on_error=record_pause_on_error,
            envs=self._envs,
        )

    def main(self, main_function: Callable):
        async def error_handler_wrapper():
            try:
                import pdb_attach

                pdb_attach.listen(50000)
            except Exception as e:
                print("Failed to start pdb attach", repr(e))
            nonlocal main_function
            exit_code = 0
            try:
                await main_function(self)
            except SystemExit as e:
                if e.code is not None:
                    exit_code = e.code
            except Exception as e:
                if env.TESTING:
                    print("fatal error:", e, file=sys.stderr)
                exit_code = 1
                await self._send_trpc_server_request(
                    "mutation",
                    "logFatalError",
                    self.make_trace_entry(
                        {
                            "detail": str(e),
                            "from": "agent",
                            "trace": traceback.format_exc(),
                            "extra": None,
                        }
                    ),
                )
            finally:
                current_task = asyncio.current_task()
                all_tasks = [x for x in asyncio.all_tasks() if x is not current_task]
                all_tasks = await asyncio.gather(*all_tasks)
                return exit_code

        exit_code = asyncio.run(error_handler_wrapper())
        exit(exit_code)

    def make_trace_entry(self, x: dict[str, Any]) -> dict[str, Any]:
        result = self._new_base_event() | {"content": x}
        return result

    # Don't wait for log, action, observation, frameStart, or frameEnd. Instead, run them in the background

    def log(self, *content: Any):
        return self.log_with_attributes(None, *content)

    def log_with_attributes(self, attributes: dict | None, *content: Any):
        entry = self.make_trace_entry({"content": content, "attributes": attributes})
        return self._send_background_request("mutation", "log", entry)

    def log_image(self, image_url: str, description: str | None = None):
        entry = self.make_trace_entry(
            {"content": [{"image_url": image_url, "description": description}]}
        )
        return self._send_background_request("mutation", "log", entry)

    def action(self, action: dict):
        entry = self.make_trace_entry({"action": action})
        return self._send_background_request("mutation", "action", entry)

    def observation(self, observation: dict):
        entry = self.make_trace_entry({"observation": observation})
        return self._send_background_request("mutation", "observation", entry)

    async def log_error(self, detail: Any, extra: Any = None):
        # don't cause another error just because error failed (would be messy)
        entry = self.make_trace_entry(
            {
                "detail": str(detail),
                "from": "agent",
                "trace": "".join(traceback.format_stack()[:-2]),
                "extra": extra,
            }
        )
        await self._send_trpc_server_request("mutation", "logError", entry)

    def start_frame(self, name: str):
        req = self.make_trace_entry({"name": name})
        return self._send_background_request("mutation", "frameStart", req)

    def end_frame(self):
        req = self.make_trace_entry({})
        return self._send_background_request("mutation", "frameEnd", req)

    def save_state(self, state: Any):
        req = self.make_trace_entry({"state": state})
        return self._send_background_request("mutation", "saveState", req)

    def frame(self, name: str):
        def decorator(func):
            @functools.wraps(func)
            async def wrapper(*args, **kwargs):
                self.start_frame(name)
                result = await func(*args, **kwargs)
                self.end_frame()
                return result

            return wrapper

        return decorator

    # do wait for submit, generate
    async def getTask(self) -> TaskInfo:
        res = await self._send_trpc_server_request(
            "query",
            "getTaskInstructions",
            {
                "taskId": self.task_id,
                "runId": self._envs.run_id,
                "agentBranchNumber": self._envs.branch,
            },
        )
        return TaskInfo(**res)

    async def submit(self, submission: str):
        if not isinstance(submission, str):
            raise TypeError(f"submission must be a string, got {type(submission)}")

        async with aiohttp.ClientSession(
            # No timeout because scoring the submission can take a long time
            timeout=aiohttp.ClientTimeout(),
        ) as session:
            await self._send_trpc_server_request(
                "mutation",
                "submit",
                self.make_trace_entry({"value": submission}),
                session=session,
            )

        exit(0)

    async def score(self) -> ScoreResult:
        async with aiohttp.ClientSession(
            # No timeout because scoring the task environment can take a long time
            timeout=aiohttp.ClientTimeout(),
        ) as session:
            res = await self._send_trpc_server_request(
                "mutation",
                "score",
                {"runId": self._envs.run_id, "agentBranchNumber": self._envs.branch},
                session=session,
            )
            return ScoreResult(**res)

    async def scoreLog(self) -> list[ScoreLogEntry]:
        async with aiohttp.ClientSession(
            # No timeout because scoring the task environment can take a long time
            timeout=aiohttp.ClientTimeout(),
        ) as session:
            res = await self._send_trpc_server_request(
                "query",
                "getScoreLog",
                {"runId": self._envs.run_id, "agentBranchNumber": self._envs.branch},
                session=session,
            )
            return [ScoreLogEntry(**x) for x in res]

    async def generate(
        self,
        settings: MiddlemanSettings,
        template: str | None = None,
        templateValues: dict[str, Any] | None = None,
        prompt: str | None = None,
        messages: list[OpenaiChatMessage] | None = None,
        description: Optional[str] = None,
        functions: Optional[Any] = None,
        extraParameters: dict[str, Any] | None = None,
        session: aiohttp.ClientSession | None = None,
    ) -> MiddlemanResult:
        gen_request = GenerationRequest(
            settings=settings,
            template=template,
            templateValues=templateValues,
            messages=messages,
            description=description,
            functions=functions,
            prompt=prompt,
            extraParameters=extraParameters,
        )
        req = self._new_base_event() | {"genRequest": gen_request.model_dump()}
        return MiddlemanResult(
            **(
                await self._send_trpc_server_request(
                    "mutation",
                    "generate",
                    req,
                    session=session,
                )
            )
        )

    async def generate_with_anthropic_prompt_caching(
        self,
        settings: MiddlemanSettings,
        messages: list[OpenaiChatMessage],
        add_cache_control: bool = True,
        **kwargs,
    ) -> list[MiddlemanResult]:
        """
        Generates multiple completions for a single prompt by first submitting a generation request
        with `n=1`, to write the prompt to Anthropic's prompt cache, then submitting more requests
        until `settings.n` completions have been generated. Loops because `generate` may return fewer
        generations than requested for Anthropic models. That's because Anthropic doesn't support `n>1`
        natively, so Middleman makes `n` parallel API requests to get `n` completions. Some or all of
        these requests may fail due to rate limits or other errors.

        If `add_cache_control` is True and the last message of the prompt has a `content` field that is a list,
        this method will automatically add a `cache_control` key to the last element of the content list.
        This way, Anthropic will cache the entire prompt.
        """
        if settings.n <= 1:
            return [await self.generate(settings=settings, messages=messages, **kwargs)]

        messages = [message.model_copy() for message in messages]
        if not isinstance(messages[-1].content, str) and add_cache_control:
            messages[-1].content[-1]["cache_control"] = {"type": "ephemeral"}

        results: list[MiddlemanResult] = []

        first_request_settings = settings.model_copy(update={"n": 1})
        results.append(
            await self.generate(
                settings=first_request_settings, messages=messages, **kwargs
            )
        )

        while True:
            completions_so_far = sum(
                len(r.outputs) if r.outputs else 0 for r in results
            )
            if completions_so_far >= settings.n:
                break

            next_request_settings = settings.model_copy(
                update={"n": settings.n - completions_so_far}
            )
            results.append(
                await self.generate(
                    settings=next_request_settings, messages=messages, **kwargs
                )
            )

        return results

    async def count_prompt_tokens(
        self,
        settings: MiddlemanSettings,
        messages: list[OpenaiChatMessage],
        functions: Optional[Any] = None,
        extraParameters: dict[str, Any] | None = None,
    ) -> int:
        """Returns the number of prompt tokens that a generation request will use."""
        genReq = GenerationRequest(
            settings=settings,
            messages=messages,
            functions=functions,
            extraParameters=extraParameters,
        )
        req = {"genRequest": genReq.model_dump()}
        res = await self._send_trpc_server_request("mutation", "countPromptTokens", req)
        return res["tokens"]

    async def burn_tokens(
        self,
        n_prompt_tokens: int,
        n_completion_tokens: int,
        n_serial_action_tokens: int | None = None,
    ):
        req = self._new_base_event() | {
            "n_prompt_tokens": n_prompt_tokens,
            "n_completion_tokens": n_completion_tokens,
            "n_serial_action_tokens": n_serial_action_tokens,
        }
        await self._send_trpc_server_request(
            "mutation",
            "burnTokens",
            req,
        )

    async def generate_one(
        self,
        settings: MiddlemanSettings,
        template: str | None = None,
        templateValues: dict[str, Any] | None = None,
        prompt: str | None = None,
        messages: list[OpenaiChatMessage] | None = None,
        description: Optional[str] = None,
        extraParameters: dict[str, Any] | None = None,
    ) -> str:
        if settings.n != 1:
            raise Exception(
                "in generate_one, n must be 1. use generate for n>1 and full middleman output"
            )
        result = await self.generate(
            settings=settings,
            template=template,
            templateValues=templateValues,
            messages=messages,
            description=description,
            prompt=prompt,
            extraParameters=extraParameters,
        )
        if result.error is not None or result.outputs is None:
            raise Exception("Generation error", result.error)
        return result.outputs[0].completion

    async def generate_many(
        self,
        settings: MiddlemanSettings,
        template: str | None = None,
        templateValues: dict[str, Any] | None = None,
        prompt: str | None = None,
        messages: list[OpenaiChatMessage] | None = None,
        description: Optional[str] = None,
        extraParameters: dict[str, Any] | None = None,
    ) -> list[str]:
        result = await self.generate(
            settings=settings,
            template=template,
            templateValues=templateValues,
            messages=messages,
            description=description,
            prompt=prompt,
            extraParameters=extraParameters,
        )
        if result.error is not None or result.outputs is None:
            raise Exception("Generation error", result.error)
        return [x.completion for x in result.outputs]

    async def rate_options(
        self,
        rating_model: str,
        rating_template: str,
        transcript: str,
        options: list[RatingOption],
        description: Optional[str] = None,
    ) -> RatedOption:
        trace_entry = self.make_trace_entry(
            {
                "options": [x.dict() for x in options],
                "description": description,
                "ratingModel": (rating_model),
                "ratingTemplate": rating_template,
                "transcript": transcript,
            }
        )
        chosen_option = await self._send_trpc_server_request(
            "mutation",
            "rateOptions",
            trace_entry,
        )
        entry_key = {
            "runId": trace_entry["runId"],
            "index": trace_entry["index"],
            "agentBranchNumber": trace_entry["agentBranchNumber"],
        }
        while chosen_option is None:
            print("Waiting for human interaction")
            chosen_option = await self._send_trpc_server_request(
                "query",
                "retrieveRatings",
                entry_key,
            )
        return RatedOption(**chosen_option)

    async def embed(self, req):
        return await self._send_trpc_server_request("mutation", "embeddings", req)

    def get_tokenizer(self, tokenizer_name: str = "cl100k_base"):
        try:
            return tiktoken.get_encoding(tokenizer_name)
        except Exception:
            return tiktoken.get_encoding("cl100k_base")

    async def get_input(self, description: str, default_input: str) -> str:
        "get input from user or use default if not in intervention mode"
        trace_entry = self.make_trace_entry(
            {
                "description": description,
                "defaultInput": default_input,
            }
        )
        entry_key = {
            "runId": trace_entry["runId"],
            "index": trace_entry["index"],
            "agentBranchNumber": trace_entry["agentBranchNumber"],
        }
        await self._send_trpc_server_request("mutation", "requestInput", trace_entry)
        input = await self._send_trpc_server_request(
            "query", "retrieveInput", entry_key
        )
        while input is None:
            print("Waiting for human interaction")
            input = await self._send_trpc_server_request(
                "query", "retrieveInput", entry_key
            )
            if input is None:
                await asyncio.sleep(10)
        return input

    def token_lengths(
        self, texts: list[str], tokenizer_or_model_name: str = "cl100k_base"
    ) -> list[int]:
        if "gpt-4" in tokenizer_or_model_name or "turbo" in tokenizer_or_model_name:
            tokenizer_or_model_name = "cl100k_base"
        try:
            tokenizer = self.get_tokenizer(tokenizer_or_model_name)
        except Exception as e:
            print("can't find tokenizer", tokenizer_or_model_name, repr(e))
            tokenizer = self.get_tokenizer("cl100k_base")
        return [len(x) for x in tokenizer.encode_batch(texts, disallowed_special=())]

    def token_length(self, text, tokenizer_or_model_name: str = "cl100k_base") -> int:
        return self.token_lengths([text], tokenizer_or_model_name)[0]

    def oai_message_token_lengths(self, messages: list[OpenaiChatMessage]) -> list[int]:
        return [
            x + 3
            for x in self.token_lengths(
                [
                    # TODO Handle the case where x.content is a list[dict], as it can be for
                    # gpt-4-vision-preview: https://platform.openai.com/docs/guides/vision/quick-start
                    (x.content if isinstance(x.content, str) else "")
                    + (json.dumps(x.function_call) if x.function_call else "")
                    + (x.name if x.name else "")
                    for x in messages
                ],
                "cl100k_base",
            )
        ]

    async def get_permitted_models_info(self) -> dict[str, ModelInfo]:
        global permitted_models_cache
        if permitted_models_cache:
            return permitted_models_cache
        res = await self._send_trpc_server_request(
            "query",
            "getPermittedModelsInfo",
            {},
        )
        permitted_models_info = {mi["name"]: ModelInfo(**mi) for mi in res}
        permitted_models_cache = permitted_models_info
        return permitted_models_info

    # Deprecated; use Actions#run_bash instead
    async def run_bash(self, script, timeout) -> str:
        await Actions().check_safety(script)
        return await run_bash(script, timeout)

    # Deprecated; use Actions#run_python instead
    async def run_python(self, script, timeout) -> str:
        return await run_python(script, timeout)

    def deduplicate_options(self, options: list[RatingOption]) -> list[RatingOption]:
        return deduplicate_options(options)

    async def update_agent_command_result(
        self,
        stdout_to_append: str,
        stderr_to_append: str,
        exit_status: int | None,
        agent_pid: int | None,
    ):
        req = {
            "runId": self._envs.run_id,
            "agentBranchNumber": self._envs.branch,
            "stdoutToAppend": stdout_to_append,
            "stderrToAppend": stderr_to_append,
            "exitStatus": exit_status,
            "agentPid": agent_pid,
        }
        await self._send_trpc_server_request(
            "mutation",
            "updateAgentCommandResult",
            req,
        )

    async def get_usage(self) -> RunUsageAndLimits:
        res = await self._send_trpc_server_request(
            "query",
            "getRunUsageHooks",
            {
                "runId": self._envs.run_id,
                "agentBranchNumber": self._envs.branch,
            },
        )
        return RunUsageAndLimits(**res)

    async def pause(self):
        await self._send_trpc_server_request(
            "mutation",
            "pause",
            {
                "runId": self._envs.run_id,
                "agentBranchNumber": self._envs.branch,
                "start": timestamp_now(),
                "reason": "pauseHook",
            },
            record_pause_on_error=False,
        )

    async def unpause(self):
        await self._send_trpc_server_request(
            "mutation",
            "unpause",
            {
                "runId": self._envs.run_id,
                "agentBranchNumber": self._envs.branch,
                "reason": "unpauseHook",
            },
            record_pause_on_error=False,
        )

    def _new_base_event(self) -> dict[str, Any]:
        return {
            "runId": self._envs.run_id,
            "index": random_index(),
            "agentBranchNumber": self._envs.branch,
            "calledAt": timestamp_strictly_increasing(),
        }

`count_prompt_tokens(settings, messages, functions=None, extraParameters=None)` `async`

Returns the number of prompt tokens that a generation request will use.

Source code in pyhooks/pyhooks/__init__.py

async def count_prompt_tokens(
    self,
    settings: MiddlemanSettings,
    messages: list[OpenaiChatMessage],
    functions: Optional[Any] = None,
    extraParameters: dict[str, Any] | None = None,
) -> int:
    """Returns the number of prompt tokens that a generation request will use."""
    genReq = GenerationRequest(
        settings=settings,
        messages=messages,
        functions=functions,
        extraParameters=extraParameters,
    )
    req = {"genRequest": genReq.model_dump()}
    res = await self._send_trpc_server_request("mutation", "countPromptTokens", req)
    return res["tokens"]

`generate_with_anthropic_prompt_caching(settings, messages, add_cache_control=True, **kwargs)` `async`

Generates multiple completions for a single prompt by first submitting a generation request with n=1, to write the prompt to Anthropic's prompt cache, then submitting more requests until settings.n completions have been generated. Loops because generate may return fewer generations than requested for Anthropic models. That's because Anthropic doesn't support n>1 natively, so Middleman makes n parallel API requests to get n completions. Some or all of these requests may fail due to rate limits or other errors.

If add_cache_control is True and the last message of the prompt has a content field that is a list, this method will automatically add a cache_control key to the last element of the content list. This way, Anthropic will cache the entire prompt.

Source code in pyhooks/pyhooks/__init__.py

async def generate_with_anthropic_prompt_caching(
    self,
    settings: MiddlemanSettings,
    messages: list[OpenaiChatMessage],
    add_cache_control: bool = True,
    **kwargs,
) -> list[MiddlemanResult]:
    """
    Generates multiple completions for a single prompt by first submitting a generation request
    with `n=1`, to write the prompt to Anthropic's prompt cache, then submitting more requests
    until `settings.n` completions have been generated. Loops because `generate` may return fewer
    generations than requested for Anthropic models. That's because Anthropic doesn't support `n>1`
    natively, so Middleman makes `n` parallel API requests to get `n` completions. Some or all of
    these requests may fail due to rate limits or other errors.

    If `add_cache_control` is True and the last message of the prompt has a `content` field that is a list,
    this method will automatically add a `cache_control` key to the last element of the content list.
    This way, Anthropic will cache the entire prompt.
    """
    if settings.n <= 1:
        return [await self.generate(settings=settings, messages=messages, **kwargs)]

    messages = [message.model_copy() for message in messages]
    if not isinstance(messages[-1].content, str) and add_cache_control:
        messages[-1].content[-1]["cache_control"] = {"type": "ephemeral"}

    results: list[MiddlemanResult] = []

    first_request_settings = settings.model_copy(update={"n": 1})
    results.append(
        await self.generate(
            settings=first_request_settings, messages=messages, **kwargs
        )
    )

    while True:
        completions_so_far = sum(
            len(r.outputs) if r.outputs else 0 for r in results
        )
        if completions_so_far >= settings.n:
            break

        next_request_settings = settings.model_copy(
            update={"n": settings.n - completions_so_far}
        )
        results.append(
            await self.generate(
                settings=next_request_settings, messages=messages, **kwargs
            )
        )

    return results

`get_input(description, default_input)` `async`

get input from user or use default if not in intervention mode

Source code in pyhooks/pyhooks/__init__.py

async def get_input(self, description: str, default_input: str) -> str:
    "get input from user or use default if not in intervention mode"
    trace_entry = self.make_trace_entry(
        {
            "description": description,
            "defaultInput": default_input,
        }
    )
    entry_key = {
        "runId": trace_entry["runId"],
        "index": trace_entry["index"],
        "agentBranchNumber": trace_entry["agentBranchNumber"],
    }
    await self._send_trpc_server_request("mutation", "requestInput", trace_entry)
    input = await self._send_trpc_server_request(
        "query", "retrieveInput", entry_key
    )
    while input is None:
        print("Waiting for human interaction")
        input = await self._send_trpc_server_request(
            "query", "retrieveInput", entry_key
        )
        if input is None:
            await asyncio.sleep(10)
    return input

`Pauser`

Manages delays in retrying RPCs, and sending pause/unpause requests to the server

Source code in pyhooks/pyhooks/__init__.py

class Pauser:
    """Manages delays in retrying RPCs, and sending pause/unpause requests to the server"""

    _envs: CommonEnvs
    _start: int
    _end: Optional[int]
    _state: State
    _sleeper: Sleeper
    _request_fn: RequestFn
    _record_pause: bool

    class State(Enum):
        NO_PAUSE = auto()
        PAUSE_REQUESTED = auto()
        PAUSE_FAILED = auto()
        PAUSE_SUCCEEDED = auto()

    def __init__(
        self,
        envs: CommonEnvs,
        sleeper: Sleeper,
        request_fn: RequestFn,
        record_pause: bool,
        start: int | None,
    ):
        self._envs = envs
        self._start = start if start is not None else timestamp_now()
        self._end = None
        self._state = self.State.NO_PAUSE
        self._sleeper = sleeper
        self._request_fn = request_fn
        self._record_pause = record_pause

    @property
    def run_id(self) -> int:
        return cast(int, self._envs.run_id or env.RUN_ID)

    @property
    def branch(self) -> int:
        return cast(int, self._envs.branch or env.AGENT_BRANCH_NUMBER)

    async def pause(self):
        await self._try_pause_once()
        await self._sleeper.sleep()
        self._end = timestamp_now()

    async def _try_pause_once(self):
        """Tries to ensure that a single pause request was sent to the server.

        Can be called successively and will only retry pausing until success."""
        match self._state:
            case self.State.NO_PAUSE:
                self._state = self.State.PAUSE_REQUESTED
                await self._send_pause()
            case self.State.PAUSE_FAILED:
                await self._send_pause()
            case self.State.PAUSE_REQUESTED, self.State.PAUSE_SUCCEEDED:
                return

    async def _send_pause(self) -> bool:
        if not self._record_pause:
            self._state = self.State.PAUSE_SUCCEEDED
            return True
        try:
            await self._request_fn(
                "mutation",
                "pause",
                {
                    "runId": self.run_id,
                    "agentBranchNumber": self.branch,
                    "reason": "pyhooksRetry",
                    "start": self._start,
                },
                record_pause_on_error=False,
                envs=self._envs,
            )
            self._state = self.State.PAUSE_SUCCEEDED
            return True
        except Exception as e:
            self._state = self.State.PAUSE_FAILED
            print("Failed to pause trpc server request", repr(e))
            return False

    async def unpause(self, end: int | None):
        """Sends an unpause request to the server if necessary.

        Also sends a pause request if previous pause attempts failed.

        Args:
            end: The end time of the pause.
        """

        if end is not None:
            self._end = end

        match self._state:
            case self.State.NO_PAUSE:
                return
            case self.State.PAUSE_REQUESTED:
                raise RuntimeError(
                    "Unpause called before pause completed (should never happen)"
                )
            case self.State.PAUSE_FAILED:
                if await self._send_pause():
                    await self._send_unpause()
                # If the pause request failed, an unpause will just make things confusing.
            case self.State.PAUSE_SUCCEEDED:
                await self._send_unpause()

    async def _send_unpause(self):
        assert self._end is not None
        if not self._record_pause:
            return
        try:
            await self._request_fn(
                "mutation",
                "unpause",
                {
                    "runId": self.run_id,
                    "agentBranchNumber": self.branch,
                    "reason": "pyhooksRetry",
                    "end": self._end,
                },
                record_pause_on_error=False,
                envs=self._envs,
            )
        except Exception as e:
            print("Failed to unpause trpc server request", repr(e))
            raise

`unpause(end)` `async`

Sends an unpause request to the server if necessary.

Also sends a pause request if previous pause attempts failed.

Parameters:

Name	Type	Description	Default
`end`	`int \| None`	The end time of the pause.	required

Source code in pyhooks/pyhooks/__init__.py

async def unpause(self, end: int | None):
    """Sends an unpause request to the server if necessary.

    Also sends a pause request if previous pause attempts failed.

    Args:
        end: The end time of the pause.
    """

    if end is not None:
        self._end = end

    match self._state:
        case self.State.NO_PAUSE:
            return
        case self.State.PAUSE_REQUESTED:
            raise RuntimeError(
                "Unpause called before pause completed (should never happen)"
            )
        case self.State.PAUSE_FAILED:
            if await self._send_pause():
                await self._send_unpause()
            # If the pause request failed, an unpause will just make things confusing.
        case self.State.PAUSE_SUCCEEDED:
            await self._send_unpause()

`pyhooks.agent_output`

Functionality for watching a single /agent-output/agent-branch-N directory for changes to the agent's stdout, stderr, and exit status files. When a file changes, watch_agent_output calls Hooks#update_agent_command_result with the updated stdout, stderr, and exit status. Each agent branch in an agent container starts its own copy of this script.

`pyhooks.env`

`pyhooks.execs`

exports python_exec utility for handling timeouts and output redirection (one day, RAM too)

`run_python(code, timeout_sec, wait_after_kill=15.0, minimum_free_ram_bytes=800000000, log=False)` `async`

exec the code in a separate thread and collect the stdout and stderr. If the code takes longer than timeout seconds to run, then we attempt to kill it. If it takes longer than wait_after_kill seconds to die, then we give up and return the result anyway. (The thread will die when its running system call finishes executing.)

Variables are shared between threads, so e.g. shared_box[0] += 1 works. Note that x += 1 won't work.

Source code in pyhooks/pyhooks/execs.py

async def run_python(
    code: str,
    timeout_sec: float,
    wait_after_kill=15.0,
    minimum_free_ram_bytes=800_000_000,
    log=False,
) -> str:
    """
    exec the code in a separate thread and collect the stdout and stderr.
    If the code takes longer than timeout seconds to run, then we attempt to kill it.
    If it takes longer than wait_after_kill seconds to die, then we give up and return the result anyway.
    (The thread will die when its running system call finishes executing.)

    Variables are shared between threads, so e.g. `shared_box[0] += 1` works. Note that `x += 1` won't work.
    """
    from pyhooks import Actions  # type: ignore

    await Actions().check_safety(code)

    async with aiohttp.ClientSession(
        timeout=aiohttp.ClientTimeout(
            total=60 * 25, sock_connect=60 * 25, sock_read=60 * 25
        )
    ) as session:
        try:
            async with session.post(
                "http://localhost:9712/run_python",
                json={
                    "code": code,
                    "timeout": timeout_sec,
                    "wait_after_kill": wait_after_kill,
                    "minimum_free_ram_bytes": minimum_free_ram_bytes,
                    "log": log,
                },
            ) as resp:
                # encode and decode to prevent errors from unicode surrogate characters
                return (
                    (await resp.json())["result"]
                    .encode("utf-8", errors="replace")
                    .decode("utf-8")
                )
        except Exception as e:
            print(f"Error connecting to python server: {e}", file=sys.stderr)
            return f"Unknown error. May be caused by python code timeout after 25 minutes. Details: {e}"

`pyhooks.options`

`pyhooks.python_server`

`InterruptibleThread`

Bases: Thread

A thread that can be interrupted with t.raiseException() Based on https://stackoverflow.com/a/325528

Source code in pyhooks/pyhooks/python_server.py

class InterruptibleThread(threading.Thread):
    """
    A thread that can be interrupted with t.raiseException()
    Based on https://stackoverflow.com/a/325528
    """

    def run(self):
        """
        Catch uncaught exceptions and save them to t.exc.
        Necessary to remove unwanted "Exception ignored in thread started by..." and "Exception ignored in sys.unraisablehook..."
        https://stackoverflow.com/a/31614591
        """
        self.exc = None
        try:
            self.ret = self._target(*self._args, **self._kwargs)  # type: ignore
        except Exception as e:
            self.exc = e

    def raiseException(self, ExceptionClass):
        """
        Interrupt thread with an exception.
        Exception happens after the current system call finishes executing.
        (So eg time.sleep() is not interrupted.)
        If exception isn't firing then you can try calling this in a loop.
        """
        if not self.is_alive():
            return  # do nothing
        thread_id = self.ident
        if thread_id is None:
            raise Exception("couldn't get thread identifier")
        res = ctypes.pythonapi.PyThreadState_SetAsyncExc(
            ctypes.c_long(thread_id), ctypes.py_object(ExceptionClass)
        )
        if res == 0:
            raise ValueError("invalid thread id")
        elif res != 1:
            # "if it returns a number greater than one, you're in trouble,
            # and you should call it again with exc=NULL to revert the effect"
            ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_long(thread_id), None)
            raise SystemError("PyThreadState_SetAsyncExc failed")

`raiseException(ExceptionClass)`

Interrupt thread with an exception. Exception happens after the current system call finishes executing. (So eg time.sleep() is not interrupted.) If exception isn't firing then you can try calling this in a loop.

Source code in pyhooks/pyhooks/python_server.py

def raiseException(self, ExceptionClass):
    """
    Interrupt thread with an exception.
    Exception happens after the current system call finishes executing.
    (So eg time.sleep() is not interrupted.)
    If exception isn't firing then you can try calling this in a loop.
    """
    if not self.is_alive():
        return  # do nothing
    thread_id = self.ident
    if thread_id is None:
        raise Exception("couldn't get thread identifier")
    res = ctypes.pythonapi.PyThreadState_SetAsyncExc(
        ctypes.c_long(thread_id), ctypes.py_object(ExceptionClass)
    )
    if res == 0:
        raise ValueError("invalid thread id")
    elif res != 1:
        # "if it returns a number greater than one, you're in trouble,
        # and you should call it again with exc=NULL to revert the effect"
        ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_long(thread_id), None)
        raise SystemError("PyThreadState_SetAsyncExc failed")

`run()`

Catch uncaught exceptions and save them to t.exc. Necessary to remove unwanted "Exception ignored in thread started by..." and "Exception ignored in sys.unraisablehook..." https://stackoverflow.com/a/31614591

Source code in pyhooks/pyhooks/python_server.py

def run(self):
    """
    Catch uncaught exceptions and save them to t.exc.
    Necessary to remove unwanted "Exception ignored in thread started by..." and "Exception ignored in sys.unraisablehook..."
    https://stackoverflow.com/a/31614591
    """
    self.exc = None
    try:
        self.ret = self._target(*self._args, **self._kwargs)  # type: ignore
    except Exception as e:
        self.exc = e

`OutputTee`

Allows each thread to output to different File objects (with optional prefix). Based on https://stackoverflow.com/a/57996986

Source code in pyhooks/pyhooks/python_server.py

class OutputTee:
    """
    Allows each thread to output to different File objects (with optional prefix).
    Based on https://stackoverflow.com/a/57996986
    """

    def __init__(self, default_file):
        self.which_files = {}
        self.default = default_file

    def set_outputs(self, files):
        self.which_files[get_thread_id()] = files

    def write(self, message: str):
        files = self.which_files.get(get_thread_id(), [self.default])
        for file in files:
            try:
                file.write(message)
            except:  # noqa: E722
                pass

    def flush(self):
        "required for compatibility"
        files = self.which_files.get(get_thread_id(), [self.default])
        for file in files:
            try:
                file.flush()
            except:  # noqa: E722
                pass

`flush()`

required for compatibility

Source code in pyhooks/pyhooks/python_server.py

def flush(self):
    "required for compatibility"
    files = self.which_files.get(get_thread_id(), [self.default])
    for file in files:
        try:
            file.flush()
        except:  # noqa: E722
            pass

`PrefixedFile`

add a prefix to each line written to a file

Source code in pyhooks/pyhooks/python_server.py

class PrefixedFile:
    "add a prefix to each line written to a file"

    def __init__(self, file, prefix):
        self.file = file
        self.prefix = prefix
        self.on_newline = True

    def write(self, s: str):
        if not s:
            return
        if self.on_newline:
            s = self.prefix + s
        ends_with_newline = s[-1] == "\n"
        if ends_with_newline:
            s = s[:-1]
        s = s.replace("\n", f"\n{self.prefix}")
        self.file.write(s)
        if ends_with_newline:
            self.file.write("\n")
        self.on_newline = ends_with_newline

    def flush(self):
        self.file.flush()

`worker(code, output_file, timeout_fyi, log)`

Redirects outputs and performs exec

Source code in pyhooks/pyhooks/python_server.py

def worker(code: str, output_file, timeout_fyi: float, log: bool):
    "Redirects outputs and performs exec"

    # set up output redirection
    c = worker_counter_box[0]
    worker_counter_box[0] += 1
    stdouts: list = [output_file]
    stderrs: list = [output_file]
    if log:
        stdouts.append(PrefixedFile(real_stdout, f"[python-exec-{c}]-  "))
        stderrs.append(PrefixedFile(real_stderr, f"[python-exec-{c}]+  "))
    stdout.set_outputs(stdouts)
    stderr.set_outputs(stderrs)

    # do the exec
    try:
        ipython_shell.run_cell(code)
    except PythonExecTimeoutException:
        print(
            f"PythonExecTimeoutException: python exec timed out after {timeout_fyi} seconds",
            file=stderr,
        )
    except PythonExecOutOfMemoryException:
        print(
            "PythonExecOutOfMemoryException: python exec exceeded available memory. Python environment has been reset.",
            file=stderr,
        )
    except Exception as e:
        traceback.print_exception(type(e), e, e.__traceback__, file=stderr)

`pyhooks.types`

`pyhooks.util`

`get_available_ram_bytes(base_path=_MEMORY_CGROUP_DIR)`

docker-specific! normal stuff like psutil won't work

Source code in pyhooks/pyhooks/util.py

def get_available_ram_bytes(base_path: pathlib.Path = _MEMORY_CGROUP_DIR) -> float:
    "docker-specific! normal stuff like psutil won't work"
    current_path = base_path / "memory.current"
    if not current_path.exists():
        # system is using cgroup v1
        current_path = base_path / "memory/memory.usage_in_bytes"

    current = current_path.read_text()
    return _get_ram_limit_bytes(base_path) - int(current)

pyhooks Python package

pyhooks

Actions

Hooks

count_prompt_tokens(settings, messages, functions=None, extraParameters=None) async

generate_with_anthropic_prompt_caching(settings, messages, add_cache_control=True, **kwargs) async

get_input(description, default_input) async

Pauser

unpause(end) async

pyhooks.agent_output

pyhooks.env

pyhooks.execs

run_python(code, timeout_sec, wait_after_kill=15.0, minimum_free_ram_bytes=800000000, log=False) async

pyhooks.options

pyhooks.python_server

InterruptibleThread

raiseException(ExceptionClass)

run()

OutputTee

flush()

PrefixedFile

worker(code, output_file, timeout_fyi, log)

pyhooks.types

pyhooks.util

get_available_ram_bytes(base_path=_MEMORY_CGROUP_DIR)

`pyhooks`

`Actions`

`Hooks`

`count_prompt_tokens(settings, messages, functions=None, extraParameters=None)` `async`

`generate_with_anthropic_prompt_caching(settings, messages, add_cache_control=True, **kwargs)` `async`

`get_input(description, default_input)` `async`

`Pauser`

`unpause(end)` `async`

`pyhooks.agent_output`

`pyhooks.env`

`pyhooks.execs`

`run_python(code, timeout_sec, wait_after_kill=15.0, minimum_free_ram_bytes=800000000, log=False)` `async`

`pyhooks.options`

`pyhooks.python_server`

`InterruptibleThread`

`raiseException(ExceptionClass)`

`run()`

`OutputTee`

`flush()`

`PrefixedFile`

`worker(code, output_file, timeout_fyi, log)`

`pyhooks.types`

`pyhooks.util`

`get_available_ram_bytes(base_path=_MEMORY_CGROUP_DIR)`