Skip to content

Vivaria architecture

How Vivaria runs agents on tasks

  1. A user defines a METR Task Standard task family
  2. The user picks out a task from the task family, e.g. general/count-odds
  3. The user makes an agent with a main.py file that calls hooks.getInstructions(), hooks.submit(answer), etc.
  4. The user runs viv run (see here for more details)
  5. The Vivaria server builds a Docker image based on the task family's and agent's code
  6. The server creates a Docker container from the image, again based on the task family's code
  7. The server runs a command in the container that starts the agent
  8. The agent logs trace entries, gets completions, and eventually submits an answer, all by communicating with the server via pyhooks
  9. Vivaria runs TaskFamily#score inside the Docker container, passing it the agent's submission

C4 diagrams

See here for details.

System context

C4Context System_Ext(llmapi, "LLM API providers") System_Boundary(b1, "METR") { Person(poke, "Researcher (poke)") System(vivaria, "Vivaria") Person(agentauthor, "Agent Author") Person(taskauthor, "Task Author") } Rel(vivaria, llmapi, "Calls out to") Rel(poke, vivaria, "Runs tasks") Rel(agentauthor, vivaria, "Writes agents") Rel(taskauthor, vivaria, "Writes tasks") UpdateRelStyle(poke, vivaria, $offsetX="-30", $offsetY="-20") UpdateRelStyle(agentauthor, vivaria, $offsetX="-30", $offsetY="-30") UpdateRelStyle(vivaria, llmapi, $offsetX="-30", $offsetY="-40") UpdateLayoutConfig($c4ShapeInRow="3", $c4BoundaryInRow="1")

Container

C4Container Person(agentauthor, "Agent Author") Person(taskauthor, "Task Author") System_Ext(llmapi, "LLM API Providers") Container_Ext(auth0, "Auth0") ContainerDb_Ext(github, "GitHub") ContainerDb_Ext(airtable, "Airtable", "", "For misc analyses & search") Boundary(b1, "METR", "") { Boundary(b2, "Server-Side", "") { Container(middleman, "Middleman", "Python", "In separate repo") Container(api, "API Server", "TypeScript", "Orchestrates everything") ContainerDb(db, "DB", "Postgres; scripts/schema.sql", "Stores runs, trace entries, etc.") Container(agents, "Agents", "Python", "Run tasks, record output <br>via pyhooks compatibility library") } Boundary(b3, "Users & Their Machines", "") { Container(ui, "Web UI", "TypeScript, React, Vite") Container(cli, "viv CLI", "Python") Person(poke, "User (poke)") } } Rel(middleman, auth0, "Checks auth", "HTTPS") UpdateRelStyle(middleman, auth0, $offsetX="+30", $offsetY="+80") Rel(ui, auth0, "Mints auth tokens", "HTTPS") UpdateRelStyle(ui, auth0, $offsetX="-60", $offsetY="+360") Rel(api, auth0, "Checks auth", "HTTPS") UpdateRelStyle(api, auth0, $offsetX="+45", $offsetY="+80") Rel(cli, github, "Commits & pushes<br> agents/tasks to", "HTTPS") UpdateRelStyle(cli, github, $offsetX="-80", $offsetY="+260") Rel(middleman, llmapi, "Calls out to", "HTTPS") UpdateRelStyle(middleman, llmapi, $offsetX="-205", $offsetY="+205") Rel(api, middleman, "Forwards <br>model calls", "HTTPS") UpdateRelStyle(api, middleman, $offsetX="-30", $offsetY="-30") Rel(cli, api, "Starts runs", "tRPC/HTTPS") UpdateRelStyle(cli, api, $offsetX="+10", $offsetY="+100") Rel(api, github, "Fetches agents<br> and tasks", "HTTPS") UpdateRelStyle(api, github, $offsetX="+0", $offsetY="-70") Rel(api, agents, "Starts and runs tasks on", "docker commands") UpdateRelStyle(api, agents, $offsetX="-50", $offsetY="+60") Rel(agents, api, "Calls models and<br>saves trace events", "pyhooks tRPC/HTTP") UpdateRelStyle(agents, api, $offsetX="-160", $offsetY="-10") Rel(api, db, 
"Reads/writes <br>traces, runs, etc.", "SQL/TCP") UpdateRelStyle(api, db, $offsetX="-40", $offsetY="-40") Rel(ui, api, "Gets traces", "tRPC/HTTPS") UpdateRelStyle(ui, api, $offsetX="-150", $offsetY="+70") Rel(poke, ui, "Views traces") UpdateRelStyle(poke, ui, $offsetX="-0", $offsetY="-0") Rel(poke, cli, "Runs tasks") UpdateRelStyle(poke, cli, $offsetX="-0", $offsetY="-0") Rel(taskauthor, github, "Writes tasks") UpdateRelStyle(taskauthor, github, $offsetX="-0", $offsetY="-0") Rel(agentauthor, github, "Writes agents") UpdateRelStyle(agentauthor, github, $offsetX="-0", $offsetY="-0") Rel(api, airtable, "Writes run info", "HTTPS") UpdateRelStyle(api, airtable, $offsetX="-0", $offsetY="-0") UpdateLayoutConfig($c4ShapeInRow="3", $c4BoundaryInRow="1")