Recipes

Copy-paste solutions for common toolkit configuration patterns. Start with the simplest pattern that fits your needs.

Which recipe do I need?

  • Just need an API key set in the user's shell? → Recipe 1
  • Just need paths or numbers? → Recipe 2
  • Need to validate that a file exists? → Recipe 3
  • Need to download something? → Recipe 4
  • Multiple setup options (download or provide path)? → Recipe 5
  • Custom system detection (CUDA, libraries)? → Recipe 6
  • Tools that need injected configuration? → Recipe 7

Recipe 1: API keys only

Your toolkit just needs an environment variable to be set in the user's shell.

toolkit.yaml

name: my-toolkit
version: 1.0.0

env_vars:
  required:
    - OPENAI_API_KEY
  optional:
    - OPENAI_ORG_ID
yaml

No setup.py needed. SciToolkit will check that the env var exists before serving the toolkit. If it is missing, SciToolkit skips the toolkit with a clear message.

Tool code

import os
from orchestral import define_tool
import json

@define_tool
def ask_gpt(prompt: str) -> str:
    """Ask GPT a question."""
    # Safe to read directly: SciToolkit refused to serve the toolkit
    # unless OPENAI_API_KEY was present in the environment.
    api_key = os.environ["OPENAI_API_KEY"]
    # ... call OpenAI API ...
    return json.dumps(result)
python

Recipe 2: Simple paths and values

You need a path, a number, or both — but no validation logic.

toolkit.yaml

config:
  data_dir:
    type: path
    description: "Where to store outputs"
    default: "~/.my-toolkit/data"

  max_workers:
    type: integer
    description: "Number of parallel workers"
    default: 4
yaml

No setup.py needed. SciToolkit prompts for these on install. The user can press Enter to accept defaults.

Tool code

from pathlib import Path
from orchestral import define_tool
import json

@define_tool(state=["data_dir", "max_workers"])
def process_data(input_file: str, data_dir: Path, max_workers: int) -> str:
    """Process input file."""
    # data_dir and max_workers are injected from config, not agent-supplied.
    result = process(input_file, output_dir=data_dir, n_workers=max_workers)
    return json.dumps({"output": str(result)})
python

See Stateful Tools for the state=[...] mechanism.

Recipe 3: Validate a file exists

You collect a path declaratively, but need to check that the path actually contains usable files.

toolkit.yaml

config:
  opacity_path:
    type: path
    description: "Path to opacity data files"
    required: true

setup_script: true
yaml

setup.py

from pathlib import Path
from scitoolkit.setup import SetupContext


def setup(ctx: SetupContext) -> bool:
    """No interactive steps needed — config is collected declaratively, so just validate."""
    return validate(ctx)


def validate(ctx: SetupContext) -> bool:
    """Check that the configured opacity path exists and holds enough .h5 files."""
    raw = ctx.get_config('opacity_path')
    if not raw:
        ctx.error("opacity_path not configured")
        ctx.hint("Run: scitoolkit setup my-toolkit")
        return False

    opacity_dir = Path(raw).expanduser()
    if not opacity_dir.exists():
        ctx.error(f"Path does not exist: {opacity_dir}")
        return False

    # The declarative `type: path` check only guarantees a string was
    # collected; here we verify the directory actually contains data.
    matches = list(opacity_dir.glob("*.h5"))
    if len(matches) < 10:
        ctx.error(f"Expected 10+ .h5 files, found {len(matches)}")
        return False

    return True
python

Recipe 4: Download a data file

Your toolkit needs data that's too big to bundle. Ship it on a server and have setup.py download it.

toolkit.yaml

name: my-toolkit
version: 1.0.0

setup_script: true
yaml

setup.py

from pathlib import Path
from scitoolkit.setup import SetupContext

DATA_URL = "https://data.scitoolkit.org/my-toolkit/dataset_v1.tar.gz"
DATA_SHA256 = "abc123def456..."  # Optional integrity check


def setup(ctx: SetupContext) -> bool:
    """Download the dataset into the toolkit's data dir, reusing an existing copy if the user wants."""
    dest = ctx.data_dir / 'dataset'

    # A non-empty destination means a previous download succeeded;
    # offer to keep it instead of re-fetching ~500MB.
    has_previous = dest.exists() and any(dest.iterdir())
    if has_previous and not ctx.confirm("Dataset already exists. Re-download?", default=False):
        ctx.set_config('dataset_path', str(dest))
        return True

    ctx.info("Downloading dataset (~500MB)...")
    ctx.download(
        url=DATA_URL,
        destination=dest,
        description="Dataset",
        size_hint="500MB",
        extract=True,
        sha256=DATA_SHA256,
    )

    ctx.set_config('dataset_path', str(dest))
    ctx.success(f"Dataset installed at {dest}")
    return True


def validate(ctx: SetupContext) -> bool:
    """Serve-time check: the configured dataset directory must exist on disk."""
    dataset = ctx.get_config('dataset_path')
    if dataset and Path(dataset).exists():
        return True
    ctx.error("Dataset not found")
    ctx.hint("Run: scitoolkit setup my-toolkit")
    return False
python

Recipe 5: Download or provide path (multiple options)

The user might already have the data on disk. Don't force them to re-download.

setup.py

from pathlib import Path
from scitoolkit.setup import SetupContext

OPACITY_URL = "https://data.scitoolkit.org/aster/opacity_v2.tar.gz"


def setup(ctx: SetupContext) -> bool:
    """Offer download, existing-path, or skip; keep a valid existing config unless asked to redo it."""
    current = ctx.get_config('opacity_path')
    if current and Path(current).expanduser().exists():
        ctx.info(f"Opacity data already configured: {current}")
        if not ctx.confirm("Reconfigure?", default=False):
            return validate(ctx)

    choice = ctx.choice(
        "How would you like to set up opacity data?",
        [
            ("download", "Download automatically (~2.3GB)"),
            ("path",     "I have the data — let me provide the path"),
            ("skip",     "Skip for now"),
        ]
    )

    if choice == "download":
        return _download_opacity(ctx)
    if choice == "path":
        return _prompt_opacity_path(ctx)

    # "skip": leave unconfigured; validate() will fail until setup is re-run.
    ctx.warn("Setup skipped. Toolkit unavailable until configured.")
    return False


def _download_opacity(ctx: SetupContext) -> bool:
    """Fetch the opacity archive into the toolkit data dir, record the path, then validate."""
    target = ctx.data_dir / 'opacity'
    ctx.download(
        url=OPACITY_URL,
        destination=target,
        description="Opacity data",
        size_hint="2.3GB",
        extract=True,
    )
    ctx.set_config('opacity_path', str(target))
    return validate(ctx)


def _prompt_opacity_path(ctx: SetupContext) -> bool:
    """Ask the user for an existing opacity directory, record it, then validate."""
    chosen = ctx.prompt_path("Path to opacity data:", must_exist=True)
    ctx.set_config('opacity_path', str(chosen))
    return validate(ctx)


def validate(ctx: SetupContext) -> bool:
    """Confirm the configured opacity directory exists and contains 10+ .h5 files."""
    configured = ctx.get_config('opacity_path')
    if not configured:
        ctx.error("opacity_path not configured")
        return False

    opacity_dir = Path(configured).expanduser()
    if not opacity_dir.exists():
        ctx.error(f"Opacity path not found: {opacity_dir}")
        return False

    found = list(opacity_dir.glob("*.h5"))
    if len(found) < 10:
        ctx.error(f"Expected 10+ .h5 files, found {len(found)}")
        return False

    return True
python

Recipe 6: Custom detection (CUDA, system libraries)

You need to detect what's available on the system and adapt. Drop to plain Python.

setup.py

import subprocess
import shutil
from scitoolkit.setup import SetupContext


def setup(ctx: SetupContext) -> bool:
    """Probe for CUDA; configure GPU mode if found, otherwise offer CPU-only mode."""
    version = _detect_cuda()
    if version is not None:
        ctx.success(f"CUDA detected: {version}")
        ctx.set_config('compute_mode', 'gpu')
        ctx.set_config('cuda_version', version)
        return True

    # CUDA not available
    ctx.warn("CUDA not detected")
    if ctx.confirm("Continue in CPU-only mode?", default=True):
        ctx.set_config('compute_mode', 'cpu')
        return True

    ctx.error("GPU is required for this toolkit")
    ctx.hint("Install CUDA: https://developer.nvidia.com/cuda-downloads")
    return False


def _detect_cuda() -> str | None:
    """Return the CUDA release string if `nvcc --version` succeeds, else None."""
    if not shutil.which('nvcc'):
        return None
    try:
        result = subprocess.run(
            ['nvcc', '--version'],
            capture_output=True, text=True, timeout=5,
        )
    except subprocess.TimeoutExpired:
        # A hung nvcc is treated the same as no CUDA at all.
        return None
    if result.returncode != 0:
        return None
    return _parse_cuda_version(result.stdout)


def validate(ctx: SetupContext) -> bool:
    """compute_mode must have been set to a known value by setup()."""
    if ctx.get_config('compute_mode') in ('gpu', 'cpu'):
        return True
    ctx.error("compute_mode not configured")
    return False


def _parse_cuda_version(nvcc_output: str) -> str:
    for line in nvcc_output.split('\n'):
        if 'release' in line:
            return line.split('release')[1].split(',')[0].strip()
    return "unknown"
python

Recipe 7: Tools that need injected configuration

Your tools need access to configuration values that the AI agent shouldn't see — workspace directories, data paths, API keys, worker counts. This is the stateful tools pattern.

toolkit.yaml

config:
  base_directory:
    type: path
    description: "Workspace for file operations"
    default: "~/my-toolkit-workspace"
yaml

Tool code

from pathlib import Path
from orchestral import define_tool
import json


@define_tool(state=["base_directory"])
def write_file(
    relative_path: str,
    content: str,
    base_directory: Path,
) -> str:
    """
    Write content to a file in the workspace.

    Args:
        relative_path: Path relative to the workspace.
        content: Text content to write.

    Returns:
        JSON with the workspace-relative path written, or an "error" key
        if the requested path would escape the workspace.
    """
    full_path = (base_directory / relative_path).resolve()
    base_resolved = base_directory.resolve()

    # Safety: ensure the path stays inside base_directory.
    # Path.is_relative_to is the robust containment check — a string
    # startswith() test would let sibling dirs like "<base>-evil" through.
    if not full_path.is_relative_to(base_resolved):
        return json.dumps({"error": "Path escapes workspace"})

    full_path.parent.mkdir(parents=True, exist_ok=True)
    full_path.write_text(content)

    return json.dumps({"written": str(full_path.relative_to(base_resolved))})


@define_tool(state=["base_directory"])
def read_file(relative_path: str, base_directory: Path) -> str:
    """Read a file from the workspace.

    Args:
        relative_path: Path relative to the workspace.

    Returns:
        JSON with the file's text content, or an "error" key if the
        requested path would escape the workspace.
    """
    full_path = (base_directory / relative_path).resolve()
    # Robust containment check: string startswith() would accept sibling
    # directories such as "<base>-evil"; is_relative_to compares path parts.
    if not full_path.is_relative_to(base_directory.resolve()):
        return json.dumps({"error": "Path escapes workspace"})

    return json.dumps({"content": full_path.read_text()})
python

From the agent's perspective, the schema is just write_file(relative_path, content). The base_directory is invisible to it. Clean for the agent, configurable for the user.

Starter template

When you run scitoolkit init, you get a working setup.py scaffold to edit:

"""
Setup script for my-toolkit.

Called by SciToolkit to configure the toolkit before serving.
- setup(ctx)    runs when the user does: scitoolkit setup my-toolkit
- validate(ctx) runs before:               scitoolkit serve
"""
from scitoolkit.setup import SetupContext


def setup(ctx: SetupContext) -> bool:
    """Interactive setup. Return True on success."""

    # Example: collect a path
    # path = ctx.prompt_path("Enter data directory:")
    # ctx.set_config('data_path', str(path))

    # With nothing collected, defer straight to the serve-time checks.
    return validate(ctx)


def validate(ctx: SetupContext) -> bool:
    """Check the toolkit is ready to serve."""

    # Example: confirm config is set
    # if not ctx.get_config('data_path'):
    #     ctx.error("data_path not configured")
    #     ctx.hint("Run: scitoolkit setup my-toolkit")
    #     return False

    # No requirements yet, so the toolkit is always servable.
    return True
python

Full example: ASTER

Here's ASTER's complete setup as a reference. It exercises most of the patterns in this guide.

toolkit.yaml

name: aster
version: 1.0.0
category: astro
description: "Agentic Science Toolkit for Exoplanet Research"
author: "Alex Roman"
license: "MIT"

environment:
  python: "3.12"

config:
  max_workers:
    type: integer
    description: "Parallel workers for computation"
    default: 4

  base_directory:
    type: path
    description: "Workspace for outputs"
    default: "~/.aster/workspace"

setup_script: true
yaml

setup.py

"""ASTER setup — handles opacity data download and workspace config."""
from pathlib import Path
from scitoolkit.setup import SetupContext

OPACITY_URL = "https://data.scitoolkit.org/aster/opacity_v2.tar.gz"
OPACITY_SHA256 = "..."  # Fill in real hash


def setup(ctx: SetupContext) -> bool:
    """Create the workspace, then make sure opacity data is configured and valid."""
    # Workspace
    workspace = Path(ctx.get_config('base_directory')).expanduser()
    workspace.mkdir(parents=True, exist_ok=True)
    ctx.info(f"Workspace: {workspace}")

    # Reuse previously configured opacity data when it still checks out.
    configured = ctx.get_config('opacity_path')
    if configured and _opacity_valid(Path(configured).expanduser()):
        ctx.info(f"Opacity data already configured: {configured}")
        return True

    choice = ctx.choice(
        "ASTER needs opacity data (~2.3GB). How would you like to set it up?",
        [
            ("download", "Download automatically"),
            ("path",     "I have the data — let me provide the path"),
            ("skip",     "Skip for now (ASTER won't work until configured)"),
        ]
    )

    if choice == "skip":
        return False

    if choice == "download":
        dest = ctx.data_dir / 'opacity'
        ctx.download(
            url=OPACITY_URL,
            destination=dest,
            description="Opacity data",
            size_hint="2.3GB",
            extract=True,
            sha256=OPACITY_SHA256,
        )
        ctx.set_config('opacity_path', str(dest))
    else:  # choice == "path"
        provided = ctx.prompt_path("Path to opacity data:", must_exist=True)
        ctx.set_config('opacity_path', str(provided))

    return validate(ctx)


def validate(ctx: SetupContext) -> bool:
    """Serve-time check: opacity data must be present and the workspace must exist."""
    configured = ctx.get_config('opacity_path')
    if not configured:
        ctx.error("opacity_path not configured")
        ctx.hint("Run: scitoolkit setup aster")
        return False

    if not _opacity_valid(Path(configured).expanduser()):
        ctx.error(f"Opacity data at {configured} is invalid or incomplete")
        ctx.hint("Re-run: scitoolkit setup aster")
        return False

    # Recreate the workspace if it disappeared since setup ran.
    workspace = Path(ctx.get_config('base_directory')).expanduser()
    if not workspace.exists():
        workspace.mkdir(parents=True, exist_ok=True)

    return True


def _opacity_valid(path: Path) -> bool:
    if not path.exists():
        return False
    return len(list(path.glob("*.h5"))) >= 10
python

Tools then declare opacity_path, base_directory, and max_workers as state fields, and they're injected automatically at serve time. See Stateful Tools for what those tool functions look like.