Recipes
Copy-paste solutions for common toolkit configuration patterns. Start with the simplest pattern that fits your needs.
Which recipe do I need?
- Just need an API key set in the user's shell? → Recipe 1
- Just need paths or numbers? → Recipe 2
- Need to validate that a file exists? → Recipe 3
- Need to download something? → Recipe 4
- Multiple setup options (download or provide path)? → Recipe 5
- Custom system detection (CUDA, libraries)? → Recipe 6
- Tools that need injected configuration? → Recipe 7
Recipe 1: API keys only
Your toolkit just needs an environment variable to be set in the user's shell.
toolkit.yaml
name: my-toolkit
version: 1.0.0
env_vars:
required:
- OPENAI_API_KEY
optional:
- OPENAI_ORG_ID

No setup.py needed. SciToolkit will check the env var exists before serving the toolkit. If missing, it skips the toolkit with a clear message.
Tool code
import os
from orchestral import define_tool
import json
@define_tool
def ask_gpt(prompt: str) -> str:
"""Ask GPT a question."""
api_key = os.environ['OPENAI_API_KEY'] # Already validated
# ... call OpenAI API ...
return json.dumps(result)pythonRecipe 2: Simple paths and values
You need a path, a number, or both — but no validation logic.
toolkit.yaml
config:
data_dir:
type: path
description: "Where to store outputs"
default: "~/.my-toolkit/data"
max_workers:
type: integer
description: "Number of parallel workers"
default: 4

No setup.py needed. SciToolkit prompts for these on install. The user can press Enter to accept defaults.
Tool code
from pathlib import Path
from orchestral import define_tool
import json
@define_tool(state=["data_dir", "max_workers"])
def process_data(input_file: str, data_dir: Path, max_workers: int) -> str:
"""Process input file."""
output = process(input_file, output_dir=data_dir, n_workers=max_workers)
return json.dumps({"output": str(output)})pythonSee Stateful Tools for the state=[...] mechanism.
Recipe 3: Validate a file exists
You collect a path declaratively, but need to check that the path actually contains usable files.
toolkit.yaml
config:
opacity_path:
type: path
description: "Path to opacity data files"
required: true
setup_script: true

setup.py
from pathlib import Path
from scitoolkit.setup import SetupContext
def setup(ctx: SetupContext) -> bool:
    """Interactive setup: nothing to collect here, so just re-check config.

    The declarative config in toolkit.yaml already collected the path;
    setup only needs to confirm it is usable.
    """
    return validate(ctx)
def validate(ctx: SetupContext) -> bool:
path_str = ctx.get_config('opacity_path')
if not path_str:
ctx.error("opacity_path not configured")
ctx.hint("Run: scitoolkit setup my-toolkit")
return False
path = Path(path_str).expanduser()
if not path.exists():
ctx.error(f"Path does not exist: {path}")
return False
h5_files = list(path.glob("*.h5"))
if len(h5_files) < 10:
ctx.error(f"Expected 10+ .h5 files, found {len(h5_files)}")
return False
return TruepythonRecipe 4: Download a data file
Your toolkit needs data that's too big to bundle. Ship it on a server and have setup.py download it.
toolkit.yaml
name: my-toolkit
version: 1.0.0
setup_script: true

setup.py
from pathlib import Path
from scitoolkit.setup import SetupContext
DATA_URL = "https://data.scitoolkit.org/my-toolkit/dataset_v1.tar.gz"
DATA_SHA256 = "abc123def456..." # Optional integrity check
def setup(ctx: SetupContext) -> bool:
dest = ctx.data_dir / 'dataset'
if dest.exists() and any(dest.iterdir()):
if not ctx.confirm("Dataset already exists. Re-download?", default=False):
ctx.set_config('dataset_path', str(dest))
return True
ctx.info("Downloading dataset (~500MB)...")
ctx.download(
url=DATA_URL,
destination=dest,
description="Dataset",
size_hint="500MB",
extract=True,
sha256=DATA_SHA256,
)
ctx.set_config('dataset_path', str(dest))
ctx.success(f"Dataset installed at {dest}")
return True
def validate(ctx: SetupContext) -> bool:
path = ctx.get_config('dataset_path')
if not path or not Path(path).exists():
ctx.error("Dataset not found")
ctx.hint("Run: scitoolkit setup my-toolkit")
return False
return TruepythonRecipe 5: Download or provide path (multiple options)
The user might already have the data on disk. Don't force them to re-download.
setup.py
from pathlib import Path
from scitoolkit.setup import SetupContext
OPACITY_URL = "https://data.scitoolkit.org/aster/opacity_v2.tar.gz"
def setup(ctx: SetupContext) -> bool:
existing = ctx.get_config('opacity_path')
if existing and Path(existing).expanduser().exists():
ctx.info(f"Opacity data already configured: {existing}")
if not ctx.confirm("Reconfigure?", default=False):
return validate(ctx)
choice = ctx.choice(
"How would you like to set up opacity data?",
[
("download", "Download automatically (~2.3GB)"),
("path", "I have the data — let me provide the path"),
("skip", "Skip for now"),
]
)
if choice == "download":
return _download_opacity(ctx)
elif choice == "path":
return _prompt_opacity_path(ctx)
else:
ctx.warn("Setup skipped. Toolkit unavailable until configured.")
return False
def _download_opacity(ctx: SetupContext) -> bool:
    """Download and extract the opacity archive into the toolkit data dir."""
    target = ctx.data_dir / 'opacity'
    ctx.download(
        url=OPACITY_URL,
        destination=target,
        description="Opacity data",
        size_hint="2.3GB",
        extract=True,
    )
    ctx.set_config('opacity_path', str(target))
    return validate(ctx)
def _prompt_opacity_path(ctx: SetupContext) -> bool:
    """Ask the user for an existing local copy of the opacity data."""
    chosen = ctx.prompt_path("Path to opacity data:", must_exist=True)
    ctx.set_config('opacity_path', str(chosen))
    return validate(ctx)
def validate(ctx: SetupContext) -> bool:
path_str = ctx.get_config('opacity_path')
if not path_str:
ctx.error("opacity_path not configured")
return False
path = Path(path_str).expanduser()
if not path.exists():
ctx.error(f"Opacity path not found: {path}")
return False
h5_files = list(path.glob("*.h5"))
if len(h5_files) < 10:
ctx.error(f"Expected 10+ .h5 files, found {len(h5_files)}")
return False
return TruepythonRecipe 6: Custom detection (CUDA, system libraries)
You need to detect what's available on the system and adapt. Drop to plain Python.
setup.py
import subprocess
import shutil
from scitoolkit.setup import SetupContext
def setup(ctx: SetupContext) -> bool:
    """Probe for a CUDA toolchain; fall back to CPU-only mode with user consent."""
    if shutil.which('nvcc'):
        try:
            proc = subprocess.run(
                ['nvcc', '--version'],
                capture_output=True, text=True, timeout=5,
            )
        except subprocess.TimeoutExpired:
            proc = None  # nvcc hung — treat as not available
        if proc is not None and proc.returncode == 0:
            version = _parse_cuda_version(proc.stdout)
            ctx.success(f"CUDA detected: {version}")
            ctx.set_config('compute_mode', 'gpu')
            ctx.set_config('cuda_version', version)
            return True
    # nvcc missing, failed, or timed out — offer the CPU fallback.
    ctx.warn("CUDA not detected")
    if ctx.confirm("Continue in CPU-only mode?", default=True):
        ctx.set_config('compute_mode', 'cpu')
        return True
    ctx.error("GPU is required for this toolkit")
    ctx.hint("Install CUDA: https://developer.nvidia.com/cuda-downloads")
    return False
def validate(ctx: SetupContext) -> bool:
    """Serve-time check: a compute mode must have been chosen during setup."""
    if ctx.get_config('compute_mode') in ('gpu', 'cpu'):
        return True
    ctx.error("compute_mode not configured")
    return False
def _parse_cuda_version(nvcc_output: str) -> str:
for line in nvcc_output.split('\n'):
if 'release' in line:
return line.split('release')[1].split(',')[0].strip()
return "unknown"pythonRecipe 7: Tools that need injected configuration
Your tools need access to configuration values that the AI agent shouldn't see — workspace directories, data paths, API keys, worker counts. This is the stateful tools pattern.
toolkit.yaml
config:
base_directory:
type: path
description: "Workspace for file operations"
default: "~/my-toolkit-workspace"

Tool code
from pathlib import Path
from orchestral import define_tool
import json
@define_tool(state=["base_directory"])
def write_file(
    relative_path: str,
    content: str,
    base_directory: Path,
) -> str:
    """
    Write content to a file in the workspace.

    Args:
        relative_path: Path relative to the workspace.
        content: Text content to write.
    """
    full_path = (base_directory / relative_path).resolve()
    base_resolved = base_directory.resolve()
    # Safety: ensure the path stays inside base_directory.
    # Path.is_relative_to avoids the string-prefix pitfall where a sibling
    # like "/base-evil" would pass a startswith("/base") check.
    if not full_path.is_relative_to(base_resolved):
        return json.dumps({"error": "Path escapes workspace"})
    full_path.parent.mkdir(parents=True, exist_ok=True)
    full_path.write_text(content)
    return json.dumps({"written": str(full_path.relative_to(base_resolved))})
@define_tool(state=["base_directory"])
def read_file(relative_path: str, base_directory: Path) -> str:
"""Read a file from the workspace."""
full_path = (base_directory / relative_path).resolve()
if not str(full_path).startswith(str(base_directory.resolve())):
return json.dumps({"error": "Path escapes workspace"})
return json.dumps({"content": full_path.read_text()})pythonFrom the agent's perspective, the schema is just write_file(relative_path, content). The base_directory is invisible to it. Clean for the agent, configurable for the user.
Starter template
When you run scitoolkit init, you get a working setup.py scaffold to edit:
"""
Setup script for my-toolkit.
Called by SciToolkit to configure the toolkit before serving.
- setup(ctx) runs when the user does: scitoolkit setup my-toolkit
- validate(ctx) runs before: scitoolkit serve
"""
from scitoolkit.setup import SetupContext
def setup(ctx: SetupContext) -> bool:
    """Interactive setup entry point. Return True when configuration succeeds.

    Runs on: scitoolkit setup my-toolkit
    """
    # Example: collect a path from the user and persist it:
    #   path = ctx.prompt_path("Enter data directory:")
    #   ctx.set_config('data_path', str(path))
    return validate(ctx)
def validate(ctx: SetupContext) -> bool:
"""Check the toolkit is ready to serve."""
# Example: confirm config is set
# if not ctx.get_config('data_path'):
# ctx.error("data_path not configured")
# ctx.hint("Run: scitoolkit setup my-toolkit")
# return False
return TruepythonFull example: ASTER
Here's ASTER's complete setup as a reference. It exercises most of the patterns in this guide.
toolkit.yaml
name: aster
version: 1.0.0
category: astro
description: "Agentic Science Toolkit for Exoplanet Research"
author: "Alex Roman"
license: "MIT"
environment:
python: "3.12"
config:
max_workers:
type: integer
description: "Parallel workers for computation"
default: 4
base_directory:
type: path
description: "Workspace for outputs"
default: "~/.aster/workspace"
setup_script: true

setup.py
"""ASTER setup — handles opacity data download and workspace config."""
from pathlib import Path
from scitoolkit.setup import SetupContext
OPACITY_URL = "https://data.scitoolkit.org/aster/opacity_v2.tar.gz"
OPACITY_SHA256 = "..." # Fill in real hash
def setup(ctx: SetupContext) -> bool:
# Workspace
workspace = Path(ctx.get_config('base_directory')).expanduser()
workspace.mkdir(parents=True, exist_ok=True)
ctx.info(f"Workspace: {workspace}")
# Opacity data
existing = ctx.get_config('opacity_path')
if existing and _opacity_valid(Path(existing).expanduser()):
ctx.info(f"Opacity data already configured: {existing}")
return True
choice = ctx.choice(
"ASTER needs opacity data (~2.3GB). How would you like to set it up?",
[
("download", "Download automatically"),
("path", "I have the data — let me provide the path"),
("skip", "Skip for now (ASTER won't work until configured)"),
]
)
if choice == "download":
dest = ctx.data_dir / 'opacity'
ctx.download(
url=OPACITY_URL,
destination=dest,
description="Opacity data",
size_hint="2.3GB",
extract=True,
sha256=OPACITY_SHA256,
)
ctx.set_config('opacity_path', str(dest))
elif choice == "path":
path = ctx.prompt_path("Path to opacity data:", must_exist=True)
ctx.set_config('opacity_path', str(path))
else:
return False
return validate(ctx)
def validate(ctx: SetupContext) -> bool:
    """Serve-time check: opacity data and workspace must both be usable."""
    opacity_str = ctx.get_config('opacity_path')
    if not opacity_str:
        ctx.error("opacity_path not configured")
        ctx.hint("Run: scitoolkit setup aster")
        return False
    if not _opacity_valid(Path(opacity_str).expanduser()):
        ctx.error(f"Opacity data at {opacity_str} is invalid or incomplete")
        ctx.hint("Re-run: scitoolkit setup aster")
        return False
    # The workspace may have been removed since setup; recreate it quietly.
    workspace = Path(ctx.get_config('base_directory')).expanduser()
    if not workspace.exists():
        workspace.mkdir(parents=True, exist_ok=True)
    return True
def _opacity_valid(path: Path) -> bool:
if not path.exists():
return False
return len(list(path.glob("*.h5"))) >= 10pythonTools then declare opacity_path, base_directory, and max_workers as state fields, and they're injected automatically at serve time. See Stateful Tools for what those tool functions look like.