# Compute diff between before and after states
# NOTE: assumes `client` and `run` were created earlier (not shown in this snippet).
diff = client.diff_run(runId=run.runId)

# The result exposes a `diff` mapping keyed by change type.
print("New records:", diff.diff['inserts'])
print("Modified records:", diff.diff['updates'])
print("Deleted records:", diff.diff['deletes'])
# Run assertions against the diff (uses test suite's expectedOutput)
# NOTE: assumes `client` and `run` were created earlier (not shown in this snippet).
result = client.evaluate_run(runId=run.runId)

# Report the outcome fields read from the evaluation result.
print(f"Passed: {result.passed}")
print(f"Score: {result.score}")
Or pass assertions explicitly (no test suite needed):
# First, create and customize an environment
# NOTE: assumes `client` was created earlier (not shown in this snippet).
env = client.init_env(
    templateService="slack",
    templateName="slack_default",
)

# Make customizations via API...
# (add users, channels, etc.)

# Save as new template
custom = client.create_template_from_environment(
    environmentId=env.environmentId,
    service="slack",
    name="my_custom_template",
    description="Customized workspace with extra channels",
    visibility="private",
)
print(f"Created template: {custom.templateId}")
# NOTE: assumes `client` was created earlier (not shown in this snippet).
# The three calls below are alternatives; each one rebinds `suites`,
# so only the last result is iterated.

# List all visible suites
suites = client.list_test_suites()

# Filter by name
suites = client.list_test_suites(name="Slack Bench")

# Filter by visibility
suites = client.list_test_suites(visibility="public")

for suite in suites.testSuites:
    print(f"- {suite.name} (ID: {suite.id})")
# Get suite with expanded test details
# NOTE: assumes `client` and `suite_id` were defined earlier (not shown in this snippet).
suite = client.get_test_suite(suite_id, expand=True)
print(f"Suite: {suite.name}")

# Print the name and prompt of every test in the suite.
for test in suite.tests:
    print(f" - {test.name}: {test.prompt}")
# Create a new private test suite and report its ID.
# NOTE: assumes `client` was created earlier (not shown in this snippet).
suite = client.create_test_suite(
    name="My Agent Tests",
    description="Custom tests for my Slack agent",
    visibility="private",
)
print(f"Created suite: {suite.id}")
# NOTE: assumes `client` was created earlier and `suite` exposes a non-empty
# `tests` list (not shown in this snippet) -- confirm against the preceding example.

# Get a specific test
test = suite.tests[0]

# Create environment for this test
env = client.init_env(testId=test.id)

# Start run
run = client.start_run(envId=env.environmentId, testId=test.id)

# Run your agent with test.prompt
# ...

# Evaluate against test assertions
result = client.evaluate_run(runId=run.runId)
print(f"Test: {test.name}")
print(f"Passed: {result.passed}")
print(f"Failures: {result.failures}")