Use Arena as a Go Library

This guide shows you how to integrate Arena into your Go applications for programmatic testing, custom tooling, and dynamic scenario generation.

Use Arena programmatically when you need to:

  • Integrate with existing systems - Embed testing into your application
  • Generate scenarios dynamically - Create tests from data or user input
  • Customize reporting - Process results with your own logic
  • Build testing tools - Create specialized testing applications
  • Automate workflows - Chain testing with other operations

Add Arena to your Go project:

go get github.com/AltairaLabs/PromptKit/tools/arena/engine
go get github.com/AltairaLabs/PromptKit/pkg/config
go get github.com/AltairaLabs/PromptKit/runtime/prompt

Then import the packages in your Go code:

import (
	"context"
	"fmt"

	"github.com/AltairaLabs/PromptKit/pkg/config"
	"github.com/AltairaLabs/PromptKit/runtime/prompt"
	"github.com/AltairaLabs/PromptKit/tools/arena/engine"
)

// Create prompt configuration
promptConfig := &prompt.Config{
	Spec: prompt.Spec{
		TaskType:       "assistant",
		SystemTemplate: "You are a helpful assistant.",
	},
}

// Create Arena configuration
cfg := &config.Config{
	LoadedProviders: map[string]*config.Provider{
		"openai": {
			ID:    "openai",
			Type:  "openai",
			Model: "gpt-4",
		},
	},
	LoadedPromptConfigs: map[string]*config.PromptConfigData{
		"assistant": {
			Config:   promptConfig,
			TaskType: "assistant",
		},
	},
	LoadedScenarios: map[string]*config.Scenario{
		"test-1": {
			ID:       "test-1",
			TaskType: "assistant",
			Turns: []config.TurnDefinition{
				{Role: "user", Content: "Hello"},
			},
		},
	},
	Defaults: config.Defaults{
		Temperature: 0.7,
		MaxTokens:   500,
	},
}

// Build all required components
providerReg, promptReg, mcpReg, executor, err := engine.BuildEngineComponents(cfg)
if err != nil {
	return err
}

// Create engine
eng, err := engine.NewEngine(cfg, providerReg, promptReg, mcpReg, executor)
if err != nil {
	return err
}
defer eng.Close()

// Generate plan
plan, err := eng.GenerateRunPlan(nil, nil, nil)
if err != nil {
	return err
}

// Execute
ctx := context.Background()
runIDs, err := eng.ExecuteRuns(ctx, plan, 4) // 4 concurrent workers
if err != nil {
	return err
}
import "github.com/AltairaLabs/PromptKit/tools/arena/statestore"
arenaStore := eng.GetStateStore().(*statestore.ArenaStateStore)
for _, runID := range runIDs {
result, err := arenaStore.GetRunResult(ctx, runID)
if err != nil {
continue
}
// Process result
fmt.Printf("Scenario: %s, Status: %s\n",
result.ScenarioID,
getStatus(result.Error))
}
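
getStatus is not part of the Arena API; it stands in for whatever status formatting you want. A minimal sketch, assuming an empty error string means the run succeeded:

// getStatus is a hypothetical helper: an empty error string is treated as success.
func getStatus(errMsg string) string {
	if errMsg == "" {
		return "PASS"
	}
	return "FAIL"
}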

Generate scenarios from external data:

// Load test cases from database/API/file
testCases := loadTestCases()

scenarios := make(map[string]*config.Scenario)
for _, tc := range testCases {
	scenarios[tc.ID] = &config.Scenario{
		ID:          tc.ID,
		TaskType:    tc.TaskType,
		Description: tc.Description,
		Turns:       buildTurns(tc.Conversations),
	}
}
cfg.LoadedScenarios = scenarios
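
loadTestCases and buildTurns are placeholders for your own data layer. A minimal sketch of buildTurns, assuming a hypothetical TestCase type whose Conversations field holds role/content pairs:

// Message is a hypothetical role/content pair from your external test data.
type Message struct {
	Role    string
	Content string
}

// TestCase is a hypothetical shape for externally loaded test cases.
type TestCase struct {
	ID            string
	TaskType      string
	Description   string
	Conversations []Message
}

// buildTurns converts external conversation data into Arena turn definitions.
func buildTurns(conversations []Message) []config.TurnDefinition {
	turns := make([]config.TurnDefinition, 0, len(conversations))
	for _, m := range conversations {
		turns = append(turns, config.TurnDefinition{Role: m.Role, Content: m.Content})
	}
	return turns
}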

Test the same scenario across multiple providers:

providers := map[string]*config.Provider{
	"openai-gpt4": {
		ID:    "openai-gpt4",
		Type:  "openai",
		Model: "gpt-4",
	},
	"anthropic-claude": {
		ID:    "anthropic-claude",
		Type:  "anthropic",
		Model: "claude-3-5-sonnet-20241022",
	},
	"google-gemini": {
		ID:    "google-gemini",
		Type:  "gemini",
		Model: "gemini-2.0-flash-exp",
	},
}
cfg.LoadedProviders = providers

// Execute - all providers will be tested
plan, _ := eng.GenerateRunPlan(nil, nil, nil)
runIDs, _ := eng.ExecuteRuns(ctx, plan, 6)

// Compare results
compareProviders(runIDs, eng.GetStateStore())
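
compareProviders is your own comparison logic, not an Arena function. A minimal sketch, assuming the state store is the same *statestore.ArenaStateStore used earlier and that run results expose ProviderID and Error as shown above:

// compareProviders is a hypothetical helper that tallies pass/fail counts per provider.
func compareProviders(runIDs []string, store interface{}) {
	arenaStore, ok := store.(*statestore.ArenaStateStore)
	if !ok {
		log.Fatal("Expected ArenaStateStore")
	}

	type tally struct{ passed, failed int }
	counts := map[string]*tally{}
	for _, runID := range runIDs {
		result, err := arenaStore.GetRunResult(context.Background(), runID)
		if err != nil {
			continue
		}
		t := counts[result.ProviderID]
		if t == nil {
			t = &tally{}
			counts[result.ProviderID] = t
		}
		if result.Error == "" {
			t.passed++
		} else {
			t.failed++
		}
	}

	for provider, t := range counts {
		fmt.Printf("%s: %d passed, %d failed\n", provider, t.passed, t.failed)
	}
}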

Process results with your own logic:

type TestMetrics struct {
	Provider   string
	Scenario   string
	Duration   time.Duration
	Cost       float64
	TokensUsed int
	Success    bool
	ErrorMsg   string
}

func processResults(runIDs []string, store *statestore.ArenaStateStore) []TestMetrics {
	var metrics []TestMetrics
	for _, runID := range runIDs {
		result, err := store.GetRunResult(context.Background(), runID)
		if err != nil {
			continue
		}
		metrics = append(metrics, TestMetrics{
			Provider:   result.ProviderID,
			Scenario:   result.ScenarioID,
			Duration:   result.Duration,
			Cost:       result.Cost.TotalCost,
			TokensUsed: result.Cost.InputTokens + result.Cost.OutputTokens,
			Success:    result.Error == "",
			ErrorMsg:   result.Error,
		})
	}
	return metrics
}
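
As a usage sketch, you might aggregate the collected metrics; the success-rate and cost summary below are illustrative, not part of Arena:

metrics := processResults(runIDs, arenaStore)

passed := 0
totalCost := 0.0
for _, m := range metrics {
	if m.Success {
		passed++
	}
	totalCost += m.Cost
}
fmt.Printf("%d/%d runs succeeded, total cost $%.4f\n", passed, len(metrics), totalCost)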

Run specific combinations only:

// Only test specific scenarios with specific providers
plan, err := eng.GenerateRunPlan(
	nil,                          // all regions
	[]string{"openai", "claude"}, // only these providers
	[]string{"critical-test-1"},  // only this scenario
)

Use mock providers to test without API calls:

cfg.LoadedProviders = map[string]*config.Provider{
	"mock": {
		ID:    "mock",
		Type:  "mock",
		Model: "test-model",
	},
}

// Optionally enable mock mode on existing engine
err := eng.EnableMockProviderMode("path/to/mock-config.yaml")

This pattern also works inside standard Go tests:

func TestLLMResponses(t *testing.T) {
	cfg := createTestConfig()

	providerReg, promptReg, mcpReg, executor, err := engine.BuildEngineComponents(cfg)
	require.NoError(t, err)

	eng, err := engine.NewEngine(cfg, providerReg, promptReg, mcpReg, executor)
	require.NoError(t, err)
	defer eng.Close()

	plan, _ := eng.GenerateRunPlan(nil, nil, nil)
	runIDs, err := eng.ExecuteRuns(context.Background(), plan, 4)
	require.NoError(t, err)

	// Assert results
	store := eng.GetStateStore().(*statestore.ArenaStateStore)
	for _, runID := range runIDs {
		result, _ := store.GetRunResult(context.Background(), runID)
		assert.Empty(t, result.Error, "Test should pass")
	}
}
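
createTestConfig is not defined above; a minimal sketch, reusing the mock provider and the same config fields shown earlier in this guide:

// createTestConfig is a hypothetical helper that builds a minimal mock-backed config.
func createTestConfig() *config.Config {
	return &config.Config{
		LoadedProviders: map[string]*config.Provider{
			"mock": {ID: "mock", Type: "mock", Model: "test-model"},
		},
		LoadedPromptConfigs: map[string]*config.PromptConfigData{
			"assistant": {
				Config: &prompt.Config{
					Spec: prompt.Spec{
						TaskType:       "assistant",
						SystemTemplate: "You are a helpful assistant.",
					},
				},
				TaskType: "assistant",
			},
		},
		LoadedScenarios: map[string]*config.Scenario{
			"test-1": {
				ID:       "test-1",
				TaskType: "assistant",
				Turns: []config.TurnDefinition{
					{Role: "user", Content: "Hello"},
				},
			},
		},
		Defaults: config.Defaults{Temperature: 0.7, MaxTokens: 500},
	}
}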

For CI pipelines, run Arena from a small entry point and exit with an appropriate status code:

func main() {
	// Load config from environment
	cfg := buildConfigFromEnv()

	// Execute tests
	eng, _ := setupEngine(cfg)
	defer eng.Close()

	plan, _ := eng.GenerateRunPlan(nil, nil, nil)
	runIDs, _ := eng.ExecuteRuns(context.Background(), plan, 10)

	// Process results and exit with appropriate code
	results := collectResults(runIDs, eng.GetStateStore())
	if hasFailures(results) {
		fmt.Println("❌ Tests failed")
		os.Exit(1)
	}
	fmt.Println("✅ All tests passed")
	os.Exit(0)
}
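
buildConfigFromEnv, setupEngine, collectResults, and hasFailures are application-specific helpers. As one hypothetical shape, hasFailures could operate on the TestMetrics slice defined earlier:

// hasFailures is a hypothetical helper over the TestMetrics slice defined earlier.
func hasFailures(results []TestMetrics) bool {
	for _, r := range results {
		if !r.Success {
			return true
		}
	}
	return false
}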

You can also wrap the engine in a custom runner, for example to add retries:

type CustomRunner struct {
	engine *engine.Engine
	store  *statestore.ArenaStateStore
}

func (r *CustomRunner) RunWithRetry(scenario string, maxRetries int) error {
	for i := 0; i < maxRetries; i++ {
		plan, _ := r.engine.GenerateRunPlan(nil, nil, []string{scenario})
		runIDs, _ := r.engine.ExecuteRuns(context.Background(), plan, 1)

		result, _ := r.store.GetRunResult(context.Background(), runIDs[0])
		if result.Error == "" {
			return nil
		}

		log.Printf("Attempt %d failed, retrying...", i+1)
		time.Sleep(time.Second * time.Duration(i+1))
	}
	return fmt.Errorf("failed after %d attempts", maxRetries)
}
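
Wiring the runner up might look like this (a sketch that reuses the engine and state store from earlier):

runner := &CustomRunner{
	engine: eng,
	store:  eng.GetStateStore().(*statestore.ArenaStateStore),
}
if err := runner.RunWithRetry("critical-test-1", 3); err != nil {
	log.Fatalf("scenario failed: %v", err)
}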

Always close the engine so its resources are released:

eng, _ := engine.NewEngine(cfg, providerReg, promptReg, mcpReg, executor)
defer eng.Close() // Ensures proper cleanup

Handle per-run errors without failing the entire batch:

result, err := store.GetRunResult(ctx, runID)
if err != nil {
	log.Printf("Failed to get result for %s: %v", runID, err)
	continue // Don't fail entire batch
}

Tune the number of concurrent workers to the providers you are calling:

// For rate-limited APIs
runIDs, _ := eng.ExecuteRuns(ctx, plan, 2)

// For mock testing
runIDs, _ := eng.ExecuteRuns(ctx, plan, 20)

Build engine components once and reuse them across engines:

// Build components once
providerReg, promptReg, mcpReg, executor, _ := engine.BuildEngineComponents(cfg)

// Reuse for multiple engines
eng1, _ := engine.NewEngine(cfg1, providerReg, promptReg, mcpReg, executor)
eng2, _ := engine.NewEngine(cfg2, providerReg, promptReg, mcpReg, executor)

If you see “package not found” errors:

go mod tidy
go get github.com/AltairaLabs/PromptKit/tools/arena/engine@latest

Always check type assertions:

arenaStore, ok := eng.GetStateStore().(*statestore.ArenaStateStore)
if !ok {
	log.Fatal("Expected ArenaStateStore")
}

Process results in batches:

const batchSize = 100
for i := 0; i < len(runIDs); i += batchSize {
	end := i + batchSize
	if end > len(runIDs) {
		end = len(runIDs)
	}
	processBatch(runIDs[i:end], store)
}
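
processBatch is your own code; one hypothetical sketch reuses processResults from earlier to summarize each slice of run IDs:

// processBatch is a hypothetical helper that summarizes one slice of run IDs.
func processBatch(batch []string, store *statestore.ArenaStateStore) {
	for _, m := range processResults(batch, store) {
		fmt.Printf("%s/%s: success=%v duration=%v\n", m.Provider, m.Scenario, m.Success, m.Duration)
	}
}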