Skip to content

Runtime Reference

Complete API reference for the PromptKit Runtime components.

The PromptKit Runtime provides the core execution engine for LLM interactions. It handles:

  • Pipeline Execution: Stage-based processing with streaming support
  • Provider Integration: Multi-LLM support (OpenAI, Anthropic, Google Gemini)
  • Tool Execution: Function calling with MCP integration
  • State Management: Conversation persistence and caching
  • Validation: Content and response validation
  • Configuration: Flexible runtime configuration
| Component | Description | Reference |
| --- | --- | --- |
| Pipeline | Stage-based execution engine | pipeline.md |
| Providers | LLM provider implementations | providers.md |
| Tools | Function calling and MCP integration | tools-mcp.md |
| State Store | Conversation persistence | statestore.md |
| Types | Core data structures | types.md |
| A2A | Client, types, tool bridge, mock | a2a.md |
| Logging | Structured logging with context | logging.md |
| Telemetry | OpenTelemetry trace export | telemetry.md |
import (
"github.com/AltairaLabs/PromptKit/runtime/pipeline"
"github.com/AltairaLabs/PromptKit/runtime/providers"
"github.com/AltairaLabs/PromptKit/runtime/tools"
"github.com/AltairaLabs/PromptKit/runtime/mcp"
"github.com/AltairaLabs/PromptKit/runtime/statestore"
"github.com/AltairaLabs/PromptKit/runtime/hooks"
"github.com/AltairaLabs/PromptKit/runtime/hooks/guardrails"
"github.com/AltairaLabs/PromptKit/runtime/types"
"github.com/AltairaLabs/PromptKit/runtime/logger"
"github.com/AltairaLabs/PromptKit/runtime/telemetry"
)
import (
"context"
"github.com/AltairaLabs/PromptKit/runtime/providers"
"github.com/AltairaLabs/PromptKit/runtime/providers/openai"
"github.com/AltairaLabs/PromptKit/runtime/types"
)
// Create provider
provider := openai.NewProvider(
"openai",
"gpt-4o-mini",
"", // default baseURL (uses env var for API key)
providers.ProviderDefaults{Temperature: 0.7, MaxTokens: 1500},
false, // includeRawOutput
)
defer provider.Close()
// Execute prediction
ctx := context.Background()
resp, err := provider.Predict(ctx, providers.PredictionRequest{
Messages: []types.Message{
{Role: "user", Content: "Hello!"},
},
Temperature: 0.7,
MaxTokens: 1500,
})
if err != nil {
log.Fatal(err)
}
fmt.Println(resp.Content)
import (
"github.com/AltairaLabs/PromptKit/runtime/mcp"
)
// Register MCP server
mcpRegistry := mcp.NewRegistry()
defer mcpRegistry.Close()
mcpRegistry.RegisterServer(mcp.ServerConfig{
Name: "filesystem",
Command: "npx",
Args: []string{"-y", "@modelcontextprotocol/server-filesystem", "/allowed"},
})
// Discover tools
ctx := context.Background()
serverTools, err := mcpRegistry.ListAllTools(ctx)
if err != nil {
log.Fatal(err)
}
for serverName, tools := range serverTools {
log.Printf("Server %s has %d tools\n", serverName, len(tools))
}
// Execute with streaming
streamChan, err := provider.PredictStream(ctx, providers.PredictionRequest{
Messages: []types.Message{{Role: "user", Content: "Write a story"}},
Temperature: 0.7,
MaxTokens: 1500,
})
if err != nil {
log.Fatal(err)
}
// Process chunks
for chunk := range streamChan {
if chunk.Error != nil {
log.Printf("Error: %v\n", chunk.Error)
break
}
if chunk.Delta != "" {
fmt.Print(chunk.Delta)
}
if chunk.FinishReason != nil {
fmt.Printf("\n\nStream complete: %s\n", *chunk.FinishReason)
}
}
defaults := providers.ProviderDefaults{
Temperature: 0.7,
TopP: 0.95,
MaxTokens: 2000,
Pricing: providers.Pricing{
InputCostPer1K: 0.00015, // $0.15 per 1M tokens
OutputCostPer1K: 0.0006, // $0.60 per 1M tokens
},
}
provider := openai.NewProvider("openai", "gpt-4o-mini", "", defaults, false)
// Example tool policy: each field sets one limit on tool calling, as
// described by the per-field comments below.
policy := &pipeline.ToolPolicy{
ToolChoice: "auto", // "auto", "required", "none", or specific tool
MaxRounds: 5, // Max tool execution rounds (default: 50)
MaxToolCallsPerTurn: 10, // Max tools per LLM response
MaxParallelToolCalls: 5, // Max concurrent tool executions (default: 10)
MaxCostUSD: 1.00, // Stop after $1 spent (0 = unlimited)
Blocklist: []string{"dangerous_tool"}, // Blocked tools
}
// Classify the outcome of Predict in a single switch.
resp, err := provider.Predict(ctx, req)
switch {
case err == nil:
	// Success: nothing to log; resp is ready for use.
case errors.Is(err, context.DeadlineExceeded):
	// The context deadline passed before the call completed.
	log.Println("Request timeout")
default:
	// Any other failure is reported as a provider error.
	log.Printf("Provider error: %v", err)
}
// MCP tool call errors
// CallTool is invoked with the tool name "read_file" and caller-supplied args;
// a non-nil err is logged. `client` and `args` are created elsewhere (not
// shown in this fragment).
response, err := client.CallTool(ctx, "read_file", args)
if err != nil {
log.Printf("Tool execution failed: %v", err)
}
// Always close resources
// Deferred calls run when the surrounding function returns, so register them
// right after the provider and registry are created.
defer provider.Close()
defer mcpRegistry.Close()
// Use context for cancellation
// WithTimeout cancels ctx once 30 seconds have elapsed; the deferred cancel
// releases the context's resources if Predict returns sooner.
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
resp, err := provider.Predict(ctx, req)
// Always drain streaming channels
// Range over the channel until it is closed so no chunks are left unread.
streamChan, err := provider.PredictStream(ctx, req)
if err != nil {
return err
}
for chunk := range streamChan {
// Process chunks
if chunk.Error != nil {
// NOTE(review): break stops reading before the channel is closed, which
// leaves it undrained unless the provider closes the channel right after
// an error chunk — confirm against the provider implementation.
break
}
}
  • Configure MaxConcurrentExecutions based on provider rate limits
  • Use semaphores to prevent overwhelming providers
  • Consider graceful degradation under load
  • Use streaming for interactive applications (chatbots, UIs)
  • Use non-streaming for batch processing, testing, analytics
  • Streaming adds ~10% overhead but provides a better UX
  • MCP tools run in separate processes (stdio overhead)
  • Consider tool execution timeouts
  • Use repository executors for fast in-memory tools
  • Pipeline creates fresh ExecutionContext per call (prevents contamination)
  • Large conversation histories can increase memory usage
  • Consider state store cleanup strategies