
Runtime Reference

Complete API reference for the PromptKit Runtime components.

The PromptKit Runtime provides the core execution engine for LLM interactions. It handles:

  • Pipeline Execution: Stage-based processing with streaming support
  • Provider Integration: Multi-LLM support (OpenAI, Anthropic, Google Gemini)
  • Tool Execution: Function calling with MCP integration
  • State Management: Conversation persistence and caching
  • Validation: Content and response validation
  • Configuration: Flexible runtime configuration
Component      Description                             Reference
Pipeline       Stage-based execution engine            pipeline.md
Providers      LLM provider implementations            providers.md
Tools          Function calling and MCP integration    tools.md
MCP            Model Context Protocol support          mcp.md
State Store    Conversation persistence                statestore.md
Validators     Content validation                      validators.md
Types          Core data structures                    types.md
A2A            Client, types, tool bridge, mock        a2a.md
Logging        Structured logging with context         logging.md
Telemetry      OpenTelemetry trace export              telemetry.md
import (
	"github.com/AltairaLabs/PromptKit/runtime/pipeline"
	"github.com/AltairaLabs/PromptKit/runtime/providers"
	"github.com/AltairaLabs/PromptKit/runtime/tools"
	"github.com/AltairaLabs/PromptKit/runtime/mcp"
	"github.com/AltairaLabs/PromptKit/runtime/statestore"
	"github.com/AltairaLabs/PromptKit/runtime/hooks"
	"github.com/AltairaLabs/PromptKit/runtime/hooks/guardrails"
	"github.com/AltairaLabs/PromptKit/runtime/types"
	"github.com/AltairaLabs/PromptKit/runtime/logger"
	"github.com/AltairaLabs/PromptKit/runtime/telemetry"
)
import (
	"context"
	"fmt"
	"log"

	"github.com/AltairaLabs/PromptKit/runtime/providers"
	"github.com/AltairaLabs/PromptKit/runtime/providers/openai"
	"github.com/AltairaLabs/PromptKit/runtime/types"
)

// Create provider
provider := openai.NewProvider(
	"openai",
	"gpt-4o-mini",
	"", // default baseURL (uses env var for API key)
	providers.ProviderDefaults{Temperature: 0.7, MaxTokens: 1500},
	false, // includeRawOutput
)
defer provider.Close()

// Execute prediction
ctx := context.Background()
resp, err := provider.Predict(ctx, providers.PredictionRequest{
	Messages: []types.Message{
		{Role: "user", Content: "Hello!"},
	},
	Temperature: 0.7,
	MaxTokens:   1500,
})
if err != nil {
	log.Fatal(err)
}
fmt.Println(resp.Content)
import (
	"context"
	"log"

	"github.com/AltairaLabs/PromptKit/runtime/mcp"
)

// Register MCP server
mcpRegistry := mcp.NewRegistry()
defer mcpRegistry.Close()

mcpRegistry.RegisterServer(mcp.ServerConfig{
	Name:    "filesystem",
	Command: "npx",
	Args:    []string{"-y", "@modelcontextprotocol/server-filesystem", "/allowed"},
})

// Discover tools
ctx := context.Background()
serverTools, err := mcpRegistry.ListAllTools(ctx)
if err != nil {
	log.Fatal(err)
}
for serverName, tools := range serverTools {
	log.Printf("Server %s has %d tools\n", serverName, len(tools))
}
// Execute with streaming
streamChan, err := provider.PredictStream(ctx, providers.PredictionRequest{
	Messages:    []types.Message{{Role: "user", Content: "Write a story"}},
	Temperature: 0.7,
	MaxTokens:   1500,
})
if err != nil {
	log.Fatal(err)
}

// Process chunks
for chunk := range streamChan {
	if chunk.Error != nil {
		log.Printf("Error: %v\n", chunk.Error)
		break
	}
	if chunk.Delta != "" {
		fmt.Print(chunk.Delta)
	}
	if chunk.FinishReason != nil {
		fmt.Printf("\n\nStream complete: %s\n", *chunk.FinishReason)
	}
}
defaults := providers.ProviderDefaults{
	Temperature: 0.7,
	TopP:        0.95,
	MaxTokens:   2000,
	Pricing: providers.Pricing{
		InputCostPer1K:  0.00015, // $0.15 per 1M tokens
		OutputCostPer1K: 0.0006,  // $0.60 per 1M tokens
	},
}
provider := openai.NewProvider("openai", "gpt-4o-mini", "", defaults, false)
policy := &pipeline.ToolPolicy{
	ToolChoice:           "auto",                     // "auto", "required", "none", or a specific tool name
	MaxRounds:            5,                          // Max tool execution rounds (default: 50)
	MaxToolCallsPerTurn:  10,                         // Max tool calls per LLM response
	MaxParallelToolCalls: 5,                          // Max concurrent tool executions (default: 10)
	MaxCostUSD:           1.00,                       // Stop after $1 spent (0 = unlimited)
	Blocklist:            []string{"dangerous_tool"}, // Blocked tools
}
resp, err := provider.Predict(ctx, req)
if err != nil {
	switch {
	case errors.Is(err, context.DeadlineExceeded):
		log.Println("Request timeout")
	default:
		log.Printf("Provider error: %v", err)
	}
}

// MCP tool call errors
response, err := client.CallTool(ctx, "read_file", args)
if err != nil {
	log.Printf("Tool execution failed: %v", err)
}
// Always close resources
defer provider.Close()
defer mcpRegistry.Close()

// Use context for cancellation
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
resp, err := provider.Predict(ctx, req)

// Always drain streaming channels
streamChan, err := provider.PredictStream(ctx, req)
if err != nil {
	return err
}
for chunk := range streamChan {
	// Process chunks
	if chunk.Error != nil {
		break
	}
}
Concurrency

  • Configure MaxConcurrentExecutions based on provider rate limits
  • Use semaphores to bound concurrent requests so providers are not overwhelmed (see the sketch after this list)
  • Consider graceful degradation under load
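A minimal sketch of bounding concurrent Predict calls with a buffered channel used as a semaphore. It reuses the provider and ctx from the examples above and assumes the sync and log packages are imported; the limit of 5 and the requests slice are illustrative, not part of the runtime API.

// Bound concurrent provider calls with a buffered-channel semaphore.
sem := make(chan struct{}, 5) // illustrative limit; tune to provider rate limits
var wg sync.WaitGroup

for _, req := range requests { // requests: a hypothetical []providers.PredictionRequest
	wg.Add(1)
	go func(r providers.PredictionRequest) {
		defer wg.Done()
		sem <- struct{}{}        // acquire a slot
		defer func() { <-sem }() // release the slot
		if _, err := provider.Predict(ctx, r); err != nil {
			log.Printf("predict failed: %v", err)
		}
	}(req)
}
wg.Wait()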
Streaming vs. Non-Streaming

  • Use streaming for interactive applications (chatbots, UIs)
  • Use non-streaming for batch processing, testing, and analytics
  • Streaming adds roughly 10% overhead but provides a better user experience
Tool Execution

  • MCP tools run in separate processes (stdio overhead)
  • Set timeouts on tool execution (see the sketch after this list)
  • Use repository executors for fast in-memory tools
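A minimal sketch of a per-call timeout on an MCP tool call, reusing the client.CallTool shape from the error-handling example above. The 10-second limit is illustrative, and client, args, and ctx are assumed to exist as in that example.

// Bound a single MCP tool call with a per-call timeout.
toolCtx, cancel := context.WithTimeout(ctx, 10*time.Second) // illustrative limit
defer cancel()

response, err := client.CallTool(toolCtx, "read_file", args)
if err != nil {
	if errors.Is(err, context.DeadlineExceeded) {
		log.Println("tool call timed out")
	} else {
		log.Printf("tool call failed: %v", err)
	}
	return err
}
log.Printf("tool result: %v", response)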
Memory and State

  • The pipeline creates a fresh ExecutionContext per call (prevents cross-request contamination)
  • Large conversation histories increase memory usage
  • Periodically clean up old conversations in the state store (see the sketch after this list)
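A minimal sketch of a periodic cleanup loop, assuming context, log, and time are imported. The conversationStore interface and its DeleteOlderThan method are hypothetical stand-ins for whatever cleanup your statestore implementation exposes, not the package's actual API.

// Hypothetical cleanup interface; adapt to the statestore implementation in use.
type conversationStore interface {
	DeleteOlderThan(ctx context.Context, cutoff time.Time) error
}

// Periodically drop conversations older than the retention window.
func runCleanup(ctx context.Context, store conversationStore, retention time.Duration) {
	ticker := time.NewTicker(time.Hour)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			if err := store.DeleteOlderThan(ctx, time.Now().Add(-retention)); err != nil {
				log.Printf("state store cleanup failed: %v", err)
			}
		}
	}
}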