Runtime API Reference

import "github.com/AltairaLabs/PromptKit/runtime/annotations"

Package annotations provides out-of-band annotations for session recordings. Annotations allow layering evaluations, feedback, and metadata on recorded sessions without modifying the authoritative event record.

Annotation represents an out-of-band annotation on a session or event.

type Annotation struct {
// ID is a unique identifier for this annotation.
ID string `json:"id"`
// Type identifies the kind of annotation.
Type AnnotationType `json:"type"`
// SessionID is the session this annotation belongs to.
SessionID string `json:"session_id"`
// Target specifies what this annotation targets.
Target Target `json:"target"`
// Key is the annotation key (e.g., "quality", "category", "safety").
Key string `json:"key"`
// Value holds the annotation value (type depends on annotation type).
Value AnnotationValue `json:"value"`
// Metadata contains additional structured data.
Metadata map[string]interface{} `json:"metadata,omitempty"`
// CreatedAt is when this annotation was created.
CreatedAt time.Time `json:"created_at"`
// CreatedBy identifies who created this annotation.
CreatedBy string `json:"created_by,omitempty"`
// Version is the annotation version (for corrections/updates).
Version int `json:"version"`
// PreviousID references the previous version if this is an update.
PreviousID string `json:"previous_id,omitempty"`
}

AnnotationType identifies the kind of annotation.

type AnnotationType string

Annotation types.

const (
// TypeScore represents a numeric evaluation score.
TypeScore AnnotationType = "score"
// TypeLabel represents a categorical label.
TypeLabel AnnotationType = "label"
// TypeComment represents a textual comment or note.
TypeComment AnnotationType = "comment"
// TypeFlag represents a binary flag (e.g., safety, policy).
TypeFlag AnnotationType = "flag"
// TypeMetric represents a performance or quality metric.
TypeMetric AnnotationType = "metric"
// TypeAssertion represents an assertion result (pass/fail).
TypeAssertion AnnotationType = "assertion"
// TypeGroundTruth represents ground truth labels for training.
TypeGroundTruth AnnotationType = "ground_truth"
)

AnnotationValue holds the value of an annotation. The actual type depends on the annotation type.

type AnnotationValue struct {
// Score is the numeric value (for TypeScore, TypeMetric).
Score *float64 `json:"score,omitempty"`
// Label is the categorical value (for TypeLabel, TypeGroundTruth).
Label string `json:"label,omitempty"`
// Labels is a list of categorical values (for multi-label scenarios).
Labels []string `json:"labels,omitempty"`
// Text is the textual value (for TypeComment).
Text string `json:"text,omitempty"`
// Flag is the boolean value (for TypeFlag).
Flag *bool `json:"flag,omitempty"`
// Passed indicates assertion result (for TypeAssertion).
Passed *bool `json:"passed,omitempty"`
// Message is an optional message (for TypeAssertion, TypeComment).
Message string `json:"message,omitempty"`
// Unit is the unit of measurement (for TypeMetric).
Unit string `json:"unit,omitempty"`
}

func NewAssertionValue(passed bool, message string) AnnotationValue

NewAssertionValue creates an assertion annotation value.

func NewCommentValue(text string) AnnotationValue

NewCommentValue creates a comment annotation value.

func NewFlagValue(flag bool) AnnotationValue

NewFlagValue creates a flag annotation value.

func NewLabelValue(label string) AnnotationValue

NewLabelValue creates a label annotation value.

func NewLabelsValue(labels ...string) AnnotationValue

NewLabelsValue creates a multi-label annotation value.

func NewMetricValue(value float64, unit string) AnnotationValue

NewMetricValue creates a metric annotation value with optional unit.

func NewScoreValue(score float64) AnnotationValue

NewScoreValue creates a score annotation value.
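
For orientation, a minimal sketch of assembling a score annotation with these helpers; the ID, key, and creator values are illustrative placeholders rather than anything the package prescribes:

    import (
        "time"

        "github.com/AltairaLabs/PromptKit/runtime/annotations"
    )

    // buildTurnScore assembles a quality score for turn 2 of a recorded session.
    func buildTurnScore(sessionID string) *annotations.Annotation {
        return &annotations.Annotation{
            ID:        "ann-001", // illustrative; callers typically generate their own unique IDs
            Type:      annotations.TypeScore,
            SessionID: sessionID,
            Target:    annotations.AtTurn(2),
            Key:       "quality",
            Value:     annotations.NewScoreValue(0.9),
            CreatedAt: time.Now(),
            CreatedBy: "reviewer@example.com", // illustrative
            Version:   1,                      // assumed starting version
        }
    }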

FileStore implements Store using JSON Lines files. Annotations for each session are stored in a separate file.

type FileStore struct {
// contains filtered or unexported fields
}

func NewFileStore(dir string) (*FileStore, error)

NewFileStore creates a file-based annotation store.

func (s *FileStore) Add(ctx context.Context, ann *Annotation) error

Add creates a new annotation.

func (s *FileStore) Close() error

Close releases resources.

func (s *FileStore) Delete(ctx context.Context, id string) error

Delete marks an annotation as deleted.

func (s *FileStore) Get(ctx context.Context, id string) (*Annotation, error)

Get retrieves an annotation by ID.

func (s *FileStore) Query(ctx context.Context, filter *Filter) ([]*Annotation, error)

Query returns annotations matching the filter.

func (s *FileStore) Update(ctx context.Context, previousID string, ann *Annotation) error

Update creates a new version of an existing annotation.

Filter specifies criteria for querying annotations.

type Filter struct {
// SessionID filters by session.
SessionID string
// Types filters by annotation type.
Types []AnnotationType
// Keys filters by annotation key.
Keys []string
// TargetTypes filters by target type.
TargetTypes []TargetType
// EventID filters by target event ID.
EventID string
// TurnIndex filters by target turn index.
TurnIndex *int
// CreatedBy filters by creator.
CreatedBy string
// Since filters by creation time (inclusive).
Since time.Time
// Until filters by creation time (exclusive).
Until time.Time
// IncludeDeleted includes deleted annotations.
IncludeDeleted bool
// LatestVersionOnly returns only the latest version of each annotation.
LatestVersionOnly bool
// Limit limits the number of results.
Limit int
}
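
A short sketch of persisting and querying annotations with FileStore; the directory path is a placeholder and error handling is kept minimal:

    import (
        "context"

        "github.com/AltairaLabs/PromptKit/runtime/annotations"
    )

    // latestScores adds one annotation, then returns the latest score annotations for the session.
    func latestScores(ctx context.Context, sessionID string, ann *annotations.Annotation) ([]*annotations.Annotation, error) {
        store, err := annotations.NewFileStore("recordings/annotations") // placeholder directory
        if err != nil {
            return nil, err
        }
        defer store.Close()

        if err := store.Add(ctx, ann); err != nil {
            return nil, err
        }
        return store.Query(ctx, &annotations.Filter{
            SessionID:         sessionID,
            Types:             []annotations.AnnotationType{annotations.TypeScore},
            LatestVersionOnly: true,
        })
    }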

Store persists annotations separately from the event stream.

type Store interface {
// Add creates a new annotation.
Add(ctx context.Context, ann *Annotation) error
// Update creates a new version of an existing annotation.
// The new annotation will reference the previous version.
Update(ctx context.Context, previousID string, ann *Annotation) error
// Get retrieves an annotation by ID.
Get(ctx context.Context, id string) (*Annotation, error)
// Query returns annotations matching the filter.
Query(ctx context.Context, filter *Filter) ([]*Annotation, error)
// Delete removes an annotation by ID.
// Note: This is a soft delete - the annotation is marked as deleted but preserved.
Delete(ctx context.Context, id string) error
// Close releases resources held by the store.
Close() error
}

Target specifies what an annotation targets.

type Target struct {
// Type identifies the target type.
Type TargetType `json:"type"`
// EventID is the target event ID (for TargetEvent).
EventID string `json:"event_id,omitempty"`
// EventSequence is the target event sequence number (alternative to EventID).
EventSequence int64 `json:"event_sequence,omitempty"`
// TurnIndex is the target turn index (for TargetTurn).
TurnIndex int `json:"turn_index,omitempty"`
// MessageIndex is the target message index (for TargetMessage).
MessageIndex int `json:"message_index,omitempty"`
// StartTime is the start of the time range (for TargetTimeRange).
StartTime time.Time `json:"start_time,omitempty"`
// EndTime is the end of the time range (for TargetTimeRange).
EndTime time.Time `json:"end_time,omitempty"`
}

func AtEvent(eventID string) Target

AtEvent creates a target for a specific event.

func AtEventSequence(seq int64) Target

AtEventSequence creates a target for an event by sequence number.

func AtMessage(messageIndex int) Target

AtMessage creates a target for a specific message.

func AtTurn(turnIndex int) Target

AtTurn creates a target for a specific conversation turn.

func ForSession() Target

ForSession creates a target for the entire session.

func InTimeRange(start, end time.Time) Target

InTimeRange creates a target for a time range.

TargetType identifies what the annotation targets.

type TargetType string

Target types.

const (
// TargetSession targets the entire session.
TargetSession TargetType = "session"
// TargetEvent targets a specific event.
TargetEvent TargetType = "event"
// TargetTimeRange targets a time range within the session.
TargetTimeRange TargetType = "time_range"
// TargetTurn targets a specific conversation turn.
TargetTurn TargetType = "turn"
// TargetMessage targets a specific message.
TargetMessage TargetType = "message"
)
import "github.com/AltairaLabs/PromptKit/runtime/audio"

Package audio provides voice activity detection (VAD), turn detection, and audio session management for real-time voice AI applications.

The package follows industry-standard patterns for voice AI:

  • VAD (Voice Activity Detection): Detects when someone is speaking vs. silent
  • Turn Detection: Determines when a speaker has finished their turn
  • Interruption Handling: Manages user interrupting bot output

Audio processing follows a two-stage approach:

  1. VADAnalyzer detects voice activity in real-time
  2. TurnDetector uses VAD output plus additional signals to detect turn boundaries

    vad, err := audio.NewSimpleVAD(audio.DefaultVADParams())
    if err != nil {
        // handle invalid VAD parameters
    }
    detector := audio.NewSilenceDetector(500 * time.Millisecond)
    for chunk := range audioStream {
        vad.Analyze(ctx, chunk) // update voice-activity state from the chunk
        if done, _ := detector.ProcessVADState(ctx, vad.State()); done {
            // User finished speaking
        }
    }

Standard audio sample rates for common use cases.

const (
SampleRate24kHz = 24000 // Common TTS output rate
SampleRate16kHz = 16000 // Common STT/ASR input rate
)

Default VAD parameter values.

const (
DefaultVADConfidence = 0.5
DefaultVADStartSecs = 0.2
DefaultVADStopSecs = 0.8
DefaultVADMinVolume = 0.01
DefaultVADSampleRate = 16000
)

func Resample24kTo16k(input []byte) ([]byte, error)

Resample24kTo16k is a convenience function for the common case of resampling from 24kHz (TTS output) to 16kHz (Gemini input).

func ResamplePCM16(input []byte, fromRate, toRate int) ([]byte, error)

ResamplePCM16 resamples PCM16 audio data from one sample rate to another. Uses linear interpolation for reasonable quality resampling. Input and output are little-endian 16-bit signed PCM samples.
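
For example, a small sketch of feeding 24kHz TTS output and 44.1kHz microphone capture into a 16kHz pipeline (the 44.1kHz source rate is just an assumed example):

    import "github.com/AltairaLabs/PromptKit/runtime/audio"

    // toSTTRate converts little-endian PCM16 buffers to the 16kHz STT input rate.
    func toSTTRate(ttsOut24k, mic44k []byte) (tts16k, mic16k []byte, err error) {
        if tts16k, err = audio.Resample24kTo16k(ttsOut24k); err != nil {
            return nil, nil, err
        }
        if mic16k, err = audio.ResamplePCM16(mic44k, 44100, audio.SampleRate16kHz); err != nil {
            return nil, nil, err
        }
        return tts16k, mic16k, nil
    }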

AccumulatingTurnDetector is a TurnDetector that accumulates audio during a turn.

type AccumulatingTurnDetector interface {
TurnDetector
// OnTurnComplete registers a callback for when a complete turn is detected.
OnTurnComplete(callback TurnCallback)
// GetAccumulatedAudio returns audio accumulated so far (may be incomplete turn).
GetAccumulatedAudio() []byte
// SetTranscript sets the transcript for the current turn (from external STT).
SetTranscript(transcript string)
}
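
SilenceDetector below exposes all of these methods, so it can be used through this interface; a brief sketch with an illustrative threshold:

    import (
        "time"

        "github.com/AltairaLabs/PromptKit/runtime/audio"
    )

    // newTurnCollector returns a detector that hands each completed turn to handle.
    func newTurnCollector(handle func(pcm []byte, transcript string)) audio.AccumulatingTurnDetector {
        var det audio.AccumulatingTurnDetector = audio.NewSilenceDetector(700 * time.Millisecond)
        det.OnTurnComplete(func(pcm []byte, transcript string) {
            handle(pcm, transcript) // full turn audio plus any transcript set via SetTranscript
        })
        return det
    }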

InterruptionCallback is called when user interrupts the bot.

type InterruptionCallback func()

InterruptionHandler manages user interruption logic during bot output.

type InterruptionHandler struct {
// contains filtered or unexported fields
}

func NewInterruptionHandler(strategy InterruptionStrategy, vad VADAnalyzer) *InterruptionHandler

NewInterruptionHandler creates an InterruptionHandler with the given strategy and VAD.

func (h *InterruptionHandler) IsBotSpeaking() bool

IsBotSpeaking returns true if the bot is currently outputting audio.

func (h *InterruptionHandler) NotifySentenceBoundary()

NotifySentenceBoundary notifies the handler of a sentence boundary. For deferred interruption strategy, this may trigger the pending interruption.

func (h *InterruptionHandler) OnInterrupt(callback InterruptionCallback)

OnInterrupt registers a callback for when interruption occurs.

func (h *InterruptionHandler) ProcessAudio(ctx context.Context, audio []byte) (bool, error)

ProcessAudio processes audio and detects user interruption. Returns true if an interruption was detected and should be acted upon.

func (h *InterruptionHandler) ProcessVADState(ctx context.Context, state VADState) (bool, error)

ProcessVADState processes a VAD state update for interruption detection. Returns true if an interruption was detected and should be acted upon.

func (h *InterruptionHandler) Reset()

Reset clears interruption state for a new turn.

func (h *InterruptionHandler) SetBotSpeaking(speaking bool)

SetBotSpeaking sets whether the bot is currently outputting audio.

func (h *InterruptionHandler) WasInterrupted() bool

WasInterrupted returns true if an interruption occurred.

InterruptionStrategy determines how to handle user interrupting bot.

type InterruptionStrategy int

const (
// InterruptionIgnore ignores user speech during bot output.
InterruptionIgnore InterruptionStrategy = iota
// InterruptionImmediate immediately stops bot and starts listening.
InterruptionImmediate
// InterruptionDeferred waits for bot's current sentence, then switches.
InterruptionDeferred
)

func (s InterruptionStrategy) String() string

String returns a human-readable representation of the interruption strategy.
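
A hedged sketch of combining a VAD analyzer with an InterruptionHandler for barge-in handling; the strategy choice is illustrative and the single ProcessVADState call stands in for a real per-chunk loop:

    import (
        "context"

        "github.com/AltairaLabs/PromptKit/runtime/audio"
    )

    // checkBargeIn reports whether the user interrupted the bot on the latest VAD state.
    func checkBargeIn(ctx context.Context, vad audio.VADAnalyzer, stopPlayback func()) (bool, error) {
        h := audio.NewInterruptionHandler(audio.InterruptionImmediate, vad)
        h.OnInterrupt(stopPlayback) // invoked when an interruption is detected
        h.SetBotSpeaking(true)      // mark that the bot is currently producing audio

        interrupted, err := h.ProcessVADState(ctx, vad.State())
        if err != nil {
            return false, err
        }
        if interrupted {
            h.Reset() // clear interruption state before the next turn
        }
        return interrupted, nil
    }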

SilenceDetector detects turn boundaries based on silence duration. It triggers end-of-turn when silence exceeds a configurable threshold.

type SilenceDetector struct {
// Threshold is the silence duration required to trigger turn end.
Threshold time.Duration
// contains filtered or unexported fields
}

func NewSilenceDetector(threshold time.Duration) *SilenceDetector

NewSilenceDetector creates a SilenceDetector with the given threshold. threshold is the duration of silence required to trigger end-of-turn.

func (d *SilenceDetector) GetAccumulatedAudio() []byte

GetAccumulatedAudio returns audio accumulated so far.

func (d *SilenceDetector) IsUserSpeaking() bool

IsUserSpeaking returns true if user is currently speaking.

func (d *SilenceDetector) Name() string

Name returns the detector identifier.

func (d *SilenceDetector) OnTurnComplete(callback TurnCallback)

OnTurnComplete registers a callback for when a complete turn is detected.

func (d *SilenceDetector) ProcessAudio(ctx context.Context, audio []byte) (bool, error)

ProcessAudio processes an incoming audio chunk. This implementation delegates to ProcessVADState and expects VAD to be run separately. Returns true if end of turn is detected.

func (d *SilenceDetector) ProcessVADState(ctx context.Context, state VADState) (bool, error)

ProcessVADState processes a VAD state update and detects turn boundaries. Returns true if end of turn is detected.

func (d *SilenceDetector) Reset()

Reset clears state for a new conversation.

func (d *SilenceDetector) SetTranscript(transcript string)

SetTranscript sets the transcript for the current turn.

SimpleVAD is a basic voice activity detector using RMS (Root Mean Square) analysis. It provides a lightweight VAD implementation without requiring external ML models. For more accurate detection, consider using SileroVAD.

type SimpleVAD struct {
// contains filtered or unexported fields
}

func NewSimpleVAD(params VADParams) (*SimpleVAD, error)

NewSimpleVAD creates a SimpleVAD analyzer with the given parameters.

func (v *SimpleVAD) Analyze(ctx context.Context, audio []byte) (float64, error)

Analyze processes audio and returns voice probability based on RMS volume.

func (v *SimpleVAD) Name() string

Name returns the analyzer identifier.

func (v *SimpleVAD) OnStateChange() <-chan VADEvent

OnStateChange returns a channel that receives state transitions.

func (v *SimpleVAD) Reset()

Reset clears accumulated state for a new conversation.

func (v *SimpleVAD) State() VADState

State returns the current VAD state.

TurnCallback is called when a complete user turn is detected. audio contains the accumulated audio for the turn. transcript contains any accumulated transcript (may be empty).

type TurnCallback func(audio []byte, transcript string)

TurnDetector determines when a speaker has finished their turn. This is separate from VAD: VAD detects voice activity, while turn detection determines conversation boundaries.

type TurnDetector interface {
// Name returns the detector identifier.
Name() string
// ProcessAudio processes an incoming audio chunk.
// Returns true if end of turn is detected.
ProcessAudio(ctx context.Context, audio []byte) (bool, error)
// ProcessVADState processes a VAD state update.
// Returns true if end of turn is detected based on VAD state.
ProcessVADState(ctx context.Context, state VADState) (bool, error)
// IsUserSpeaking returns true if user is currently speaking.
IsUserSpeaking() bool
// Reset clears state for a new conversation.
Reset()
}

VADAnalyzer analyzes audio for voice activity.

type VADAnalyzer interface {
// Name returns the analyzer identifier.
Name() string
// Analyze processes audio and returns voice probability (0.0-1.0).
// audio should be raw PCM samples at the configured sample rate.
Analyze(ctx context.Context, audio []byte) (float64, error)
// State returns the current VAD state based on accumulated analysis.
State() VADState
// OnStateChange returns a channel that receives state transitions.
// The channel is buffered and may drop events if not consumed.
OnStateChange() <-chan VADEvent
// Reset clears accumulated state for a new conversation.
Reset()
}

VADEvent represents a state transition in VAD.

type VADEvent struct {
State VADState
PrevState VADState
Timestamp time.Time
Duration time.Duration // How long in the previous state
Confidence float64 // Voice confidence at transition
}
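
Instead of polling State(), a caller can drain the OnStateChange channel; a minimal sketch (the logging is illustrative, and events may be dropped if the channel is not consumed):

    import (
        "log"

        "github.com/AltairaLabs/PromptKit/runtime/audio"
    )

    // logVADTransitions logs every VAD state transition delivered on the channel.
    func logVADTransitions(vad audio.VADAnalyzer) {
        go func() {
            for ev := range vad.OnStateChange() {
                log.Printf("vad: %s -> %s after %s (confidence %.2f)",
                    ev.PrevState, ev.State, ev.Duration, ev.Confidence)
            }
        }()
    }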

VADParams configures voice activity detection behavior.

type VADParams struct {
// Confidence threshold for voice detection (0.0-1.0, default: 0.5).
// Higher values require more confidence before triggering.
Confidence float64
// StartSecs is seconds of speech required to trigger VADStateSpeaking (default: 0.2).
// Prevents false starts from brief noise.
StartSecs float64
// StopSecs is seconds of silence required to trigger VADStateQuiet (default: 0.8).
// Allows natural pauses without ending turn.
StopSecs float64
// MinVolume is the minimum RMS volume threshold (default: 0.01).
// Audio below this is treated as silence.
MinVolume float64
// SampleRate is the audio sample rate in Hz (default: 16000).
SampleRate int
}

func DefaultVADParams() VADParams

DefaultVADParams returns sensible defaults for voice activity detection.

func (p VADParams) Validate() error

Validate checks that VAD parameters are within acceptable ranges.
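
A small sketch of adjusting the defaults before constructing an analyzer; the specific values are illustrative:

    import "github.com/AltairaLabs/PromptKit/runtime/audio"

    // newTunedVAD builds a SimpleVAD with slightly stricter-than-default parameters.
    func newTunedVAD() (*audio.SimpleVAD, error) {
        params := audio.DefaultVADParams()
        params.Confidence = 0.6 // require a bit more certainty than the 0.5 default
        params.StopSecs = 1.0   // tolerate longer pauses before reporting silence
        if err := params.Validate(); err != nil {
            return nil, err
        }
        return audio.NewSimpleVAD(params)
    }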

VADState represents the current voice activity state.

type VADState int

const (
// VADStateQuiet indicates no voice activity detected.
VADStateQuiet VADState = iota
// VADStateStarting indicates voice is starting (within start threshold).
VADStateStarting
// VADStateSpeaking indicates active speech.
VADStateSpeaking
// VADStateStopping indicates voice is stopping (within stop threshold).
VADStateStopping
)

func (s VADState) String() string

String returns a human-readable representation of the VAD state.

ValidationError represents a parameter validation error.

type ValidationError struct {
Field string
Message string
}

func (e *ValidationError) Error() string
import "github.com/AltairaLabs/PromptKit/runtime/credentials"

Package credentials provides credential management for LLM provider authentication. It supports multiple credential types including API keys, AWS SigV4, GCP OAuth, and Azure AD.

BedrockModelMapping maps Claude model names to Bedrock model IDs.

var BedrockModelMapping = map[string]string{
"claude-3-5-sonnet-20241022": "anthropic.claude-3-5-sonnet-20241022-v2:0",
"claude-3-5-sonnet-20240620": "anthropic.claude-3-5-sonnet-20240620-v1:0",
"claude-3-opus-20240229": "anthropic.claude-3-opus-20240229-v1:0",
"claude-3-sonnet-20240229": "anthropic.claude-3-sonnet-20240229-v1:0",
"claude-3-haiku-20240307": "anthropic.claude-3-haiku-20240307-v1:0",
"claude-3-5-haiku-20241022": "anthropic.claude-3-5-haiku-20241022-v1:0",
}

DefaultEnvVars maps provider types to their default environment variable names. This maintains backward compatibility with existing configurations.

var DefaultEnvVars = map[string][]string{
"claude": {"ANTHROPIC_API_KEY", "CLAUDE_API_KEY"},
"openai": {"OPENAI_API_KEY", "OPENAI_TOKEN"},
"gemini": {"GEMINI_API_KEY", "GOOGLE_API_KEY"},
"imagen": {"GEMINI_API_KEY", "GOOGLE_API_KEY"},
}

ProviderHeaderConfig maps provider types to their API key header configuration.

var ProviderHeaderConfig = map[string]struct {
HeaderName string
Prefix string
}{
"claude": {HeaderName: "X-API-Key", Prefix: ""},
"openai": {HeaderName: "Authorization", Prefix: "Bearer "},
"gemini": {HeaderName: "", Prefix: ""},
"imagen": {HeaderName: "", Prefix: ""},
}

func BedrockEndpoint(region string) string

BedrockEndpoint returns the Bedrock endpoint URL for a region.

func VertexEndpoint(project, region string) string

VertexEndpoint returns the Vertex AI endpoint URL for a project and region.

APIKeyCredential implements header-based API key authentication. It supports flexible header names for different providers.

type APIKeyCredential struct {
// contains filtered or unexported fields
}

func NewAPIKeyCredential(apiKey string, opts ...APIKeyOption) *APIKeyCredential

NewAPIKeyCredential creates a new API key credential. By default, it uses the “Authorization” header with a “Bearer ” prefix.

func (c *APIKeyCredential) APIKey() string

APIKey returns the raw API key value. This is useful for providers that need the key for non-HTTP operations.

func (c *APIKeyCredential) Apply(_ context.Context, req *http.Request) error

Apply adds the API key to the request header.

func (c *APIKeyCredential) Type() string

Type returns “api_key”.

APIKeyOption configures an APIKeyCredential.

type APIKeyOption func(*APIKeyCredential)

func WithBearerPrefix() APIKeyOption

WithBearerPrefix adds “Bearer ” prefix to the API key.

func WithHeaderName(name string) APIKeyOption

WithHeaderName sets the header name for the API key.

func WithPrefix(prefix string) APIKeyOption

WithPrefix sets a custom prefix for the API key.
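
A sketch of a provider-specific credential built with these options, matching the Claude entry in ProviderHeaderConfig; the key is supplied by the caller:

    import (
        "context"
        "net/http"

        "github.com/AltairaLabs/PromptKit/runtime/credentials"
    )

    // authorizeClaude adds an "X-API-Key: <key>" header to the request.
    // With no options, NewAPIKeyCredential would instead set "Authorization: Bearer <key>".
    func authorizeClaude(ctx context.Context, req *http.Request, key string) error {
        cred := credentials.NewAPIKeyCredential(key, credentials.WithHeaderName("X-API-Key"))
        return cred.Apply(ctx, req)
    }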

AWSCredential implements AWS SigV4 signing for Bedrock.

type AWSCredential struct {
// contains filtered or unexported fields
}

func NewAWSCredential(ctx context.Context, region string) (*AWSCredential, error)

NewAWSCredential creates a new AWS credential using the default credential chain. This supports IRSA (IAM Roles for Service Accounts), instance profiles, and environment variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY).

func NewAWSCredentialWithRole(ctx context.Context, region, roleARN string) (*AWSCredential, error)

NewAWSCredentialWithRole creates an AWS credential that assumes a role.

func (c *AWSCredential) Apply(ctx context.Context, req *http.Request) error

Apply signs the request using AWS SigV4.

func (c *AWSCredential) Config() aws.Config

Config returns the AWS config for advanced use cases.

func (c *AWSCredential) Region() string

Region returns the configured AWS region.

func (c *AWSCredential) Type() string

Type returns “aws”.

AzureCredential implements Azure AD token-based authentication for Azure AI services.

type AzureCredential struct {
// contains filtered or unexported fields
}

func NewAzureCredential(ctx context.Context, endpoint string) (*AzureCredential, error)

NewAzureCredential creates a new Azure credential using the default credential chain. This supports Managed Identity, Azure CLI, environment variables, and more.

func NewAzureCredentialWithClientSecret(ctx context.Context, endpoint, tenantID, clientID, clientSecret string) (*AzureCredential, error)

NewAzureCredentialWithClientSecret creates an Azure credential using client secret.

func NewAzureCredentialWithManagedIdentity(ctx context.Context, endpoint string, clientID *string) (*AzureCredential, error)

NewAzureCredentialWithManagedIdentity creates an Azure credential using Managed Identity.

func (c *AzureCredential) Apply(ctx context.Context, req *http.Request) error

Apply adds the Azure AD token to the request.

func (c *AzureCredential) Endpoint() string

Endpoint returns the configured Azure endpoint.

func (c *AzureCredential) Type() string

Type returns “azure”.

Credential applies authentication to HTTP requests. Implementations handle different authentication schemes like API keys, AWS SigV4 signing, OAuth tokens, etc.

type Credential interface {
// Apply adds authentication to the HTTP request.
// It may modify headers, query parameters, or the request body.
Apply(ctx context.Context, req *http.Request) error
// Type returns the credential type identifier (e.g., "api_key", "aws", "gcp", "azure").
Type() string
}

func MustResolve(ctx context.Context, cfg ResolverConfig) Credential

MustResolve resolves credentials and panics on error. Use this only in initialization code where errors are unrecoverable.

func Resolve(ctx context.Context, cfg ResolverConfig) (Credential, error)

Resolve resolves credentials according to the following chain:

  1. api_key (explicit value)
  2. credential_file (read from file)
  3. credential_env (read from environment variable)
  4. default env vars for the provider type

For platform configurations (bedrock, vertex, azure), it returns the appropriate cloud credential type that uses the respective SDK’s default credential chain.

GCPCredential implements OAuth2 token-based authentication for Vertex AI.

type GCPCredential struct {
// contains filtered or unexported fields
}

func NewGCPCredential(ctx context.Context, project, region string) (*GCPCredential, error)

NewGCPCredential creates a new GCP credential using Application Default Credentials. This supports Workload Identity, service account keys, and gcloud auth.

func NewGCPCredentialWithServiceAccount(ctx context.Context, project, region, keyFile string) (*GCPCredential, error)

NewGCPCredentialWithServiceAccount creates a GCP credential from a service account key file.

func (c *GCPCredential) Apply(ctx context.Context, req *http.Request) error

Apply adds the OAuth2 token to the request.

func (c *GCPCredential) Project() string

Project returns the configured GCP project ID.

func (c *GCPCredential) Region() string

Region returns the configured GCP region.

func (c *GCPCredential) Type() string

Type returns “gcp”.

NoOpCredential is a credential that does nothing. Used for providers that don’t require authentication or handle it internally.

type NoOpCredential struct{}

func (c *NoOpCredential) Apply(_ context.Context, _ *http.Request) error

Apply does nothing.

func (c *NoOpCredential) Type() string

Type returns “none”.

ResolverConfig holds configuration for credential resolution.

type ResolverConfig struct {
// ProviderType is the provider type (claude, openai, gemini, etc.)
ProviderType string
// CredentialConfig is the explicit credential configuration from the provider.
CredentialConfig *config.CredentialConfig
// PlatformConfig is the platform configuration (bedrock, vertex, azure).
PlatformConfig *config.PlatformConfig
// ConfigDir is the base directory for resolving relative credential file paths.
ConfigDir string
}
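
A minimal sketch of the common path: with only ProviderType set and no explicit credential or platform configuration, Resolve falls back to the default environment variables listed in DefaultEnvVars:

    import (
        "context"
        "net/http"

        "github.com/AltairaLabs/PromptKit/runtime/credentials"
    )

    // signRequest resolves an OpenAI credential (e.g. from OPENAI_API_KEY) and applies it.
    func signRequest(ctx context.Context, req *http.Request) error {
        cred, err := credentials.Resolve(ctx, credentials.ResolverConfig{
            ProviderType: "openai", // no explicit config, so the default env vars are consulted
        })
        if err != nil {
            return err
        }
        return cred.Apply(ctx, req)
    }
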
import "github.com/AltairaLabs/PromptKit/runtime/events"

Package events provides event storage and replay for session recording, plus a lightweight pub/sub event bus for runtime observability.

func ExportSession(ctx context.Context, session *AnnotatedSession, outputPath string, format ExportFormat) error

ExportSession is a convenience function to export a session.

AnnotatedSession represents a complete session with events, media, and annotations. It provides a unified interface for loading and accessing all session data.

type AnnotatedSession struct {
// SessionID is the unique session identifier.
SessionID string
// Events are all events in the session, sorted by timestamp.
Events []*Event
// Annotations are all annotations for this session.
Annotations []*annotations.Annotation
// Timeline is the assembled media timeline.
Timeline *MediaTimeline
// Metadata contains session-level metadata.
Metadata SessionMetadata
}

func (s *AnnotatedSession) BuildTimelineView() *TimelineView

BuildTimelineView creates a unified timeline view of all session content.

func (s *AnnotatedSession) GetAnnotationsByType(annotType annotations.AnnotationType) []*annotations.Annotation

GetAnnotationsByType returns all annotations of the specified type.

func (s *AnnotatedSession) GetAnnotationsForEvent(eventIndex int) []*annotations.Annotation

GetAnnotationsForEvent returns annotations targeting the specified event.

func (s *AnnotatedSession) GetAnnotationsInTimeRange(start, end time.Duration) []*annotations.Annotation

GetAnnotationsInTimeRange returns annotations active during the specified time range.

func (s *AnnotatedSession) GetConversationMessages() []*Event

GetConversationMessages returns all message events in order.

func (s *AnnotatedSession) GetEventsByType(eventType EventType) []*Event

GetEventsByType returns all events of the specified type.

func (s *AnnotatedSession) GetTranscriptions() []*Event

GetTranscriptions returns all transcription events.

func (s *AnnotatedSession) NewSyncPlayer(config *SyncPlayerConfig) *SyncPlayer

NewSyncPlayer creates a synchronized player for this session.

func (s *AnnotatedSession) Summary() string

Summary returns a human-readable summary of the session.

AnnotatedSessionLoader loads annotated sessions from storage.

type AnnotatedSessionLoader struct {
// contains filtered or unexported fields
}

func NewAnnotatedSessionLoader(eventStore EventStore, blobStore BlobStore, annotStore annotations.Store) *AnnotatedSessionLoader

NewAnnotatedSessionLoader creates a new session loader.

func (l *AnnotatedSessionLoader) Load(ctx context.Context, sessionID string) (*AnnotatedSession, error)

Load loads a complete annotated session.

func (l *AnnotatedSessionLoader) WithMetadata(compute bool) *AnnotatedSessionLoader

WithMetadata enables or disables metadata computation.
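
A sketch of wiring the three file-backed stores into a loader; the directory paths are placeholders:

    import (
        "context"
        "fmt"

        "github.com/AltairaLabs/PromptKit/runtime/annotations"
        "github.com/AltairaLabs/PromptKit/runtime/events"
    )

    // summarize loads a recorded session with its annotations and prints a summary.
    func summarize(ctx context.Context, sessionID string) error {
        eventStore, err := events.NewFileEventStore("recordings/events") // placeholder paths
        if err != nil {
            return err
        }
        blobStore, err := events.NewFileBlobStore("recordings/blobs")
        if err != nil {
            return err
        }
        annotStore, err := annotations.NewFileStore("recordings/annotations")
        if err != nil {
            return err
        }

        loader := events.NewAnnotatedSessionLoader(eventStore, blobStore, annotStore).WithMetadata(true)
        session, err := loader.Load(ctx, sessionID)
        if err != nil {
            return err
        }
        fmt.Println(session.Summary())
        fmt.Println("scores:", len(session.GetAnnotationsByType(annotations.TypeScore)))
        return nil
    }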

AnnotationHandler is called when an annotation becomes active.

type AnnotationHandler func(annotation *annotations.Annotation, position time.Duration)

AudioHandler is called with audio data during synchronized playback. Returns false to stop playback.

type AudioHandler func(data []byte, track TrackType, position time.Duration) bool

AudioInputData contains data for audio input events.

type AudioInputData struct {
// Actor identifies the source of the audio (e.g., "user", "environment").
Actor string `json:"actor"`
// Payload contains the audio data or reference.
Payload BinaryPayload `json:"payload"`
// Metadata contains audio format information.
Metadata AudioMetadata `json:"metadata"`
// TurnID links this audio to a specific conversation turn.
TurnID string `json:"turn_id,omitempty"`
// ChunkIndex is the sequence number for streaming audio (0-based).
ChunkIndex int `json:"chunk_index"`
// IsFinal indicates this is the last chunk in the stream.
IsFinal bool `json:"is_final"`
// contains filtered or unexported fields
}

AudioMetadata contains format information for audio data.

type AudioMetadata struct {
// SampleRate is the audio sample rate in Hz (e.g., 16000, 24000, 44100).
SampleRate int `json:"sample_rate"`
// Channels is the number of audio channels (1=mono, 2=stereo).
Channels int `json:"channels"`
// Encoding is the audio encoding format (e.g., "pcm", "pcm_linear16", "opus", "mp3").
Encoding string `json:"encoding"`
// BitsPerSample is the bit depth for PCM audio (e.g., 16, 24, 32).
BitsPerSample int `json:"bits_per_sample,omitempty"`
// DurationMs is the duration of the audio in milliseconds.
DurationMs int64 `json:"duration_ms"`
}

AudioOutputData contains data for audio output events.

type AudioOutputData struct {
// Payload contains the audio data or reference.
Payload BinaryPayload `json:"payload"`
// Metadata contains audio format information.
Metadata AudioMetadata `json:"metadata"`
// TurnID links this audio to a specific conversation turn.
TurnID string `json:"turn_id,omitempty"`
// ChunkIndex is the sequence number for streaming audio (0-based).
ChunkIndex int `json:"chunk_index"`
// IsFinal indicates this is the last chunk in the stream.
IsFinal bool `json:"is_final"`
// GeneratedFrom indicates what generated this audio (e.g., "tts", "model").
GeneratedFrom string `json:"generated_from,omitempty"`
// contains filtered or unexported fields
}

AudioTranscriptionData contains data for transcription events.

type AudioTranscriptionData struct {
// Text is the transcribed text.
Text string `json:"text"`
// Language is the detected or specified language code (e.g., "en-US").
Language string `json:"language,omitempty"`
// Confidence is the confidence score (0.0 to 1.0) if available.
Confidence float64 `json:"confidence,omitempty"`
// TurnID links this transcription to a specific conversation turn.
TurnID string `json:"turn_id,omitempty"`
// AudioEventID references the audio event this transcription is derived from.
AudioEventID string `json:"audio_event_id,omitempty"`
// IsFinal indicates this is the final transcription (vs. interim results).
IsFinal bool `json:"is_final"`
// Provider is the STT provider used (e.g., "whisper", "google", "deepgram").
Provider string `json:"provider,omitempty"`
// contains filtered or unexported fields
}

BinaryPayload represents a reference to binary data stored externally. This allows events to reference large payloads (audio, video, images) without embedding them directly in the event stream.

type BinaryPayload struct {
// StorageRef is a URI or path to the stored binary data.
// Examples: "file://recordings/audio/chunk-001.pcm", "s3://bucket/key"
StorageRef string `json:"storage_ref"`
// MIMEType is the MIME type of the binary data.
MIMEType string `json:"mime_type"`
// Size is the size of the binary data in bytes.
Size int64 `json:"size"`
// Checksum is an optional integrity checksum (e.g., SHA256).
Checksum string `json:"checksum,omitempty"`
// InlineData contains the raw bytes if small enough to embed directly.
// If set, StorageRef may be empty.
InlineData []byte `json:"inline_data,omitempty"`
}

BlobStore provides storage for binary payloads referenced by events. This separates large binary data (audio, video, images) from the event stream.

type BlobStore interface {
// Store saves binary data and returns a storage reference.
// The reference can be used to retrieve the data later.
Store(ctx context.Context, sessionID string, data []byte, mimeType string) (*BinaryPayload, error)
// StoreReader saves binary data from a reader and returns a storage reference.
// This is more efficient for large payloads.
StoreReader(ctx context.Context, sessionID string, r io.Reader, mimeType string) (*BinaryPayload, error)
// Load retrieves binary data by storage reference.
Load(ctx context.Context, ref string) ([]byte, error)
// LoadReader returns a reader for binary data by storage reference.
// The caller is responsible for closing the reader.
LoadReader(ctx context.Context, ref string) (io.ReadCloser, error)
// Delete removes binary data by storage reference.
Delete(ctx context.Context, ref string) error
// Close releases any resources held by the store.
Close() error
}

ContextBuiltData contains data for context building events.

type ContextBuiltData struct {
MessageCount int
TokenCount int
TokenBudget int
Truncated bool
// contains filtered or unexported fields
}

ConversationStartedData contains data for conversation start events.

type ConversationStartedData struct {
SystemPrompt string // The assembled system prompt for this conversation
// contains filtered or unexported fields
}

CustomEventData allows middleware to emit arbitrary structured events.

type CustomEventData struct {
MiddlewareName string
EventName string
Data map[string]interface{}
Message string
// contains filtered or unexported fields
}

Emitter provides helpers for publishing runtime events with shared metadata.

type Emitter struct {
// contains filtered or unexported fields
}

func NewEmitter(bus *EventBus, runID, sessionID, conversationID string) *Emitter

NewEmitter creates a new event emitter.

func (e *Emitter) AudioInput(data *AudioInputData)

AudioInput emits the audio.input event for recording user/environment audio.

func (e *Emitter) AudioOutput(data *AudioOutputData)

AudioOutput emits the audio.output event for recording assistant/model audio.

func (e *Emitter) ContextBuilt(messageCount, tokenCount, tokenBudget int, truncated bool)

ContextBuilt emits the context.built event.

func (e *Emitter) ConversationStarted(systemPrompt string)

ConversationStarted emits the conversation.started event with the system prompt.

func (e *Emitter) EmitCustom(eventType EventType, middlewareName, eventName string, data map[string]interface{}, message string)

EmitCustom allows middleware to emit arbitrary event types with structured payloads.

func (e *Emitter) MessageCreated(role, content string, index int, toolCalls []MessageToolCall, toolResult *MessageToolResult)

MessageCreated emits the message.created event.

func (e *Emitter) MessageUpdated(index int, latencyMs int64, inputTokens, outputTokens int, totalCost float64)

MessageUpdated emits the message.updated event.

func (e *Emitter) MiddlewareCompleted(name string, index int, duration time.Duration)

MiddlewareCompleted emits the middleware.completed event.

func (e *Emitter) MiddlewareFailed(name string, index int, err error, duration time.Duration)

MiddlewareFailed emits the middleware.failed event.

func (e *Emitter) MiddlewareStarted(name string, index int)

MiddlewareStarted emits the middleware.started event.

func (e *Emitter) PipelineCompleted(duration time.Duration, totalCost float64, inputTokens, outputTokens, messageCount int)

PipelineCompleted emits the pipeline.completed event.

func (e *Emitter) PipelineFailed(err error, duration time.Duration)

PipelineFailed emits the pipeline.failed event.

func (e *Emitter) PipelineStarted(middlewareCount int)

PipelineStarted emits the pipeline.started event.

func (e *Emitter) ProviderCallCompleted(data *ProviderCallCompletedData)

ProviderCallCompleted emits the provider.call.completed event.

func (e *Emitter) ProviderCallFailed(provider, model string, err error, duration time.Duration)

ProviderCallFailed emits the provider.call.failed event.

func (e *Emitter) ProviderCallStarted(provider, model string, messageCount, toolCount int)

ProviderCallStarted emits the provider.call.started event.

func (e *Emitter) StageCompleted(name string, index int, duration time.Duration)

StageCompleted emits the stage.completed event (for streaming architecture).

func (e *Emitter) StageFailed(name string, index int, err error, duration time.Duration)

StageFailed emits the stage.failed event (for streaming architecture).

func (e *Emitter) StageStarted(name string, index int, stageType interface{})

StageStarted emits the stage.started event (for streaming architecture).

func (e *Emitter) StateLoaded(conversationID string, messageCount int)

StateLoaded emits the state.loaded event.

func (e *Emitter) StateSaved(conversationID string, messageCount int)

StateSaved emits the state.saved event.

func (e *Emitter) StreamInterrupted(reason string)

StreamInterrupted emits the stream.interrupted event.

func (e *Emitter) TokenBudgetExceeded(required, budget, excess int)

TokenBudgetExceeded emits the context.token_budget_exceeded event.

func (e *Emitter) ToolCallCompleted(toolName, callID string, duration time.Duration, status string)

ToolCallCompleted emits the tool.call.completed event.

func (e *Emitter) ToolCallFailed(toolName, callID string, err error, duration time.Duration)

ToolCallFailed emits the tool.call.failed event.

func (e *Emitter) ToolCallStarted(toolName, callID string, args map[string]interface{})

ToolCallStarted emits the tool.call.started event.

func (e *Emitter) ValidationFailed(validatorName, validatorType string, err error, duration time.Duration, violations []string)

ValidationFailed emits the validation.failed event.

func (e *Emitter) ValidationPassed(validatorName, validatorType string, duration time.Duration)

ValidationPassed emits the validation.passed event.

func (e *Emitter) ValidationStarted(validatorName, validatorType string)

ValidationStarted emits the validation.started event.

Event represents a runtime event delivered to listeners.

type Event struct {
Type EventType
Timestamp time.Time
RunID string
SessionID string
ConversationID string
Data EventData
}

EventBus manages event distribution to listeners.

type EventBus struct {
// contains filtered or unexported fields
}

func NewEventBus() *EventBus

NewEventBus creates a new event bus.

func (eb *EventBus) Clear()

Clear removes all listeners (primarily for tests).

func (eb *EventBus) Publish(event *Event)

Publish sends an event to all registered listeners asynchronously. If a store is configured, the event is persisted before dispatch.

func (eb *EventBus) Store() EventStore

Store returns the configured event store, or nil if none.

func (eb *EventBus) Subscribe(eventType EventType, listener Listener)

Subscribe registers a listener for a specific event type.

func (eb *EventBus) SubscribeAll(listener Listener)

SubscribeAll registers a listener for all event types.

func (eb *EventBus) WithStore(store EventStore) *EventBus

WithStore returns a new event bus that persists events to the given store.
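
A sketch of wiring a bus, listeners, and an emitter together; the IDs and counts are placeholders, and since Publish dispatches asynchronously the listeners may run after the emitting call returns:

    import (
        "log"
        "time"

        "github.com/AltairaLabs/PromptKit/runtime/events"
    )

    // observePipeline subscribes to runtime events and emits a pipeline lifecycle.
    func observePipeline() {
        bus := events.NewEventBus()
        bus.SubscribeAll(func(e *events.Event) {
            log.Printf("%s run=%s", e.Type, e.RunID)
        })
        bus.Subscribe(events.EventToolCallStarted, func(e *events.Event) {
            log.Printf("tool call started in session %s", e.SessionID)
        })

        em := events.NewEmitter(bus, "run-1", "session-1", "conv-1") // placeholder IDs
        em.PipelineStarted(3)
        // ... run the pipeline ...
        em.PipelineCompleted(1200*time.Millisecond, 0.0042, 812, 96, 2)
    }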

EventData is a marker interface for event payloads.

type EventData interface {
// contains filtered or unexported methods
}

EventFilter specifies criteria for querying events.

type EventFilter struct {
SessionID string
ConversationID string
RunID string
Types []EventType
Since time.Time
Until time.Time
Limit int
}

EventStore persists events for later replay and analysis.

type EventStore interface {
// Append adds an event to the store.
Append(ctx context.Context, event *Event) error
// Query returns events matching the filter.
Query(ctx context.Context, filter *EventFilter) ([]*Event, error)
// QueryRaw returns stored events with raw data preserved.
// This is useful for export/import where data serialization must be preserved.
QueryRaw(ctx context.Context, filter *EventFilter) ([]*StoredEvent, error)
// Stream returns a channel of events for a session.
// The channel is closed when all events have been sent or context is canceled.
Stream(ctx context.Context, sessionID string) (<-chan *Event, error)
// Close releases any resources held by the store.
Close() error
}

EventStoreWithBlobs combines an EventStore with a BlobStore for multimodal recording.

type EventStoreWithBlobs struct {
EventStore
BlobStore
}

func NewEventStoreWithBlobs(dir string) (*EventStoreWithBlobs, error)

NewEventStoreWithBlobs creates a combined event and blob store.

func (s *EventStoreWithBlobs) Close() error

Close releases resources from both stores.

EventType identifies the type of event emitted by the runtime.

type EventType string

const (
// EventPipelineStarted marks pipeline start.
EventPipelineStarted EventType = "pipeline.started"
// EventPipelineCompleted marks pipeline completion.
EventPipelineCompleted EventType = "pipeline.completed"
// EventPipelineFailed marks pipeline failure.
EventPipelineFailed EventType = "pipeline.failed"
// EventMiddlewareStarted marks middleware start.
EventMiddlewareStarted EventType = "middleware.started"
// EventMiddlewareCompleted marks middleware completion.
EventMiddlewareCompleted EventType = "middleware.completed"
// EventMiddlewareFailed marks middleware failure.
EventMiddlewareFailed EventType = "middleware.failed"
// EventStageStarted marks stage start (for new streaming architecture).
EventStageStarted EventType = "stage.started"
// EventStageCompleted marks stage completion (for new streaming architecture).
EventStageCompleted EventType = "stage.completed"
// EventStageFailed marks stage failure (for new streaming architecture).
EventStageFailed EventType = "stage.failed"
// EventProviderCallStarted marks provider call start.
EventProviderCallStarted EventType = "provider.call.started"
// EventProviderCallCompleted marks provider call completion.
EventProviderCallCompleted EventType = "provider.call.completed"
// EventProviderCallFailed marks provider call failure.
EventProviderCallFailed EventType = "provider.call.failed"
// EventToolCallStarted marks tool call start.
EventToolCallStarted EventType = "tool.call.started"
// EventToolCallCompleted marks tool call completion.
EventToolCallCompleted EventType = "tool.call.completed"
// EventToolCallFailed marks tool call failure.
EventToolCallFailed EventType = "tool.call.failed"
// EventValidationStarted marks validation start.
EventValidationStarted EventType = "validation.started"
// EventValidationPassed marks validation success.
EventValidationPassed EventType = "validation.passed"
// EventValidationFailed marks validation failure.
EventValidationFailed EventType = "validation.failed"
// EventContextBuilt marks context creation.
EventContextBuilt EventType = "context.built"
// EventTokenBudgetExceeded marks token budget overflow.
EventTokenBudgetExceeded EventType = "context.token_budget_exceeded"
// EventStateLoaded marks state load.
EventStateLoaded EventType = "state.loaded"
// EventStateSaved marks state save.
EventStateSaved EventType = "state.saved"
// EventStreamInterrupted marks a stream interruption.
EventStreamInterrupted EventType = "stream.interrupted"
// EventMessageCreated marks message creation.
EventMessageCreated EventType = "message.created"
// EventMessageUpdated marks message update (e.g., cost/latency after completion).
EventMessageUpdated EventType = "message.updated"
// EventConversationStarted marks the start of a new conversation.
EventConversationStarted EventType = "conversation.started"
// EventAudioInput marks audio input from user/environment (multimodal recording).
EventAudioInput EventType = "audio.input"
// EventAudioOutput marks audio output from agent (multimodal recording).
EventAudioOutput EventType = "audio.output"
// EventAudioTranscription marks speech-to-text transcription result.
EventAudioTranscription EventType = "audio.transcription"
// EventVideoFrame marks a video frame capture (multimodal recording).
EventVideoFrame EventType = "video.frame"
// EventScreenshot marks a screenshot capture.
EventScreenshot EventType = "screenshot"
// EventImageInput marks image input from user/environment (multimodal recording).
EventImageInput EventType = "image.input"
// EventImageOutput marks image output from agent (multimodal recording).
EventImageOutput EventType = "image.output"
)

ExportConfig configures session export behavior.

type ExportConfig struct {
// Format is the output format.
Format ExportFormat
// OutputPath is the path to write the output file.
OutputPath string
// IncludeAnnotations when true, overlays annotations on video output.
IncludeAnnotations bool
// IncludeEvents when true, overlays events on video output.
IncludeEvents bool
// IncludeTranscriptions when true, overlays transcriptions on video output.
IncludeTranscriptions bool
// VideoWidth is the output video width (default: 1280).
VideoWidth int
// VideoHeight is the output video height (default: 720).
VideoHeight int
// FontSize is the font size for overlays (default: 24).
FontSize int
// AudioMix specifies how to mix audio tracks.
// "stereo" = input on left, output on right
// "mono" = mix both to mono
// "output" = output audio only
// "input" = input audio only
AudioMix string
// FFmpegPath is the path to ffmpeg binary (default: "ffmpeg").
FFmpegPath string
// StartTime is the start position for export (default: 0).
StartTime time.Duration
// EndTime is the end position for export (default: full duration).
EndTime time.Duration
// OnProgress is called with progress updates (0.0 to 1.0).
OnProgress func(progress float64)
}

func DefaultExportConfig(outputPath string) *ExportConfig

DefaultExportConfig returns sensible defaults for export.

ExportFormat specifies the output format for session export.

type ExportFormat string

const (
// ExportFormatMP4 exports as MP4 video with H.264.
ExportFormatMP4 ExportFormat = "mp4"
// ExportFormatWebM exports as WebM video with VP9.
ExportFormatWebM ExportFormat = "webm"
// ExportFormatWAV exports audio only as WAV.
ExportFormatWAV ExportFormat = "wav"
// ExportFormatMP3 exports audio only as MP3.
ExportFormatMP3 ExportFormat = "mp3"
// ExportFormatJSON exports as JSON timeline.
ExportFormatJSON ExportFormat = "json"
)
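
A minimal sketch of the convenience path for a session that has already been loaded; the output path is a placeholder, and video formats rely on the ffmpeg binary that DefaultExportConfig points at:

    import (
        "context"

        "github.com/AltairaLabs/PromptKit/runtime/events"
    )

    // exportVideo renders a previously loaded session to an H.264 MP4 file.
    func exportVideo(ctx context.Context, session *events.AnnotatedSession) error {
        return events.ExportSession(ctx, session, "session.mp4", events.ExportFormatMP4)
    }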

FileBlobStore implements BlobStore using the local filesystem. Blobs are stored in a directory structure: baseDir/sessionID/hash.ext

type FileBlobStore struct {
// contains filtered or unexported fields
}

func NewFileBlobStore(dir string) (*FileBlobStore, error)

NewFileBlobStore creates a file-based blob store.

func (s *FileBlobStore) Close() error

Close releases any resources.

func (s *FileBlobStore) Delete(ctx context.Context, ref string) error

Delete removes binary data by storage reference.

func (s *FileBlobStore) Load(ctx context.Context, ref string) ([]byte, error)

Load retrieves binary data by storage reference.

func (s *FileBlobStore) LoadReader(ctx context.Context, ref string) (io.ReadCloser, error)

LoadReader returns a reader for binary data by storage reference.

func (s *FileBlobStore) Store(ctx context.Context, sessionID string, data []byte, mimeType string) (*BinaryPayload, error)

Store saves binary data and returns a storage reference.

func (s *FileBlobStore) StoreReader(ctx context.Context, sessionID string, r io.Reader, mimeType string) (*BinaryPayload, error)

StoreReader saves binary data from a reader and returns a storage reference.
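
A small sketch of a blob round trip; the directory and MIME type are placeholders:

    import (
        "context"

        "github.com/AltairaLabs/PromptKit/runtime/events"
    )

    // roundTrip stores a PCM chunk for a session and reads it back by reference.
    func roundTrip(ctx context.Context, sessionID string, chunk []byte) ([]byte, error) {
        blobs, err := events.NewFileBlobStore("recordings/blobs") // placeholder directory
        if err != nil {
            return nil, err
        }
        defer blobs.Close()

        payload, err := blobs.Store(ctx, sessionID, chunk, "audio/pcm")
        if err != nil {
            return nil, err
        }
        return blobs.Load(ctx, payload.StorageRef)
    }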

FileEventStore implements EventStore using JSON Lines files. Each session is stored in a separate file for efficient streaming.

type FileEventStore struct {
// contains filtered or unexported fields
}

func NewFileEventStore(dir string) (*FileEventStore, error)

NewFileEventStore creates a file-based event store. Events are stored as JSON Lines in the specified directory.

func (s *FileEventStore) Append(ctx context.Context, event *Event) error

Append adds an event to the store.

func (s *FileEventStore) Close() error

Close releases all resources.

func (s *FileEventStore) Query(ctx context.Context, filter *EventFilter) ([]*Event, error)

Query returns events matching the filter.

func (s *FileEventStore) QueryRaw(ctx context.Context, filter *EventFilter) ([]*StoredEvent, error)

QueryRaw returns stored events with raw data preserved.

func (s *FileEventStore) Stream(ctx context.Context, sessionID string) (<-chan *Event, error)

Stream returns a channel of events for a session.

func (s *FileEventStore) Sync() error

Sync flushes all pending writes to disk.
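
A hedged sketch of recording bus traffic to disk and querying it back; paths and IDs are placeholders, and because Publish is asynchronous a real caller may need to wait for the write before querying:

    import (
        "context"

        "github.com/AltairaLabs/PromptKit/runtime/events"
    )

    // recordAndQuery persists published events, then reads back a session's tool-call starts.
    func recordAndQuery(ctx context.Context, sessionID string) ([]*events.Event, error) {
        store, err := events.NewFileEventStore("recordings/events") // placeholder directory
        if err != nil {
            return nil, err
        }
        defer store.Close()

        // Events published on this bus are persisted before dispatch.
        bus := events.NewEventBus().WithStore(store)
        em := events.NewEmitter(bus, "run-1", sessionID, "conv-1") // placeholder IDs
        em.ToolCallStarted("search", "call-1", map[string]interface{}{"q": "weather"})

        if err := store.Sync(); err != nil { // flush pending writes before querying
            return nil, err
        }
        return store.Query(ctx, &events.EventFilter{
            SessionID: sessionID,
            Types:     []events.EventType{events.EventToolCallStarted},
        })
    }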

ImageInputData contains data for image input events.

type ImageInputData struct {
// Actor identifies the source of the image (e.g., "user", "environment").
Actor string `json:"actor"`
// Payload contains the image data or reference.
Payload BinaryPayload `json:"payload"`
// Metadata contains image format information.
Metadata VideoMetadata `json:"metadata"` // Reuse VideoMetadata for dimensions
// Description is an optional description of the image content.
Description string `json:"description,omitempty"`
// contains filtered or unexported fields
}

ImageOutputData contains data for image output events.

type ImageOutputData struct {
// Payload contains the image data or reference.
Payload BinaryPayload `json:"payload"`
// Metadata contains image format information.
Metadata VideoMetadata `json:"metadata"` // Reuse VideoMetadata for dimensions
// GeneratedFrom indicates what generated this image (e.g., "dalle", "stable-diffusion").
GeneratedFrom string `json:"generated_from,omitempty"`
// Prompt is the prompt used to generate the image (if applicable).
Prompt string `json:"prompt,omitempty"`
// contains filtered or unexported fields
}

JSONSegment is a media segment in the JSON timeline.

type JSONSegment struct {
StartTime float64 `json:"start_time_seconds"`
Duration float64 `json:"duration_seconds"`
StorageRef string `json:"storage_ref"`
Size int64 `json:"size_bytes"`
}

JSONTimeline is the JSON export format.

type JSONTimeline struct {
SessionID string `json:"session_id"`
Duration float64 `json:"duration_seconds"`
Metadata SessionMetadata `json:"metadata"`
Events []JSONTimelineItem `json:"events"`
Tracks []JSONTrack `json:"tracks"`
}

JSONTimelineItem is a single item in the JSON timeline.

type JSONTimelineItem struct {
Time float64 `json:"time_seconds"`
Duration float64 `json:"duration_seconds,omitempty"`
Type string `json:"type"`
Data map[string]interface{} `json:"data"`
}

JSONTrack is a media track in the JSON timeline.

type JSONTrack struct {
Type string `json:"type"`
Duration float64 `json:"duration_seconds"`
Segments []JSONSegment `json:"segments"`
}

Listener is a function that handles events.

type Listener func(*Event)

MediaSegment represents a continuous segment of media data.

type MediaSegment struct {
// StartTime is when this segment starts relative to session start.
StartTime time.Duration
// Duration is how long this segment lasts.
Duration time.Duration
// Payload contains the media data or reference.
Payload *BinaryPayload
// Metadata contains format information.
Metadata interface{} // AudioMetadata or VideoMetadata
// EventIndex is the index of the source event.
EventIndex int
// ChunkIndex is the original chunk sequence number.
ChunkIndex int
}

MediaTimeline represents a complete media timeline for a session. It organizes audio/video data from events into seekable tracks.

type MediaTimeline struct {
// SessionID is the session this timeline belongs to.
SessionID string
// SessionStart is when the session started.
SessionStart time.Time
// SessionEnd is when the session ended.
SessionEnd time.Time
// Tracks contains all media tracks indexed by type.
Tracks map[TrackType]*MediaTrack
// Events are all the source events.
Events []*Event
// contains filtered or unexported fields
}

func LoadMediaTimeline(ctx context.Context, store EventStore, blobStore BlobStore, sessionID string) (*MediaTimeline, error)

LoadMediaTimeline loads a complete media timeline from storage.

func NewMediaTimeline(sessionID string, events []*Event, blobStore BlobStore) *MediaTimeline

NewMediaTimeline creates a new media timeline from session events.

func (mt *MediaTimeline) Duration() time.Duration

Duration returns the total session duration.

func (mt *MediaTimeline) ExportAudioToWAV(trackType TrackType, path string) error

ExportAudioToWAV is a convenience method to export a specific audio track to WAV.

func (mt *MediaTimeline) GetTrack(trackType TrackType) *MediaTrack

GetTrack returns the track of the specified type, or nil if not present.

func (mt *MediaTimeline) HasTrack(trackType TrackType) bool

HasTrack returns true if the timeline has the specified track type.

func (mt *MediaTimeline) NewMixedAudioReader() (*MixedAudioReader, error)

NewMixedAudioReader creates a reader that mixes both audio tracks.

func (mt *MediaTimeline) NewTrackReader(trackType TrackType) (*TrackReader, error)

NewTrackReader creates a reader for the specified track.
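
A sketch that loads a timeline and exports each audio track it contains; the output naming is illustrative, and non-audio tracks would presumably be skipped or filtered in real use:

    import (
        "context"
        "fmt"

        "github.com/AltairaLabs/PromptKit/runtime/events"
    )

    // exportAudioTracks writes each track of a session timeline to its own WAV file.
    func exportAudioTracks(ctx context.Context, store events.EventStore, blobs events.BlobStore, sessionID string) error {
        timeline, err := events.LoadMediaTimeline(ctx, store, blobs, sessionID)
        if err != nil {
            return err
        }
        fmt.Println("session duration:", timeline.Duration())

        for trackType := range timeline.Tracks {
            path := fmt.Sprintf("%s-%v.wav", sessionID, trackType)
            if err := timeline.ExportAudioToWAV(trackType, path); err != nil {
                return err // non-audio tracks would presumably need to be filtered out first
            }
        }
        return nil
    }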

MediaTrack represents a single track of media (e.g., audio input, audio output).

type MediaTrack struct {
// Type identifies the track type.
Type TrackType
// Segments are the ordered media segments in this track.
Segments []*MediaSegment
// TotalDuration is the total duration of all segments.
TotalDuration time.Duration
// Format contains track-level format information.
Format interface{} // AudioMetadata or VideoMetadata
}

func (t *MediaTrack) ExportToWAV(path string, blobStore BlobStore) error

ExportToWAV exports the audio track to a WAV file. The blobStore is used to load segment data that references external storage.

func (t *MediaTrack) OffsetInSegment(offset time.Duration) (*MediaSegment, time.Duration)

OffsetInSegment returns the segment containing the given offset and the position within it. Returns nil if the offset is beyond the track duration.
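
For instance, mapping a 30-second playback offset back to its source segment might look like the following sketch (mt is a loaded MediaTimeline; the track choice is illustrative):

track := mt.GetTrack(TrackAudioOutput)
if track != nil {
    if seg, pos := track.OffsetInSegment(30 * time.Second); seg != nil {
        fmt.Printf("offset 30s is %v into a segment starting at %v (event %d)\n",
            pos, seg.StartTime, seg.EventIndex)
    }
}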

MessageCreatedData contains data for message creation events.

type MessageCreatedData struct {
Role string
Content string
Index int // Position in conversation history
Parts []types.ContentPart // Multimodal content parts (text, images, audio, video)
ToolCalls []MessageToolCall // Tool calls requested by assistant (if any)
ToolResult *MessageToolResult // Tool result for tool messages (if any)
// contains filtered or unexported fields
}

MessageToolCall represents a tool call in a message event (mirrors runtime/types.MessageToolCall).

type MessageToolCall struct {
ID string `json:"id"` // Unique identifier for this tool call
Name string `json:"name"` // Name of the tool to invoke
Args string `json:"args"` // JSON-encoded tool arguments as string
}

MessageToolResult represents a tool result in a message event (mirrors runtime/types.MessageToolResult).

type MessageToolResult struct {
ID string `json:"id"` // References the MessageToolCall.ID
Name string `json:"name"` // Tool name that was executed
Content string `json:"content"` // Result content
Error string `json:"error,omitempty"` // Error message if tool failed
LatencyMs int64 `json:"latency_ms,omitempty"` // Tool execution latency
}

MessageUpdatedData contains data for message update events.

type MessageUpdatedData struct {
Index int // Position in conversation history
LatencyMs int64
InputTokens int
OutputTokens int
TotalCost float64
// contains filtered or unexported fields
}

MiddlewareCompletedData contains data for middleware completion events.

type MiddlewareCompletedData struct {
Name string
Index int
Duration time.Duration
// contains filtered or unexported fields
}

MiddlewareFailedData contains data for middleware failure events.

type MiddlewareFailedData struct {
Name string
Index int
Error error
Duration time.Duration
// contains filtered or unexported fields
}

MiddlewareStartedData contains data for middleware start events.

type MiddlewareStartedData struct {
Name string
Index int
// contains filtered or unexported fields
}

MixedAudioReader provides a reader that mixes input and output audio tracks.

type MixedAudioReader struct {
// contains filtered or unexported fields
}

func (r *MixedAudioReader) Channels() int

Channels returns the number of audio channels.

func (r *MixedAudioReader) Close() error

Close releases resources.

func (r *MixedAudioReader) Position() time.Duration

Position returns the current playback position.

func (r *MixedAudioReader) SampleRate() int

SampleRate returns the audio sample rate.

func (r *MixedAudioReader) Seek(offset time.Duration) error

Seek moves both readers to the specified position.

PipelineCompletedData contains data for pipeline completion events.

type PipelineCompletedData struct {
Duration time.Duration
TotalCost float64
InputTokens int
OutputTokens int
MessageCount int
// contains filtered or unexported fields
}

PipelineFailedData contains data for pipeline failure events.

type PipelineFailedData struct {
Error error
Duration time.Duration
// contains filtered or unexported fields
}

PipelineStartedData contains data for pipeline start events.

type PipelineStartedData struct {
MiddlewareCount int
// contains filtered or unexported fields
}

PlayerCallback is called for each event during replay. Return false to stop playback.

type PlayerCallback func(event *Event, position time.Duration) bool

PlayerConfig configures session replay behavior.

type PlayerConfig struct {
// Speed is the playback speed multiplier (1.0 = real-time, 2.0 = 2x speed, 0.5 = half speed).
// Default: 1.0
Speed float64
// OnEvent is called for each event during replay.
// If nil, events are still played but not observed.
OnEvent PlayerCallback
// OnStateChange is called when the player state changes.
OnStateChange func(state PlayerState)
// OnComplete is called when playback reaches the end.
OnComplete func()
// OnError is called when an error occurs during playback.
OnError func(err error)
// SkipTiming when true, delivers all events immediately without timing delays.
// Useful for fast-forward or event analysis.
SkipTiming bool
}

func DefaultPlayerConfig() *PlayerConfig

DefaultPlayerConfig returns sensible defaults for playback.

PlayerState represents the current state of the session player.

type PlayerState int

const (
// PlayerStateStopped indicates the player is stopped.
PlayerStateStopped PlayerState = iota
// PlayerStatePlaying indicates the player is actively replaying events.
PlayerStatePlaying
// PlayerStatePaused indicates the player is paused.
PlayerStatePaused
)

ProviderCallCompletedData contains data for provider call completion events.

type ProviderCallCompletedData struct {
Provider string
Model string
Duration time.Duration
InputTokens int
OutputTokens int
CachedTokens int
Cost float64
FinishReason string
ToolCallCount int
// contains filtered or unexported fields
}

ProviderCallFailedData contains data for provider call failure events.

type ProviderCallFailedData struct {
Provider string
Model string
Error error
Duration time.Duration
// contains filtered or unexported fields
}

ProviderCallStartedData contains data for provider call start events.

type ProviderCallStartedData struct {
Provider string
Model string
MessageCount int
ToolCount int
// contains filtered or unexported fields
}

Rect represents a rectangle for screen coordinates.

type Rect struct {
X int `json:"x"`
Y int `json:"y"`
Width int `json:"width"`
Height int `json:"height"`
}

ScreenshotData contains data for screenshot events.

type ScreenshotData struct {
// Payload contains the image data or reference.
Payload BinaryPayload `json:"payload"`
// Metadata contains image format information.
Metadata VideoMetadata `json:"metadata"` // Reuse VideoMetadata for dimensions
// WindowTitle is the title of the captured window (if applicable).
WindowTitle string `json:"window_title,omitempty"`
// WindowBounds contains the window position and size.
WindowBounds *Rect `json:"window_bounds,omitempty"`
// Reason describes why the screenshot was taken (e.g., "before_action", "after_action", "periodic").
Reason string `json:"reason,omitempty"`
// contains filtered or unexported fields
}

SerializableEvent is a JSON-friendly version of Event. The Data field uses json.RawMessage to preserve type information during round-trips.

type SerializableEvent struct {
Type EventType `json:"type"`
Timestamp time.Time `json:"timestamp"`
RunID string `json:"run_id,omitempty"`
SessionID string `json:"session_id"`
ConversationID string `json:"conversation_id,omitempty"`
DataType string `json:"data_type,omitempty"`
Data json.RawMessage `json:"data,omitempty"`
}

func (se *SerializableEvent) RawData() json.RawMessage

RawData returns the raw JSON data for custom unmarshaling.

SessionExporter exports annotated sessions to various formats.

type SessionExporter struct {
// contains filtered or unexported fields
}

func NewSessionExporter(session *AnnotatedSession, config *ExportConfig) *SessionExporter

NewSessionExporter creates a new session exporter.

func (e *SessionExporter) Export(ctx context.Context) error

Export exports the session to the configured format.

SessionMetadata contains high-level session information.

type SessionMetadata struct {
// StartTime is when the session started.
StartTime time.Time
// EndTime is when the session ended.
EndTime time.Time
// Duration is the total session duration.
Duration time.Duration
// EventCounts by type.
EventCounts map[EventType]int
// AnnotationCounts by type.
AnnotationCounts map[annotations.AnnotationType]int
// HasAudioInput indicates if the session has audio input.
HasAudioInput bool
// HasAudioOutput indicates if the session has audio output.
HasAudioOutput bool
// HasVideo indicates if the session has video.
HasVideo bool
// TotalAudioInputDuration is the total duration of audio input.
TotalAudioInputDuration time.Duration
// TotalAudioOutputDuration is the total duration of audio output.
TotalAudioOutputDuration time.Duration
// ConversationTurns is the number of conversation turns.
ConversationTurns int
// ToolCalls is the number of tool calls.
ToolCalls int
// ProviderCalls is the number of provider calls.
ProviderCalls int
}

SessionPlayer replays recorded session events with timing control.

type SessionPlayer struct {
// contains filtered or unexported fields
}

func NewSessionPlayer(store EventStore, sessionID string, config *PlayerConfig) *SessionPlayer

NewSessionPlayer creates a new player for replaying a recorded session.
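
A minimal replay loop might look like the sketch below; store is assumed to be an EventStore containing the recorded session, ctx a context.Context, and package qualifiers are omitted:

cfg := DefaultPlayerConfig()
cfg.Speed = 2.0 // replay at twice real-time
cfg.OnEvent = func(event *Event, position time.Duration) bool {
    fmt.Printf("[%v] %v\n", position, event.Type)
    return true // return false to stop playback early
}

player := NewSessionPlayer(store, "session-123", cfg)
if err := player.Load(ctx); err != nil { // Load must be called before Play
    return err
}
player.Play(ctx)
player.Wait()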

func (p *SessionPlayer) CurrentTime() time.Duration

CurrentTime returns the elapsed playback time from the session start.

func (p *SessionPlayer) Duration() time.Duration

Duration returns the total duration of the session.

func (p *SessionPlayer) EventCount() int

EventCount returns the number of loaded events.

func (p *SessionPlayer) Events() []*Event

Events returns all loaded events.

func (p *SessionPlayer) Load(ctx context.Context) error

Load loads all events for the session into memory. Must be called before Play.

func (p *SessionPlayer) Pause()

Pause pauses playback.

func (p *SessionPlayer) Play(ctx context.Context)

Play starts or resumes playback.

func (p *SessionPlayer) Position() int

Position returns the current event index.

func (p *SessionPlayer) Seek(position time.Duration)

Seek jumps to a specific position in the session. The position is specified as a duration from the session start.

func (p *SessionPlayer) SeekToEvent(index int)

SeekToEvent jumps to a specific event index.

func (p *SessionPlayer) SetSpeed(speed float64)

SetSpeed changes the playback speed.

func (p *SessionPlayer) State() PlayerState

State returns the current player state.

func (p *SessionPlayer) Stop()

Stop stops playback and resets position.

func (p *SessionPlayer) Wait()

Wait blocks until playback completes or is stopped.

StageCompletedData contains data for stage completion events (streaming architecture).

type StageCompletedData struct {
Name string
Index int
Duration time.Duration
StageType string
// contains filtered or unexported fields
}

StageFailedData contains data for stage failure events (streaming architecture).

type StageFailedData struct {
Name string
Index int
Error error
Duration time.Duration
StageType string
// contains filtered or unexported fields
}

StageStartedData contains data for stage start events (streaming architecture).

type StageStartedData struct {
Name string
Index int
StageType string // Type of stage (transform, accumulate, generate, sink, bidirectional)
// contains filtered or unexported fields
}

StateLoadedData contains data for state load events.

type StateLoadedData struct {
ConversationID string
MessageCount int
// contains filtered or unexported fields
}

StateSavedData contains data for state save events.

type StateSavedData struct {
ConversationID string
MessageCount int
// contains filtered or unexported fields
}

StoredEvent wraps an Event with storage metadata for serialization.

type StoredEvent struct {
Sequence int64 `json:"seq"`
ParentID int64 `json:"parent_id,omitempty"`
Event *SerializableEvent `json:"event"`
}

StreamInterruptedData contains data for stream interruption events.

type StreamInterruptedData struct {
Reason string
// contains filtered or unexported fields
}

SyncPlayer provides synchronized playback of events, audio, and annotations.

type SyncPlayer struct {
// contains filtered or unexported fields
}

func NewSyncPlayer(timeline *MediaTimeline, annots []*annotations.Annotation, config *SyncPlayerConfig) *SyncPlayer

NewSyncPlayer creates a new synchronized player.

func (p *SyncPlayer) AnnotationCount() int

AnnotationCount returns the number of annotations.

func (p *SyncPlayer) Annotations() []*annotations.Annotation

Annotations returns all annotations.

func (p *SyncPlayer) Duration() time.Duration

Duration returns the total session duration.

func (p *SyncPlayer) EventCount() int

EventCount returns the number of events.

func (p *SyncPlayer) GetAnnotationsInRange(start, end time.Duration) []*annotations.Annotation

GetAnnotationsInRange returns annotations active within the specified time range.

func (p *SyncPlayer) GetEventsInRange(start, end time.Duration) []*Event

GetEventsInRange returns events within the specified time range.

func (p *SyncPlayer) Pause()

Pause pauses playback.

func (p *SyncPlayer) Play(ctx context.Context) error

Play starts or resumes playback.

func (p *SyncPlayer) Position() time.Duration

Position returns the current playback position.

func (p *SyncPlayer) Seek(position time.Duration) error

Seek jumps to a specific position in the session.

func (p *SyncPlayer) SetSpeed(speed float64)

SetSpeed changes the playback speed.

func (p *SyncPlayer) State() PlayerState

State returns the current player state.

func (p *SyncPlayer) Stop()

Stop stops playback and resets position.

func (p *SyncPlayer) Timeline() *MediaTimeline

Timeline returns the media timeline.

func (p *SyncPlayer) Wait()

Wait blocks until playback completes or is stopped.

SyncPlayerConfig configures synchronized playback behavior.

type SyncPlayerConfig struct {
// Speed is the playback speed multiplier (1.0 = real-time).
Speed float64
// OnEvent is called for each event during replay.
OnEvent PlayerCallback
// OnAudio is called with audio data chunks during playback.
// The handler receives raw PCM data that can be played through speakers.
OnAudio AudioHandler
// OnAnnotation is called when an annotation becomes active.
OnAnnotation AnnotationHandler
// OnStateChange is called when the player state changes.
OnStateChange func(state PlayerState)
// OnComplete is called when playback reaches the end.
OnComplete func()
// OnError is called when an error occurs during playback.
OnError func(err error)
// AudioBufferSize is the size of audio chunks delivered to OnAudio.
// Default: 4096 bytes
AudioBufferSize int
// SkipTiming when true, delivers all events immediately without timing delays.
SkipTiming bool
}

func DefaultSyncPlayerConfig() *SyncPlayerConfig

DefaultSyncPlayerConfig returns sensible defaults for synchronized playback.
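
A sketch of synchronized playback, assuming timeline was produced by LoadMediaTimeline (or a TimelineBuilder) and annots holds the session's annotations:

cfg := DefaultSyncPlayerConfig()
cfg.OnEvent = func(event *Event, position time.Duration) bool {
    fmt.Printf("[%v] %v\n", position, event.Type)
    return true
}
cfg.OnComplete = func() { fmt.Println("playback finished") }

player := NewSyncPlayer(timeline, annots, cfg)
if err := player.Play(ctx); err != nil {
    return err
}
player.Wait()

// Annotations active in the first ten seconds can also be inspected directly.
early := player.GetAnnotationsInRange(0, 10*time.Second)
fmt.Println("annotations in first 10s:", len(early))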

TimelineBuilder helps build a media timeline incrementally.

type TimelineBuilder struct {
// contains filtered or unexported fields
}

func NewTimelineBuilder(sessionID string, blobStore BlobStore) *TimelineBuilder

NewTimelineBuilder creates a new timeline builder.

func (b *TimelineBuilder) AddEvent(event *Event)

AddEvent adds an event to the timeline.

func (b *TimelineBuilder) Build() *MediaTimeline

Build creates the final MediaTimeline.
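
When events are already in memory (for example, while a recording is still being written), the timeline can be assembled incrementally. A sketch, assuming sessionEvents is a []*Event and blobStore is a BlobStore:

builder := NewTimelineBuilder("session-123", blobStore)
for _, ev := range sessionEvents {
    builder.AddEvent(ev)
}
timeline := builder.Build()
fmt.Println("tracks:", len(timeline.Tracks), "duration:", timeline.Duration())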

TimelineItem represents a single item in the timeline view.

type TimelineItem struct {
// Type is the item type.
Type TimelineItemType
// Time is when this item occurs relative to session start.
Time time.Duration
// Duration is how long this item spans (0 for instantaneous items).
Duration time.Duration
// Event is the event (if Type == TimelineItemEvent).
Event *Event
// Annotation is the annotation (if Type == TimelineItemAnnotation).
Annotation *annotations.Annotation
// Track is the media track (if Type == TimelineItemMedia).
Track TrackType
// Segment is the media segment (if Type == TimelineItemMedia).
Segment *MediaSegment
}

TimelineItemType identifies the type of timeline item.

type TimelineItemType int

const (
// TimelineItemEvent is an event item.
TimelineItemEvent TimelineItemType = iota
// TimelineItemAnnotation is an annotation item.
TimelineItemAnnotation
// TimelineItemMedia is a media segment item.
TimelineItemMedia
)

TimelineView represents a view of the session timeline.

type TimelineView struct {
// Items are all items in the timeline, sorted by time.
Items []TimelineItem
}

TokenBudgetExceededData contains data for token budget exceeded events.

type TokenBudgetExceededData struct {
RequiredTokens int
Budget int
Excess int
// contains filtered or unexported fields
}

ToolCallCompletedData contains data for tool call completion events.

type ToolCallCompletedData struct {
ToolName string
CallID string
Duration time.Duration
Status string // e.g. "success", "error", "pending"
// contains filtered or unexported fields
}

ToolCallFailedData contains data for tool call failure events.

type ToolCallFailedData struct {
ToolName string
CallID string
Error error
Duration time.Duration
// contains filtered or unexported fields
}

ToolCallStartedData contains data for tool call start events.

type ToolCallStartedData struct {
ToolName string
CallID string
Args map[string]interface{}
// contains filtered or unexported fields
}

TrackReader provides a reader interface for a media track.

type TrackReader struct {
// contains filtered or unexported fields
}

func (r *TrackReader) Close() error

Close releases resources.

func (r *TrackReader) Position() time.Duration

Position returns the current playback position.

func (r *TrackReader) Read(p []byte) (n int, err error)

Read implements io.Reader for streaming track data.

func (r *TrackReader) Seek(offset time.Duration) error

Seek implements io.Seeker for random access.

TrackType identifies the type of media track.

type TrackType string

const (
// TrackAudioInput represents user/environment audio input.
TrackAudioInput TrackType = "audio_input"
// TrackAudioOutput represents agent audio output.
TrackAudioOutput TrackType = "audio_output"
// TrackVideo represents video frames.
TrackVideo TrackType = "video"
)

ValidationFailedData contains data for validation failure events.

type ValidationFailedData struct {
ValidatorName string
ValidatorType string
Error error
Duration time.Duration
Violations []string
// contains filtered or unexported fields
}

ValidationPassedData contains data for validation success events.

type ValidationPassedData struct {
ValidatorName string
ValidatorType string
Duration time.Duration
// contains filtered or unexported fields
}

ValidationStartedData contains data for validation start events.

type ValidationStartedData struct {
ValidatorName string
ValidatorType string // e.g. "input", "output", "semantic"
// contains filtered or unexported fields
}

VideoFrameData contains data for video frame events.

type VideoFrameData struct {
// Payload contains the frame data or reference.
Payload BinaryPayload `json:"payload"`
// Metadata contains video format information.
Metadata VideoMetadata `json:"metadata"`
// FrameIndex is the frame sequence number.
FrameIndex int64 `json:"frame_index"`
// TimestampMs is the frame timestamp in milliseconds from session start.
TimestampMs int64 `json:"timestamp_ms"`
// IsKeyframe indicates if this is a keyframe (for seeking).
IsKeyframe bool `json:"is_keyframe"`
// contains filtered or unexported fields
}

VideoMetadata contains format information for video data.

type VideoMetadata struct {
// Width is the video frame width in pixels.
Width int `json:"width"`
// Height is the video frame height in pixels.
Height int `json:"height"`
// Encoding is the video encoding format (e.g., "h264", "vp8", "mjpeg", "raw").
Encoding string `json:"encoding"`
// FrameRate is the frames per second.
FrameRate float64 `json:"frame_rate,omitempty"`
// DurationMs is the duration in milliseconds (for video segments).
DurationMs int64 `json:"duration_ms,omitempty"`
}
import "github.com/AltairaLabs/PromptKit/runtime/logger"

Package logger provides structured logging with automatic PII redaction.

This package wraps Go’s standard log/slog with convenience functions for:

  • LLM API call logging (requests, responses, errors)
  • Tool execution logging
  • Automatic API key and sensitive data redaction
  • Contextual logging with request tracing
  • Level-based verbosity control

All exported functions use the global DefaultLogger which can be configured for different output formats and log levels.

Log format constants

const (
FormatJSON = "json"
FormatText = "text"
)

var (
// DefaultLogger is the global structured logger instance.
// It is safe for concurrent use and initialized with slog.LevelInfo by default.
DefaultLogger *slog.Logger
)

func APIRequest(provider, method, url string, headers map[string]string, body interface{})

APIRequest logs HTTP API request details at debug level with automatic PII redaction. For performance, this function is a no-op when debug logging is disabled.

Parameters:

  • provider: The API provider name (e.g., “OpenAI”, “Anthropic”)
  • method: HTTP method (GET, POST, etc.)
  • url: Request URL (will be redacted for sensitive data)
  • headers: HTTP headers map (will be redacted)
  • body: Request body (will be marshaled to JSON and redacted)

Sensitive data in URL, headers, and body are automatically redacted.

func APIResponse(provider string, statusCode int, body string, err error)

APIResponse logs HTTP API response details at debug level with automatic PII redaction. For performance, this function is a no-op when debug logging is disabled.

Parameters:

  • provider: The API provider name
  • statusCode: HTTP status code
  • body: Response body as string (will be redacted)
  • err: Error if the request failed (takes precedence over body logging)

Response bodies are parsed as JSON for pretty formatting when possible. Status codes are logged with emoji indicators: 🟢 (2xx), 🟡 (3xx), 🔴 (4xx/5xx).

func Configure(cfg *LoggingConfigSpec) error

Configure applies a LoggingConfigSpec to the global logger. This reconfigures the logger with the new settings.
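
A sketch of configuring the global logger at startup with JSON output, info as the default level, and debug logging for a single module (the module name and common fields are illustrative):

err := logger.Configure(&logger.LoggingConfigSpec{
    DefaultLevel: "info",
    Format:       logger.FormatJSON,
    CommonFields: map[string]string{"service": "promptkit-runtime"},
    Modules: []logger.ModuleLoggingSpec{
        {Name: "runtime.pipeline", Level: "debug"},
    },
})
if err != nil {
    return err
}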

func Debug(msg string, args ...any)

Debug logs a debug-level message with structured attributes. Debug messages are only output when the log level is set to LevelDebug or lower.

func DebugContext(ctx context.Context, msg string, args ...any)

DebugContext logs a debug message with context and structured attributes.

func Error(msg string, args ...any)

Error logs an error message with structured attributes. Use for errors that affect operation but don’t cause complete failure.

func ErrorContext(ctx context.Context, msg string, args ...any)

ErrorContext logs an error message with context and structured attributes.

func Info(msg string, args ...any)

Info logs an informational message with structured key-value attributes. Args should be provided in key-value pairs: key1, value1, key2, value2, …

func InfoContext(ctx context.Context, msg string, args ...any)

InfoContext logs an informational message with context and structured attributes. The context can be used for request tracing and cancellation.

func LLMCall(provider, role string, messages int, temperature float64, attrs ...any)

LLMCall logs an LLM API call with structured fields for observability. Additional attributes can be passed as key-value pairs after the required parameters.

func LLMError(provider, role string, err error, attrs ...any)

LLMError logs an LLM API error for debugging and monitoring.

func LLMResponse(provider, role string, tokensIn, tokensOut int, cost float64, attrs ...any)

LLMResponse logs an LLM API response with token usage and cost tracking. Cost should be provided in USD (e.g., 0.0001 for $0.0001).

func ParseLevel(s string) slog.Level

ParseLevel converts a string log level to slog.Level. Supported values: “trace”, “debug”, “info”, “warn”, “warning”, “error”. Unknown values default to LevelInfo.

func RedactSensitiveData(input string) string

RedactSensitiveData removes API keys and other sensitive information from strings. It replaces matched patterns with a redacted form that preserves the first few characters for debugging while hiding the sensitive portion.

Supported patterns:

  • OpenAI keys (sk-…): Shows first 4 chars
  • Google keys (AIza…): Shows first 4 chars
  • Bearer tokens: Shows only “Bearer [REDACTED]”

This function is safe for concurrent use as it only reads from the compiled patterns.
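
For example (both secrets below are fabricated):

// Provider keys keep a short identifying prefix; bearer tokens are fully redacted.
fmt.Println(logger.RedactSensitiveData(`{"api_key": "AIzaFakeKey123456789012345"}`))
fmt.Println(logger.RedactSensitiveData("Authorization: Bearer c2VjcmV0LXRva2Vu"))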

func SetLevel(level slog.Level)

SetLevel changes the logging level for all subsequent log operations. This is safe for concurrent use as it replaces the entire logger instance.

func SetOutput(w io.Writer)

SetOutput changes the log output destination and reinitializes the logger. This is primarily for testing. Pass nil to reset to os.Stderr.

func SetVerbose(verbose bool)

SetVerbose enables debug-level logging when verbose is true, otherwise sets info-level. This is a convenience wrapper around SetLevel for command-line verbose flags.

func ToolCall(provider string, messages, tools int, choice string, attrs ...any)

ToolCall logs a tool execution request with context about available tools. The choice parameter indicates the tool selection mode (e.g., “auto”, “required”, “none”).

func ToolResponse(provider string, tokensIn, tokensOut, toolCalls int, cost float64, attrs ...any)

ToolResponse logs the result of tool executions with token usage and cost.

func Warn(msg string, args ...any)

Warn logs a warning message with structured attributes. Use for recoverable errors or unexpected but non-critical situations.

func WarnContext(ctx context.Context, msg string, args ...any)

WarnContext logs a warning message with context and structured attributes.

func WithCorrelationID(ctx context.Context, correlationID string) context.Context

WithCorrelationID returns a new context with the correlation ID set.

func WithEnvironment(ctx context.Context, environment string) context.Context

WithEnvironment returns a new context with the environment set.

func WithLoggingContext(ctx context.Context, fields *LoggingFields) context.Context

WithLoggingContext returns a new context with multiple logging fields set at once. This is a convenience function for setting multiple fields in one call. Only non-empty values are set.
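
A sketch of contextual logging: fields attached to the context are added to every record emitted through the context-aware helpers when the active handler is a ContextHandler or ModuleHandler (the field values are illustrative):

ctx := logger.WithLoggingContext(context.Background(), &logger.LoggingFields{
    SessionID: "sess-42",
    Provider:  "openai",
    Model:     "gpt-4o",
})
ctx = logger.WithRequestID(ctx, "req-7f3a")

logger.InfoContext(ctx, "provider call completed",
    "input_tokens", 120,
    "output_tokens", 64,
)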

func WithModel(ctx context.Context, model string) context.Context

WithModel returns a new context with the model name set.

func WithProvider(ctx context.Context, provider string) context.Context

WithProvider returns a new context with the provider name set.

func WithRequestID(ctx context.Context, requestID string) context.Context

WithRequestID returns a new context with the request ID set.

func WithScenario(ctx context.Context, scenario string) context.Context

WithScenario returns a new context with the scenario name set.

func WithScenarioVersion(ctx context.Context, version string) context.Context

WithScenarioVersion returns a new context with the scenario version set.

func WithSessionID(ctx context.Context, sessionID string) context.Context

WithSessionID returns a new context with the session ID set.

func WithStage(ctx context.Context, stage string) context.Context

WithStage returns a new context with the pipeline stage set.

func WithTurnID(ctx context.Context, turnID string) context.Context

WithTurnID returns a new context with the turn ID set.

ContextHandler is a slog.Handler that automatically extracts logging fields from context and adds them to log records. It wraps an inner handler and delegates all actual logging to it after enriching records with context data.

type ContextHandler struct {
// contains filtered or unexported fields
}

func NewContextHandler(inner slog.Handler, commonFields ...slog.Attr) *ContextHandler

NewContextHandler creates a new ContextHandler wrapping the given handler. The commonFields are added to every log record (useful for environment, service name, etc.).
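
A sketch of wiring a ContextHandler manually around a standard slog JSON handler, as an alternative to calling Configure (the service name and level are illustrative; ctx carries fields set via the With* helpers above):

inner := slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug})
handler := logger.NewContextHandler(inner,
    slog.String("service", "promptkit-runtime"),
    slog.String("env", "staging"),
)
log := slog.New(handler)
log.InfoContext(ctx, "pipeline started") // context fields are appended automatically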

func (h *ContextHandler) Enabled(ctx context.Context, level slog.Level) bool

Enabled reports whether the handler handles records at the given level. It delegates to the inner handler.

func (h *ContextHandler) Handle(ctx context.Context, r slog.Record) error

Handle processes the log record by extracting context fields and adding them to the record before delegating to the inner handler.

func (h *ContextHandler) Unwrap() slog.Handler

Unwrap returns the inner handler. This is useful for handler chains that need to inspect or replace the underlying handler.

func (h *ContextHandler) WithAttrs(attrs []slog.Attr) slog.Handler

WithAttrs returns a new handler with the given attributes added. The attributes are added to the inner handler.

func (h *ContextHandler) WithGroup(name string) slog.Handler

WithGroup returns a new handler with the given group name. The group is added to the inner handler.

LoggingConfigSpec defines the logging configuration for the Configure function. This mirrors the config.LoggingConfigSpec to avoid import cycles.

type LoggingConfigSpec struct {
DefaultLevel string
Format string // "json" or "text"
CommonFields map[string]string
Modules []ModuleLoggingSpec
}

LoggingFields holds all standard logging context fields. This struct is used with WithLoggingContext for bulk field setting.

type LoggingFields struct {
TurnID string
Scenario string
ScenarioVersion string
Provider string
Model string
Stage string
SessionID string
RequestID string
CorrelationID string
Environment string
}

func ExtractLoggingFields(ctx context.Context) LoggingFields

ExtractLoggingFields extracts all logging fields from a context. Returns a LoggingFields struct with all values found in the context.

ModuleConfig manages per-module logging configuration. It supports hierarchical module names where more specific modules override less specific ones (e.g., “runtime.pipeline” overrides “runtime”).

type ModuleConfig struct {
// contains filtered or unexported fields
}

func GetModuleConfig() *ModuleConfig

GetModuleConfig returns the global module configuration. This is primarily for testing.

func NewModuleConfig(defaultLevel slog.Level) *ModuleConfig

NewModuleConfig creates a new ModuleConfig with the given default level.

func (m *ModuleConfig) LevelFor(module string) slog.Level

LevelFor returns the log level for the given module. It checks for exact match first, then walks up the hierarchy. For example, for “runtime.pipeline.stage”:

  1. Check “runtime.pipeline.stage” (exact match)
  2. Check “runtime.pipeline” (parent)
  3. Check “runtime” (grandparent)
  4. Return default level
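
A sketch of that resolution order (module names are illustrative):

mc := logger.NewModuleConfig(slog.LevelInfo)
mc.SetModuleLevel("runtime", slog.LevelWarn)
mc.SetModuleLevel("runtime.pipeline", slog.LevelDebug)

mc.LevelFor("runtime.pipeline.stage") // debug: inherited from parent "runtime.pipeline"
mc.LevelFor("runtime.statestore")     // warn:  inherited from "runtime"
mc.LevelFor("providers.openai")       // info:  no match, default level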

func (m *ModuleConfig) SetDefaultLevel(level slog.Level)

SetDefaultLevel sets the default log level.

func (m *ModuleConfig) SetModuleLevel(module string, level slog.Level)

SetModuleLevel sets the log level for a specific module. Module names use dot notation (e.g., “runtime.pipeline”).

ModuleHandler extends ContextHandler with per-module log level filtering. It determines the module name from the call stack and applies the appropriate log level from the module configuration.

type ModuleHandler struct {
ContextHandler
// contains filtered or unexported fields
}

func NewModuleHandler(inner slog.Handler, moduleConfig *ModuleConfig, commonFields ...slog.Attr) *ModuleHandler

NewModuleHandler creates a new ModuleHandler with per-module log level filtering.

func (h *ModuleHandler) Enabled(ctx context.Context, level slog.Level) bool

Enabled reports whether the handler handles records at the given level. It uses the module configuration to determine the level for the calling module.

func (h *ModuleHandler) Handle(ctx context.Context, r slog.Record) error

Handle processes the log record, adding the module name as an attribute.

func (h *ModuleHandler) WithAttrs(attrs []slog.Attr) slog.Handler

WithAttrs returns a new handler with the given attributes added.

func (h *ModuleHandler) WithGroup(name string) slog.Handler

WithGroup returns a new handler with the given group name.

ModuleLoggingSpec configures logging for a specific module.

type ModuleLoggingSpec struct {
Name string
Level string
Fields map[string]string
}
import "github.com/AltairaLabs/PromptKit/runtime/mcp"

ProtocolVersion defines the MCP protocol version (as of 2025-06-18).

const ProtocolVersion = "2025-06-18"

var (
// ErrClientNotInitialized is returned when attempting operations on uninitialized client
ErrClientNotInitialized = errors.New("mcp: client not initialized")
// ErrClientClosed is returned when attempting operations on closed client
ErrClientClosed = errors.New("mcp: client closed")
// ErrServerUnresponsive is returned when server doesn't respond
ErrServerUnresponsive = errors.New("mcp: server unresponsive")
// ErrProcessDied is returned when server process dies unexpectedly
ErrProcessDied = errors.New("mcp: server process died")
)

Client interface defines the MCP client operations

type Client interface {
// Initialize establishes the MCP connection and negotiates capabilities
Initialize(ctx context.Context) (*InitializeResponse, error)
// ListTools retrieves all available tools from the server
ListTools(ctx context.Context) ([]Tool, error)
// CallTool executes a tool with the given arguments
CallTool(ctx context.Context, name string, arguments json.RawMessage) (*ToolCallResponse, error)
// Close terminates the connection to the MCP server
Close() error
// IsAlive checks if the connection is still active
IsAlive() bool
}

ClientCapabilities describes what the client supports

type ClientCapabilities struct {
Elicitation *ElicitationCapability `json:"elicitation,omitempty"`
Sampling *SamplingCapability `json:"sampling,omitempty"`
Logging *LoggingCapability `json:"logging,omitempty"`
}

ClientOptions configures MCP client behavior

type ClientOptions struct {
// RequestTimeout is the default timeout for RPC requests
RequestTimeout time.Duration
// InitTimeout is the timeout for the initialization handshake
InitTimeout time.Duration
// MaxRetries is the number of times to retry failed requests
MaxRetries int
// RetryDelay is the initial delay between retries (exponential backoff)
RetryDelay time.Duration
// EnableGracefulDegradation allows operations to continue even if MCP is unavailable
EnableGracefulDegradation bool
}

func DefaultClientOptions() ClientOptions

DefaultClientOptions returns sensible defaults
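
A sketch of loosening the defaults for a slow-starting server before creating a client directly (the command and timeouts are illustrative):

opts := mcp.DefaultClientOptions()
opts.InitTimeout = 30 * time.Second
opts.RequestTimeout = 15 * time.Second
opts.MaxRetries = 2

client := mcp.NewStdioClientWithOptions(mcp.ServerConfig{
    Name:    "search",
    Command: "./search-mcp-server",
}, opts)
defer client.Close()

if _, err := client.Initialize(ctx); err != nil {
    return err
}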

Content represents a content item in MCP responses

type Content struct {
Type string `json:"type"` // "text", "image", "resource", etc.
Text string `json:"text,omitempty"`
Data string `json:"data,omitempty"` // Base64 encoded data
MimeType string `json:"mimeType,omitempty"` // MIME type for data
URI string `json:"uri,omitempty"` // URI for resources
}

ElicitationCapability indicates the client supports elicitation

type ElicitationCapability struct{}

Implementation describes client or server implementation details

type Implementation struct {
Name string `json:"name"`
Version string `json:"version"`
}

InitializeRequest represents the initialization request params

type InitializeRequest struct {
ProtocolVersion string `json:"protocolVersion"`
Capabilities ClientCapabilities `json:"capabilities"`
ClientInfo Implementation `json:"clientInfo"`
}

InitializeResponse represents the initialization response

type InitializeResponse struct {
ProtocolVersion string `json:"protocolVersion"`
Capabilities ServerCapabilities `json:"capabilities"`
ServerInfo Implementation `json:"serverInfo"`
}

JSONRPCError represents a JSON-RPC 2.0 error

type JSONRPCError struct {
Code int `json:"code"`
Message string `json:"message"`
Data interface{} `json:"data,omitempty"`
}

JSONRPCMessage represents a JSON-RPC 2.0 message

type JSONRPCMessage struct {
JSONRPC string `json:"jsonrpc"`
ID interface{} `json:"id,omitempty"` // Request ID (number or string)
Method string `json:"method,omitempty"` // Method name for requests/notifications
Params json.RawMessage `json:"params,omitempty"` // Parameters for method
Result json.RawMessage `json:"result,omitempty"` // Result for responses
Error *JSONRPCError `json:"error,omitempty"` // Error for error responses
}

LoggingCapability indicates the client supports logging

type LoggingCapability struct{}

PromptsCapability indicates the server supports prompts

type PromptsCapability struct {
ListChanged bool `json:"listChanged,omitempty"`
}

Registry interface defines the MCP server registry operations

type Registry interface {
// RegisterServer adds a new MCP server configuration
RegisterServer(config ServerConfig) error
// GetClient returns an active client for the given server name
GetClient(ctx context.Context, serverName string) (Client, error)
// GetClientForTool returns the client that provides the specified tool
GetClientForTool(ctx context.Context, toolName string) (Client, error)
// ListServers returns all registered server names
ListServers() []string
// ListAllTools returns all tools from all connected servers
ListAllTools(ctx context.Context) (map[string][]Tool, error)
// Close shuts down all MCP servers and connections
Close() error
}

RegistryImpl implements the Registry interface

type RegistryImpl struct {
// contains filtered or unexported fields
}

func NewRegistry() *RegistryImpl

NewRegistry creates a new MCP server registry

func NewRegistryWithServers(serverConfigs []ServerConfigData) (*RegistryImpl, error)

NewRegistryWithServers creates a registry and registers multiple servers. Returns error if any server registration fails.

func (r *RegistryImpl) Close() error

Close shuts down all MCP servers and connections

func (r *RegistryImpl) GetClient(ctx context.Context, serverName string) (Client, error)

GetClient returns an active client for the given server name

func (r *RegistryImpl) GetClientForTool(ctx context.Context, toolName string) (Client, error)

GetClientForTool returns the client that provides the specified tool

func (r *RegistryImpl) GetToolSchema(ctx context.Context, toolName string) (*Tool, error)

GetToolSchema returns the schema for a specific tool

func (r *RegistryImpl) ListAllTools(ctx context.Context) (map[string][]Tool, error)

ListAllTools returns all tools from all connected servers

func (r *RegistryImpl) ListServers() []string

ListServers returns all registered server names

func (r *RegistryImpl) RegisterServer(config ServerConfig) error

RegisterServer adds a new MCP server configuration
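
A typical register-then-call flow is sketched below; the server command, tool name, and arguments are illustrative:

reg := mcp.NewRegistry()
defer reg.Close()

if err := reg.RegisterServer(mcp.ServerConfig{
    Name:    "filesystem",
    Command: "npx",
    Args:    []string{"-y", "@modelcontextprotocol/server-filesystem", "/workspace"},
}); err != nil {
    return err
}

// Route the call to whichever registered server advertises the tool.
client, err := reg.GetClientForTool(ctx, "read_file")
if err != nil {
    return err
}
resp, err := client.CallTool(ctx, "read_file", json.RawMessage(`{"path": "/workspace/notes.txt"}`))
if err != nil {
    return err
}
for _, c := range resp.Content {
    if c.Type == "text" {
        fmt.Println(c.Text)
    }
}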

ResourcesCapability indicates the server supports resources

type ResourcesCapability struct {
ListChanged bool `json:"listChanged,omitempty"`
}

SamplingCapability indicates the client supports sampling

type SamplingCapability struct{}

ServerCapabilities describes what the server supports

type ServerCapabilities struct {
Tools *ToolsCapability `json:"tools,omitempty"`
Resources *ResourcesCapability `json:"resources,omitempty"`
Prompts *PromptsCapability `json:"prompts,omitempty"`
}

ServerConfig represents configuration for an MCP server

type ServerConfig struct {
Name string `json:"name" yaml:"name"` // Unique identifier for this server
Command string `json:"command" yaml:"command"` // Command to execute
Args []string `json:"args,omitempty" yaml:"args,omitempty"`
Env map[string]string `json:"env,omitempty" yaml:"env,omitempty"`
}

ServerConfigData holds MCP server configuration matching config.MCPServerConfig

type ServerConfigData struct {
Name string
Command string
Args []string
Env map[string]string
}

StdioClient implements the MCP Client interface using stdio transport

type StdioClient struct {
// contains filtered or unexported fields
}

func NewStdioClient(config ServerConfig) *StdioClient

NewStdioClient creates a new MCP client using stdio transport

func NewStdioClientWithOptions(config ServerConfig, options ClientOptions) *StdioClient

NewStdioClientWithOptions creates a client with custom options

func (c *StdioClient) CallTool(ctx context.Context, name string, arguments json.RawMessage) (*ToolCallResponse, error)

CallTool executes a tool with the given arguments

func (c *StdioClient) Close() error

Close terminates the connection to the MCP server

func (c *StdioClient) Initialize(ctx context.Context) (*InitializeResponse, error)

Initialize establishes the MCP connection and negotiates capabilities

func (c *StdioClient) IsAlive() bool

IsAlive checks if the connection is still active

func (c *StdioClient) ListTools(ctx context.Context) ([]Tool, error)

ListTools retrieves all available tools from the server

Tool represents an MCP tool definition

type Tool struct {
Name string `json:"name"`
Description string `json:"description,omitempty"`
InputSchema json.RawMessage `json:"inputSchema"` // JSON Schema for tool input
}

ToolCallRequest represents a request to execute a tool

type ToolCallRequest struct {
Name string `json:"name"`
Arguments json.RawMessage `json:"arguments,omitempty"`
}

ToolCallResponse represents the response from a tool execution

type ToolCallResponse struct {
Content []Content `json:"content"`
IsError bool `json:"isError,omitempty"`
}

ToolsCapability indicates the server supports tools

type ToolsCapability struct {
ListChanged bool `json:"listChanged,omitempty"` // Server can send notifications
}

ToolsListRequest represents a request to list available tools

type ToolsListRequest struct {
}

ToolsListResponse represents the response to a tools/list request

type ToolsListResponse struct {
Tools []Tool `json:"tools"`
}
import "github.com/AltairaLabs/PromptKit/runtime/media"

Package media provides utilities for processing media content (images, audio, and video).

Audio format constants.

const (
AudioFormatWAV = "wav"
AudioFormatMP3 = "mp3"
AudioFormatFLAC = "flac"
AudioFormatOGG = "ogg"
AudioFormatM4A = "m4a"
AudioFormatAAC = "aac"
AudioFormatPCM = "pcm"
AudioFormatWebM = "webm"
)

Audio MIME type constants.

const (
MIMETypeAudioWAV = "audio/wav"
MIMETypeAudioMP3 = "audio/mpeg"
MIMETypeAudioFLAC = "audio/flac"
MIMETypeAudioOGG = "audio/ogg"
MIMETypeAudioM4A = "audio/mp4"
MIMETypeAudioAAC = "audio/aac"
MIMETypeAudioPCM = "audio/pcm"
MIMETypeAudioWebM = "audio/webm"
)

Default configuration values.

const (
DefaultFFmpegPath = "ffmpeg"
DefaultFFmpegTimeout = 300 // 5 minutes
DefaultFFmpegCheckTimeout = 5 // seconds for availability check
DefaultTempFilePermissions = 0600 // owner read/write only
)

Image format constants.

const (
FormatJPEG = "jpeg"
FormatJPG = "jpg"
FormatPNG = "png"
FormatGIF = "gif"
FormatWebP = "webp"
)

MIME type constants.

const (
MIMETypeJPEG = "image/jpeg"
MIMETypePNG = "image/png"
MIMETypeGIF = "image/gif"
MIMETypeWebP = "image/webp"
)

Default configuration values.

const (
DefaultMaxWidth = 1024
DefaultMaxHeight = 1024
DefaultQuality = 85
MinQuality = 10
QualityDecay = 0.9
)

MIME type variant constants.

const (
MIMETypeAudioXWAV = "audio/x-wav"
)

FFmpeg error types.

var (
ErrFFmpegNotFound = fmt.Errorf("ffmpeg not found in PATH")
ErrFFmpegTimeout = fmt.Errorf("ffmpeg execution timed out")
)

func AudioFormatToMIMEType(format string) string

AudioFormatToMIMEType converts a format string to MIME type.

func CheckFFmpegAvailable(ffmpegPath string) error

CheckFFmpegAvailable checks if ffmpeg is available in PATH.

func IsFormatSupported(mimeType string, supportedFormats []string) bool

IsFormatSupported checks if a MIME type is in the list of supported formats.

func MIMETypeToAudioFormat(mimeType string) string

MIMETypeToAudioFormat converts a MIME type to a format string.

func MIMETypeToFormat(mimeType string) string

MIMETypeToFormat converts a MIME type to format string.

func SelectTargetFormat(supportedFormats []string) string

SelectTargetFormat selects the best target format from supported formats. Prefers lossless formats (WAV) when available, then common formats (MP3).

AudioConvertResult contains the result of an audio conversion.

type AudioConvertResult struct {
Data []byte
Format string
MIMEType string
OriginalSize int64
NewSize int64
WasConverted bool
}

AudioConverter handles audio format conversion using ffmpeg.

type AudioConverter struct {
// contains filtered or unexported fields
}

func NewAudioConverter(config AudioConverterConfig) *AudioConverter

NewAudioConverter creates a new audio converter with the given config.

func (c *AudioConverter) CanConvert(fromMIME, toMIME string) bool

CanConvert checks if the converter can convert between the given formats.

func (c *AudioConverter) ConvertAudio(ctx context.Context, data []byte, fromMIME, toMIME string) (*AudioConvertResult, error)

ConvertAudio converts audio data from one format to another. If the source format matches the target, returns the original data unchanged.

AudioConverterConfig configures audio conversion behavior.

type AudioConverterConfig struct {
// FFmpegPath is the path to the ffmpeg binary.
// Default: "ffmpeg" (uses PATH).
FFmpegPath string
// FFmpegTimeout is the maximum time for FFmpeg execution.
// Default: 5 minutes.
FFmpegTimeout int // seconds
// SampleRate is the output sample rate in Hz.
// 0 means preserve original.
SampleRate int
// Channels is the number of output channels.
// 0 means preserve original.
Channels int
// BitRate is the output bitrate for lossy formats (e.g., "128k").
// Empty means use ffmpeg default.
BitRate string
}

func DefaultAudioConverterConfig() AudioConverterConfig

DefaultAudioConverterConfig returns sensible defaults for audio conversion.
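
A sketch of converting a WebM capture to WAV before sending it to a provider that only accepts WAV; it assumes ffmpeg is on PATH and webmData holds the source bytes:

if err := media.CheckFFmpegAvailable(media.DefaultFFmpegPath); err != nil {
    return err
}

conv := media.NewAudioConverter(media.DefaultAudioConverterConfig())
if !conv.CanConvert(media.MIMETypeAudioWebM, media.MIMETypeAudioWAV) {
    return fmt.Errorf("webm to wav conversion not supported")
}

res, err := conv.ConvertAudio(ctx, webmData, media.MIMETypeAudioWebM, media.MIMETypeAudioWAV)
if err != nil {
    return err
}
fmt.Printf("converted=%v %d -> %d bytes (%s)\n", res.WasConverted, res.OriginalSize, res.NewSize, res.MIMEType)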

ContentConverter handles conversion of MediaContent to match provider requirements.

type ContentConverter struct {
// contains filtered or unexported fields
}

func NewContentConverter(config AudioConverterConfig) *ContentConverter

NewContentConverter creates a new content converter.

func (c *ContentConverter) ConvertMediaContentIfNeeded(ctx context.Context, media *types.MediaContent, contentType string, targetFormats []string) (*types.MediaContent, error)

ConvertMediaContentIfNeeded converts media content to a supported format if necessary.

func (c *ContentConverter) ConvertMessageForProvider(ctx context.Context, msg *types.Message, provider providers.Provider) (*types.Message, error)

ConvertMessageForProvider converts all media parts in a message to formats supported by the provider. Returns a new message with converted content (original is not modified).

ImageResizeConfig configures image resizing behavior.

type ImageResizeConfig struct {
// MaxWidth is the maximum width in pixels (0 = no limit).
MaxWidth int
// MaxHeight is the maximum height in pixels (0 = no limit).
MaxHeight int
// MaxSizeBytes is the maximum encoded size in bytes (0 = no limit).
// If exceeded after resize, quality is reduced iteratively.
MaxSizeBytes int64
// Quality is the encoding quality (1-100). Used for JPEG and WebP.
// Default: 85.
Quality int
// Format is the output format ("jpeg", "png", "" = preserve original).
Format string
// PreserveAspectRatio maintains the original aspect ratio when resizing.
// Default: true.
PreserveAspectRatio bool
// SkipIfSmaller skips processing if the image is already within limits.
// Default: true.
SkipIfSmaller bool
}

func DefaultImageResizeConfig() ImageResizeConfig

DefaultImageResizeConfig returns sensible defaults for image resizing.

ResizeResult contains the result of an image resize operation.

type ResizeResult struct {
Data []byte
Format string
MIMEType string
Width int
Height int
OriginalSize int64
NewSize int64
WasResized bool
}

func ResizeImage(data []byte, config ImageResizeConfig) (*ResizeResult, error)

ResizeImage resizes an image to fit within the configured dimensions. Returns the resized image data, format, and any error.
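
A sketch of downscaling an oversized screenshot before attaching it to a prompt; imageBytes holds the original encoded image and the limits are illustrative:

cfg := media.DefaultImageResizeConfig()
cfg.MaxWidth = 1024
cfg.MaxHeight = 1024
cfg.MaxSizeBytes = 512 * 1024 // keep the encoded image under 512 KiB
cfg.Format = media.FormatJPEG

res, err := media.ResizeImage(imageBytes, cfg)
if err != nil {
    return err
}
fmt.Printf("%dx%d, %d -> %d bytes, resized=%v\n", res.Width, res.Height, res.OriginalSize, res.NewSize, res.WasResized)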

import "github.com/AltairaLabs/PromptKit/runtime/persistence"

Package persistence provides an abstract persistence layer for Runtime components, along with shared persistence interfaces and common errors.

This package implements the Repository Pattern to decouple Runtime from storage implementations. It provides interfaces for loading prompts, tools, and fragments from various backends (YAML files, JSON files, memory, packs, etc.).

Sentinel errors for persistence operations.

var (
// ErrNilConfig is returned when a nil config is passed to SavePrompt.
ErrNilConfig = errors.New("config cannot be nil")
// ErrNilDescriptor is returned when a nil descriptor is passed to SaveTool.
ErrNilDescriptor = errors.New("descriptor cannot be nil")
// ErrEmptyTaskType is returned when a config has an empty task_type.
ErrEmptyTaskType = errors.New("task_type cannot be empty")
// ErrEmptyToolName is returned when a tool descriptor has an empty name.
ErrEmptyToolName = errors.New("tool name cannot be empty")
// ErrPromptNotFound is returned when a requested prompt is not found.
ErrPromptNotFound = errors.New("prompt not found")
// ErrToolNotFound is returned when a requested tool is not found.
ErrToolNotFound = errors.New("tool not found")
)

PromptRepository provides abstract access to prompt configurations

type PromptRepository interface {
// LoadPrompt loads a prompt configuration by task type
LoadPrompt(taskType string) (*prompt.Config, error)
// LoadFragment loads a fragment by name and optional path
LoadFragment(name string, relativePath string, baseDir string) (*prompt.Fragment, error)
// ListPrompts returns all available prompt task types
ListPrompts() ([]string, error)
// SavePrompt saves a prompt configuration (for future write support)
SavePrompt(config *prompt.Config) error
}
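
Callers can distinguish a missing prompt from other failures using the sentinel errors above. A sketch, assuming repo is any PromptRepository implementation and the task type is illustrative:

cfg, err := repo.LoadPrompt("support_chat")
if err != nil {
    if errors.Is(err, persistence.ErrPromptNotFound) {
        return fmt.Errorf("prompt %q is not installed: %w", "support_chat", err)
    }
    return err
}
fmt.Println("loaded prompt for task:", cfg.GetTaskType())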

ToolRepository provides abstract access to tool descriptors

type ToolRepository interface {
// LoadTool loads a tool descriptor by name
LoadTool(name string) (*tools.ToolDescriptor, error)
// ListTools returns all available tool names
ListTools() ([]string, error)
// SaveTool saves a tool descriptor (for future write support)
SaveTool(descriptor *tools.ToolDescriptor) error
}
import "github.com/AltairaLabs/PromptKit/runtime/pipeline"

Package pipeline provides types and configuration for stage-based pipeline execution. The legacy middleware-based pipeline has been removed in favor of the stage architecture. See runtime/pipeline/stage for the current implementation.

Config represents the complete pipeline configuration for the pack format

type Config struct {
Stages []string `json:"stages"` // Pipeline stages in order
Middleware []MiddlewareConfig `json:"middleware,omitempty"` // Deprecated: for backward compatibility only
}

ExecutionResult is the output of a pipeline execution.

type ExecutionResult struct {
Messages []types.Message `json:"messages"` // All messages including history and responses
Response *Response `json:"response"` // The final response
Trace ExecutionTrace `json:"trace"` // Complete execution trace with all LLM calls
CostInfo types.CostInfo `json:"cost_info"` // Aggregate cost across all LLM calls
Metadata map[string]interface{} `json:"metadata"` // Metadata populated by stages
}

ExecutionTrace captures the complete execution history of a pipeline run.

type ExecutionTrace struct {
LLMCalls []LLMCall `json:"llm_calls"` // All LLM API calls made during execution
Events []TraceEvent `json:"events,omitempty"` // Other trace events
StartedAt time.Time `json:"started_at"` // When pipeline execution started
CompletedAt *time.Time `json:"completed_at,omitempty"` // When pipeline execution completed
}

LLMCall represents a single LLM API call within a pipeline execution.

type LLMCall struct {
Sequence int `json:"sequence"` // Call number in sequence
MessageIndex int `json:"message_index"` // Index into messages array
Request interface{} `json:"request,omitempty"` // Raw request (if debugging enabled)
Response interface{} `json:"response"` // Parsed response
RawResponse interface{} `json:"raw_response,omitempty"` // Raw provider response
StartedAt time.Time `json:"started_at"` // When call started
Duration time.Duration `json:"duration"` // How long the call took
Cost types.CostInfo `json:"cost"` // Cost information for this call
ToolCalls []types.MessageToolCall `json:"tool_calls,omitempty"` // If this call triggered tool execution
Error *string `json:"error,omitempty"` // Error message if the call failed
}

func (l *LLMCall) GetError() error

GetError returns the error as an error type, or nil if no error occurred.

func (l *LLMCall) SetError(err error)

SetError sets the error for this LLM call from an error value.
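
After a run, the trace can be walked to audit per-call latency, tool usage, and failures. A sketch, assuming result is an ExecutionResult returned by the pipeline:

for _, call := range result.Trace.LLMCalls {
    if err := call.GetError(); err != nil {
        fmt.Printf("call %d failed after %v: %v\n", call.Sequence, call.Duration, err)
        continue
    }
    fmt.Printf("call %d ok in %v (%d tool calls)\n", call.Sequence, call.Duration, len(call.ToolCalls))
}
fmt.Printf("aggregate cost: %+v\n", result.CostInfo)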

MiddlewareConfig represents configuration for a specific middleware (deprecated)

type MiddlewareConfig struct {
Type string `json:"type"` // Middleware type
Config map[string]interface{} `json:"config,omitempty"` // Type-specific configuration
}

ProviderMiddlewareConfig contains configuration for provider middleware

type ProviderMiddlewareConfig struct {
RetryPolicy *RetryPolicy `json:"retry_policy,omitempty"` // Retry policy
TimeoutMs int `json:"timeout_ms,omitempty"` // Request timeout in milliseconds
}

Response represents the output from a pipeline execution.

type Response struct {
Role string `json:"role"`
Content string `json:"content"`
ToolCalls []types.MessageToolCall `json:"tool_calls,omitempty"`
}

RetryPolicy defines retry behavior for provider middleware

type RetryPolicy struct {
MaxRetries int `json:"max_retries"` // Maximum retry attempts
Backoff string `json:"backoff"` // Backoff strategy ("fixed", "exponential")
InitialDelayMs int `json:"initial_delay_ms,omitempty"` // Initial delay in milliseconds
}
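
A sketch of a provider middleware configuration with exponential backoff (values are illustrative):

cfg := pipeline.ProviderMiddlewareConfig{
    TimeoutMs: 30000,
    RetryPolicy: &pipeline.RetryPolicy{
        MaxRetries:     3,
        Backoff:        "exponential",
        InitialDelayMs: 250,
    },
}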

StateStoreConfig contains configuration for state store middleware

type StateStoreConfig struct {
Store interface{} // State store implementation (statestore.Store)
ConversationID string // Unique conversation identifier
UserID string // User identifier (optional)
Metadata map[string]interface{} // Additional metadata to store (optional)
}

TemplateMiddlewareConfig contains configuration for template middleware

type TemplateMiddlewareConfig struct {
StrictMode bool `json:"strict_mode"` // Fail on undefined variables
AllowUndefined bool `json:"allow_undefined"` // Allow undefined variables
}

ToolPolicy defines constraints on tool usage.

type ToolPolicy struct {
ToolChoice string `json:"tool_choice,omitempty"` // "auto", "required", "none", or specific tool name
MaxRounds int `json:"max_rounds,omitempty"`
MaxToolCallsPerTurn int `json:"max_tool_calls_per_turn,omitempty"`
Blocklist []string `json:"blocklist,omitempty"`
}

TraceEvent represents a significant event during pipeline execution.

type TraceEvent struct {
Type string `json:"type"` // Event type
Timestamp time.Time `json:"timestamp"` // When the event occurred
Data interface{} `json:"data"` // Event-specific data
Message string `json:"message,omitempty"` // Human-readable description
}

ValidationError represents a validation failure.

type ValidationError struct {
Type string `json:"type"`
Details string `json:"details"`
Failures []types.ValidationResult `json:"failures"` // All failed validations
}

func (e *ValidationError) Error() string

Error returns the error message for this validation error.

ValidatorMiddlewareConfig contains configuration for validator middleware

type ValidatorMiddlewareConfig struct {
FailFast bool `json:"fail_fast"` // Stop on first validation error
CollectAllErrors bool `json:"collect_all_errors"` // Collect all errors before failing
}
import "github.com/AltairaLabs/PromptKit/runtime/prompt"

Package prompt provides template-based prompt management and assembly.

This package implements a registry system for loading, caching, and assembling prompt templates via repository interfaces:

  • Fragment-based prompt composition
  • Variable substitution with required/optional vars
  • Model-specific overrides (template modifications only)
  • Tool allowlist integration
  • Version tracking and content hashing

The Registry uses the repository pattern to load prompt configs, avoiding direct file I/O. It resolves fragment references, performs template variable substitution, and generates AssembledPrompt objects ready for LLM execution.

For system architecture and design patterns, see:

Create a registry with a repository (config-first pattern):

vars := map[string]string{"audience": "developers"} // illustrative template variables
repo := memory.NewRepository()
registry := prompt.NewRegistryWithRepository(repo)
assembled := registry.LoadWithVars("task_type", vars, "gpt-4")

See package github.com/AltairaLabs/PromptKit/sdk for higher-level APIs.

PromptPackSchemaURL is the JSON Schema URL for validating PromptPack files

const PromptPackSchemaURL = "https://promptpack.org/schema/latest/promptpack.schema.json"

func ExtractVariablesFromTemplate(template string) []string

ExtractVariablesFromTemplate analyzes a template string and extracts variable names. This helps auto-generate variable metadata when it is not explicitly specified.

func GetDefaultPipelineConfig() map[string]interface{}

GetDefaultPipelineConfig returns the default Arena pipeline configuration. It returns a map to avoid an import cycle with the pipeline package.

func GetUsedVars(vars map[string]string) []string

GetUsedVars returns a list of variable names that had non-empty values

Deprecated: Use template.GetUsedVars instead

func SupportsMediaType(config *MediaConfig, mediaType string) bool

SupportsMediaType checks if a MediaConfig supports a specific media type

func ValidateMediaConfig(config *MediaConfig) error

ValidateMediaConfig validates a MediaConfig for correctness and completeness
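
A minimal sketch combining these helpers (field values are illustrative, not defaults):

cfg := &prompt.MediaConfig{
	Enabled:        true,
	SupportedTypes: []string{"image"},
	Image:          &prompt.ImageConfig{MaxSizeMB: 10, AllowedFormats: []string{"png", "jpeg"}},
}
if err := prompt.ValidateMediaConfig(cfg); err != nil {
	log.Fatal(err)
}
if prompt.SupportsMediaType(cfg, "image") {
	fmt.Println("max image size (MB):", prompt.GetImageConfig(cfg).MaxSizeMB)
}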

AssembledPrompt represents a complete prompt ready for LLM execution.

type AssembledPrompt struct {
TaskType string `json:"task_type"`
SystemPrompt string `json:"system_prompt"`
AllowedTools []string `json:"allowed_tools,omitempty"` // Tools this prompt can use
Validators []ValidatorConfig `json:"validators,omitempty"` // Validators to apply at runtime
}

func (ap *AssembledPrompt) UsesTools() bool

UsesTools returns true if this prompt has tools configured

AudioConfig contains audio-specific configuration

type AudioConfig struct {
// Maximum audio size in MB (0 = unlimited)
MaxSizeMB int `yaml:"max_size_mb,omitempty" json:"max_size_mb,omitempty"`
// Allowed formats: ["mp3", "wav", "ogg", "webm"]
AllowedFormats []string `yaml:"allowed_formats,omitempty" json:"allowed_formats,omitempty"`
// Max duration in seconds (0 = unlimited)
MaxDurationSec int `yaml:"max_duration_sec,omitempty" json:"max_duration_sec,omitempty"`
// Whether metadata (duration, bitrate) is required
RequireMetadata bool `yaml:"require_metadata,omitempty" json:"require_metadata,omitempty"`
}

func GetAudioConfig(config *MediaConfig) *AudioConfig

GetAudioConfig returns the audio configuration if audio is supported

ChangelogEntry records a change in the prompt configuration

type ChangelogEntry struct {
Version string `yaml:"version"` // Version number
Date string `yaml:"date"` // Date of change (YYYY-MM-DD)
Author string `yaml:"author,omitempty"` // Author of change
Description string `yaml:"description"` // Description of change
}

CompilationInfo contains information about prompt compilation

type CompilationInfo struct {
CompiledWith string `yaml:"compiled_with" json:"compiled_with"` // Compiler version
CreatedAt string `yaml:"created_at" json:"created_at"` // Timestamp (RFC3339)
Schema string `yaml:"schema,omitempty" json:"schema,omitempty"` // Pack schema version (e.g., "v1")
}

Config represents a YAML prompt configuration file in K8s-style manifest format

type Config struct {
APIVersion string `yaml:"apiVersion" json:"apiVersion"`
Kind string `yaml:"kind" json:"kind"`
Metadata metav1.ObjectMeta `yaml:"metadata,omitempty" json:"metadata,omitempty"`
Spec Spec `yaml:"spec" json:"spec"`
}

func ParseConfig(data []byte) (*Config, error)

ParseConfig parses a prompt config from YAML data. This is a package-level utility function for parsing prompt configs in the config layer. The config layer should read files using os.ReadFile and pass the data to this function. Returns the parsed Config or an error if parsing/validation fails.
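
A minimal sketch of the usage described above; the file path is hypothetical and reading happens in the config layer:

data, err := os.ReadFile("prompts/support.yaml") // hypothetical path
if err != nil {
	return err
}
cfg, err := prompt.ParseConfig(data)
if err != nil {
	return err
}
fmt.Println(cfg.GetTaskType(), cfg.GetAllowedTools())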

func (c *Config) GetAllowedTools() []string

GetAllowedTools returns the allowed tools from the prompt config

func (c *Config) GetTaskType() string

GetTaskType returns the task type from the prompt config

CostEstimate provides estimated costs for prompt execution

type CostEstimate struct {
MinCostUSD float64 `yaml:"min_cost_usd"` // Minimum cost per execution
MaxCostUSD float64 `yaml:"max_cost_usd"` // Maximum cost per execution
AvgCostUSD float64 `yaml:"avg_cost_usd"` // Average cost per execution
}

ExampleContentPart represents a content part in an example (simplified for YAML)

type ExampleContentPart struct {
// Content type: "text", "image", "audio", "video"
Type string `yaml:"type" json:"type"`
// Text content (for type=text)
Text string `yaml:"text,omitempty" json:"text,omitempty"`
// For media content
Media *ExampleMedia `yaml:"media,omitempty" json:"media,omitempty"`
}

ExampleMedia represents media references in examples

type ExampleMedia struct {
// Relative path to media file
FilePath string `yaml:"file_path,omitempty" json:"file_path,omitempty"`
// External URL
URL string `yaml:"url,omitempty" json:"url,omitempty"`
// MIME type
MIMEType string `yaml:"mime_type" json:"mime_type"`
// Detail level for images
Detail string `yaml:"detail,omitempty" json:"detail,omitempty"`
// Optional caption
Caption string `yaml:"caption,omitempty" json:"caption,omitempty"`
}

FileWriter abstracts file writing for testing

type FileWriter interface {
WriteFile(path string, data []byte, perm os.FileMode) error
}

Fragment represents a reusable prompt fragment

type Fragment struct {
Type string `yaml:"fragment_type"`
Version string `yaml:"version"`
Description string `yaml:"description"`
Content string `yaml:"content"`
SourceFile string `yaml:"source_file,omitempty"` // Source file path (for pack compilation)
ResolvedAtCompile bool `yaml:"resolved_at_compile,omitempty"` // Whether resolved at compile time
}

FragmentRef references a prompt fragment for assembly

type FragmentRef struct {
Name string `yaml:"name"`
Path string `yaml:"path,omitempty"` // Optional: relative path to fragment file
Required bool `yaml:"required"`
}

FragmentRepository interface for loading fragments (to avoid import cycles)

type FragmentRepository interface {
LoadFragment(name string, relativePath string, baseDir string) (*Fragment, error)
}

FragmentResolver handles fragment loading, resolution, and variable substitution using the repository pattern

type FragmentResolver struct {
// contains filtered or unexported fields
}

func NewFragmentResolverWithRepository(repository FragmentRepository) *FragmentResolver

NewFragmentResolverWithRepository creates a new fragment resolver with a repository

func (fr *FragmentResolver) AssembleFragments(fragments []FragmentRef, vars map[string]string, configFilePath string) (map[string]string, error)

AssembleFragments loads and assembles prompt fragments into variables. Resolves dynamic names and paths using the provided variable map.
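
A hedged sketch of fragment assembly; the fragment names, paths, and the {{var}} dynamic-name syntax are assumptions based on the template syntax documented elsewhere in this package:

resolver := prompt.NewFragmentResolverWithRepository(repo) // repo implements prompt.FragmentRepository
fragments := []prompt.FragmentRef{
	{Name: "tone_{{region}}", Required: true},                      // dynamic name resolved from vars (assumed syntax)
	{Name: "safety", Path: "fragments/safety.md", Required: false}, // hypothetical path
}
vars := map[string]string{"region": "emea"}
resolved, err := resolver.AssembleFragments(fragments, vars, "prompts/support.yaml")
if err != nil {
	return err
}
fmt.Println(len(resolved), "fragment variables assembled")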

func (fr *FragmentResolver) LoadFragment(name, relativePath, configFilePath string) (*Fragment, error)

LoadFragment loads a fragment from the repository with caching. Uses name as cache key, or path if provided.

ImageConfig contains image-specific configuration

type ImageConfig struct {
// Maximum image size in MB (0 = unlimited)
MaxSizeMB int `yaml:"max_size_mb,omitempty" json:"max_size_mb,omitempty"`
// Allowed formats: ["jpeg", "png", "webp", "gif"]
AllowedFormats []string `yaml:"allowed_formats,omitempty" json:"allowed_formats,omitempty"`
// Default detail level: "low", "high", "auto"
DefaultDetail string `yaml:"default_detail,omitempty" json:"default_detail,omitempty"`
// Whether captions are required
RequireCaption bool `yaml:"require_caption,omitempty" json:"require_caption,omitempty"`
// Max images per message (0 = unlimited)
MaxImagesPerMsg int `yaml:"max_images_per_msg,omitempty" json:"max_images_per_msg,omitempty"`
}

func GetImageConfig(config *MediaConfig) *ImageConfig

GetImageConfig returns the image configuration if images are supported

Info provides summary information about a prompt configuration

type Info struct {
TaskType string
Version string
Description string
FragmentCount int
RequiredVars []string
OptionalVars []string
ToolAllowlist []string
ModelOverrides []string
}

Loader interface abstracts the registry for testing

type Loader interface {
LoadConfig(taskType string) (*Config, error)
ListTaskTypes() []string
}

MediaConfig defines multimodal media support configuration for a prompt

type MediaConfig struct {
// Enable multimodal support for this prompt
Enabled bool `yaml:"enabled" json:"enabled"`
// Supported content types: "image", "audio", "video"
SupportedTypes []string `yaml:"supported_types,omitempty" json:"supported_types,omitempty"`
// Image-specific configuration
Image *ImageConfig `yaml:"image,omitempty" json:"image,omitempty"`
// Audio-specific configuration
Audio *AudioConfig `yaml:"audio,omitempty" json:"audio,omitempty"`
// Video-specific configuration
Video *VideoConfig `yaml:"video,omitempty" json:"video,omitempty"`
// Example multimodal messages
Examples []MultimodalExample `yaml:"examples,omitempty" json:"examples,omitempty"`
}

Metadata contains additional metadata for the pack format

type Metadata struct {
Domain string `yaml:"domain,omitempty"` // Domain/category (e.g., "customer-support")
Language string `yaml:"language,omitempty"` // Primary language (e.g., "en")
Tags []string `yaml:"tags,omitempty"` // Tags for categorization
CostEstimate *CostEstimate `yaml:"cost_estimate,omitempty"` // Estimated cost per execution
Performance *PerformanceMetrics `yaml:"performance,omitempty"` // Performance benchmarks
Changelog []ChangelogEntry `yaml:"changelog,omitempty"` // Version history
}

MetadataBuilder helps construct pack format metadata from prompt configs and test results

type MetadataBuilder struct {
// contains filtered or unexported fields
}

func NewMetadataBuilder(spec *Spec) *MetadataBuilder

NewMetadataBuilder creates a new metadata builder for a prompt spec

func (mb *MetadataBuilder) AddChangelogEntry(version, author, description string)

AddChangelogEntry adds a new entry to the prompt’s changelog

func (mb *MetadataBuilder) BuildCompilationInfo(compilerVersion string) *CompilationInfo

BuildCompilationInfo generates compilation metadata

func (mb *MetadataBuilder) BuildMetadata(domain, language string, tags []string, testResults []TestResultSummary) *Metadata

BuildMetadata generates Metadata from test execution results

func (mb *MetadataBuilder) SetDomain(domain string)

SetDomain sets the domain for the prompt metadata

func (mb *MetadataBuilder) SetLanguage(language string)

SetLanguage sets the language for the prompt metadata

func (mb *MetadataBuilder) SetTags(tags []string)

SetTags sets the tags for the prompt metadata

func (mb *MetadataBuilder) UpdateFromCostInfo(costs []types.CostInfo)

UpdateFromCostInfo updates cost estimate from types.CostInfo

func (mb *MetadataBuilder) ValidateMetadata() []string

ValidateMetadata checks that metadata fields are properly populated

ModelOverride contains model-specific template modifications. Note: Temperature and MaxTokens should be configured at the scenario or provider level, not in the prompt configuration.

type ModelOverride struct {
SystemTemplate string `yaml:"system_template,omitempty"`
SystemTemplateSuffix string `yaml:"system_template_suffix,omitempty"`
}

ModelTestResultRef is a simplified reference to model test results. The full ModelTestResult type lives in pkg/engine and tracks test execution.

type ModelTestResultRef struct {
Provider string `yaml:"provider"`
Model string `yaml:"model"`
Date string `yaml:"date"`
SuccessRate float64 `yaml:"success_rate"`
AvgTokens int `yaml:"avg_tokens,omitempty"`
AvgCost float64 `yaml:"avg_cost,omitempty"`
AvgLatencyMs int `yaml:"avg_latency_ms,omitempty"`
}

func AggregateTestResults(results []TestResultSummary, provider, model string) *ModelTestResultRef

AggregateTestResults computes ModelTestResultRef from test execution summaries

MultimodalExample represents an example multimodal message for testing/documentation

type MultimodalExample struct {
// Example name/identifier
Name string `yaml:"name" json:"name"`
// Human-readable description
Description string `yaml:"description,omitempty" json:"description,omitempty"`
// Message role: "user", "assistant"
Role string `yaml:"role" json:"role"`
// Content parts for this example
Parts []ExampleContentPart `yaml:"parts" json:"parts"`
}

Pack represents the complete JSON pack format containing MULTIPLE prompts for different task types.

DESIGN DECISION: Why separate Pack types in runtime vs sdk?

This runtime Pack is optimized for COMPILATION:

  • Created by PackCompiler from prompt registry
  • Includes Compilation and Metadata for tracking provenance
  • Returns validation warnings ([]string) for compiler feedback
  • No thread-safety needed (single-threaded compilation)
  • Simple types (VariableMetadata, ValidatorConfig) for JSON serialization

The sdk.Pack is optimized for LOADING & EXECUTION:

  • Loaded from .pack.json files for application use
  • Includes Tools map and filePath for execution context
  • Thread-safe with sync.RWMutex for concurrent access
  • Returns validation errors for application error handling
  • Rich types (*Variable, *Validator) with additional methods
  • Has CreateRegistry() to convert back to runtime.Registry for pipeline

Both serialize to/from the SAME JSON format (.pack.json files), ensuring full interoperability. The type duplication is intentional and prevents circular dependencies while allowing each module to evolve independently.

See sdk/pack.go for the corresponding SDK-side documentation.

type Pack struct {
// Schema reference for validation
Schema string `json:"$schema,omitempty"` // JSON Schema URL for validation
// Identity
ID string `json:"id"` // Pack ID (e.g., "customer-support")
Name string `json:"name"` // Human-readable name
Version string `json:"version"` // Pack version
Description string `json:"description"` // Pack description
// Template Engine (shared across all prompts in pack)
TemplateEngine *TemplateEngineInfo `json:"template_engine"`
// Prompts - Map of task_type -> PackPrompt
Prompts map[string]*PackPrompt `json:"prompts"`
// Tools - Map of tool_name -> PackTool (per PromptPack spec Section 9)
// Tools are defined at pack level and referenced by name in prompts
Tools map[string]*PackTool `json:"tools,omitempty"`
// Shared fragments (can be referenced by any prompt)
Fragments map[string]string `json:"fragments,omitempty"` // Resolved fragments: name -> content
// Metadata
Metadata *Metadata `json:"metadata,omitempty"`
Compilation *CompilationInfo `json:"compilation,omitempty"`
}

func LoadPack(filePath string) (*Pack, error)

LoadPack loads a pack from a JSON file

func (p *Pack) GetOptionalVariables(taskType string) map[string]string

GetOptionalVariables returns all optional variable names with defaults for a specific prompt

func (p *Pack) GetPrompt(taskType string) *PackPrompt

GetPrompt returns a specific prompt by task type

func (p *Pack) GetRequiredVariables(taskType string) []string

GetRequiredVariables returns all required variable names for a specific prompt

func (p *Pack) GetToolNames(taskType string) []string

GetToolNames returns the list of allowed tool names for a specific prompt

func (p *Pack) ListPrompts() []string

ListPrompts returns all prompt task types in the pack

func (p *Pack) Summary() string

Summary returns a brief summary of the pack

func (p *Pack) Validate() []string

Validate validates a pack format
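
A minimal sketch that loads a compiled pack and inspects it; the file name is hypothetical:

pack, err := prompt.LoadPack("customer-support.pack.json") // hypothetical file
if err != nil {
	return err
}
if warnings := pack.Validate(); len(warnings) > 0 {
	log.Printf("pack warnings: %v", warnings)
}
for _, taskType := range pack.ListPrompts() {
	fmt.Println(taskType, pack.GetRequiredVariables(taskType))
}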

PackCompiler compiles Config to Pack format

type PackCompiler struct {
// contains filtered or unexported fields
}

func NewPackCompiler(registry *Registry) *PackCompiler

NewPackCompiler creates a new pack compiler with default dependencies

func NewPackCompilerWithDeps(loader Loader, timeProvider TimeProvider, fileWriter FileWriter) *PackCompiler

NewPackCompilerWithDeps creates a pack compiler with injected dependencies (for testing)

func (pc *PackCompiler) Compile(taskType, compilerVersion string) (*Pack, error)

Compile compiles a single prompt config to Pack format (for backward compatibility)

func (pc *PackCompiler) CompileFromRegistry(packID, compilerVersion string) (*Pack, error)

CompileFromRegistry compiles ALL prompts from the registry into a single Pack

func (pc *PackCompiler) CompileFromRegistryWithParsedTools(packID, compilerVersion string, parsedTools []ParsedTool) (*Pack, error)

CompileFromRegistryWithParsedTools compiles ALL prompts from the registry into a single Pack and includes pre-parsed tool definitions. Use this when YAML parsing happens externally.

func (pc *PackCompiler) CompileFromRegistryWithTools(packID, compilerVersion string, toolData []ToolData) (*Pack, error)

CompileFromRegistryWithTools compiles ALL prompts from the registry into a single Pack and includes tool definitions from the provided tool data. This method satisfies PromptPack spec Section 9 which requires tools to be defined at pack level with name, description, and parameters.

func (pc *PackCompiler) CompileToFile(taskType, outputPath, compilerVersion string) error

CompileToFile compiles a prompt config to a JSON pack file

func (pc *PackCompiler) MarshalPack(pack *Pack) ([]byte, error)

MarshalPack marshals pack to JSON (testable without I/O)

func (pc *PackCompiler) WritePack(pack *Pack, outputPath string) error

WritePack writes a pack to a file
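
A minimal compile-and-write sketch using the methods above; the pack ID, compiler version, and output path are illustrative:

compiler := prompt.NewPackCompiler(registry) // registry is a *prompt.Registry populated elsewhere
pack, err := compiler.CompileFromRegistry("customer-support", "v0.1.0")
if err != nil {
	return err
}
if err := compiler.WritePack(pack, "dist/customer-support.pack.json"); err != nil {
	return err
}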

PackPrompt represents a single prompt configuration within a pack

type PackPrompt struct {
// Identity
ID string `json:"id"` // Prompt ID (task_type)
Name string `json:"name"` // Human-readable name
Description string `json:"description"` // Prompt description
Version string `json:"version"` // Prompt version
// Prompt
SystemTemplate string `json:"system_template"`
// Variables
Variables []VariableMetadata `json:"variables,omitempty"`
// Tools
Tools []string `json:"tools,omitempty"` // Allowed tool names
ToolPolicy *ToolPolicyPack `json:"tool_policy,omitempty"` // Tool usage policy
// Multimodal media configuration
MediaConfig *MediaConfig `json:"media,omitempty"`
// Pipeline
Pipeline map[string]interface{} `json:"pipeline,omitempty"` // Pipeline configuration
// Parameters
Parameters *ParametersPack `json:"parameters,omitempty"` // Model-specific parameters
// Validators
Validators []ValidatorConfig `json:"validators,omitempty"`
// Model Testing
TestedModels []ModelTestResultRef `json:"tested_models,omitempty"`
// Model Overrides
ModelOverrides map[string]ModelOverride `json:"model_overrides,omitempty"`
}

PackTool represents a tool definition in the pack (per PromptPack spec Section 9). Tools are defined at pack level and referenced by prompts via the tools array.

type PackTool struct {
Name string `json:"name"` // Tool function name (required)
Description string `json:"description"` // Tool description (required)
Parameters interface{} `json:"parameters"` // JSON Schema for input parameters (required)
}

func ConvertToolToPackTool(name, description string, inputSchema json.RawMessage) *PackTool

ConvertToolToPackTool converts a tool descriptor to a PackTool. This is the preferred method when tool parsing happens externally.

ParametersPack represents model parameters in pack format

type ParametersPack struct {
Temperature *float64 `json:"temperature,omitempty"`
MaxTokens *int `json:"max_tokens,omitempty"`
TopP *float64 `json:"top_p,omitempty"`
TopK *int `json:"top_k,omitempty"`
}

ParsedTool holds pre-parsed tool information for compilation. Use this when YAML parsing happens in the calling package.

type ParsedTool struct {
Name string
Description string
InputSchema json.RawMessage
}

PerformanceMetrics provides performance benchmarks

type PerformanceMetrics struct {
AvgLatencyMs int `yaml:"avg_latency_ms"` // Average latency in milliseconds
P95LatencyMs int `yaml:"p95_latency_ms"` // 95th percentile latency
AvgTokens int `yaml:"avg_tokens"` // Average tokens used
SuccessRate float64 `yaml:"success_rate"` // Success rate (0.0-1.0)
}

Registry manages prompt templates, versions, and variable substitution.

type Registry struct {
// contains filtered or unexported fields
}

func NewRegistryWithRepository(repository Repository) *Registry

NewRegistryWithRepository creates a registry with a repository (new preferred method). This constructor uses the repository pattern for loading prompts, avoiding direct file I/O.

func (r *Registry) ClearCache()

ClearCache clears all cached prompts and fragments

func (r *Registry) GetAvailableRegions() []string

GetAvailableRegions returns a list of all available regions from prompt fragments

func (r *Registry) GetAvailableTaskTypes() []string

GetAvailableTaskTypes is deprecated: use ListTaskTypes instead

func (r *Registry) GetCachedFragments() []string

GetCachedFragments returns a list of currently cached fragment keys.

func (r *Registry) GetCachedPrompts() []string

GetCachedPrompts returns a list of currently cached prompt task types. For a complete list including uncached prompts, use ListTaskTypes instead.

func (r *Registry) GetInfo(taskType string) (*Info, error)

GetInfo returns detailed information about a prompt configuration

func (r *Registry) GetLoadedFragments() []string

GetLoadedFragments is deprecated: use GetCachedFragments instead

func (r *Registry) GetLoadedPrompts() []string

GetLoadedPrompts is deprecated: use GetCachedPrompts instead

func (r *Registry) ListTaskTypes() []string

ListTaskTypes returns all available task types from the repository. Falls back to cached task types if repository is unavailable or returns empty.

func (r *Registry) Load(activity string) *AssembledPrompt

Load returns an assembled prompt for the specified activity with variable substitution.

func (r *Registry) LoadConfig(activity string) (*Config, error)

LoadConfig is deprecated: use loadConfig directly (internal use) or use Load/LoadWithVars

func (r *Registry) LoadWithVars(activity string, vars map[string]string, model string) *AssembledPrompt

LoadWithVars loads a prompt with variable substitution and optional model override.

func (r *Registry) RegisterConfig(taskType string, config *Config) error

RegisterConfig registers a Config directly into the registry. This allows programmatic registration of prompts without requiring disk files. Useful for loading prompts from compiled packs or other in-memory sources. If a repository is configured, the config is persisted there as well.
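
A hedged sketch of programmatic registration followed by assembly; the YAML bytes and variable values are assumed:

cfg, err := prompt.ParseConfig(yamlBytes) // yamlBytes holds a prompt manifest loaded elsewhere
if err != nil {
	return err
}
if err := registry.RegisterConfig(cfg.GetTaskType(), cfg); err != nil {
	return err
}
assembled := registry.LoadWithVars(cfg.GetTaskType(), map[string]string{"customer_name": "Ada"}, "gpt-4")
fmt.Println(assembled.UsesTools())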

Repository interface defines methods for loading prompts (to avoid import cycles). It should match the persistence.Repository interface.

type Repository interface {
LoadPrompt(taskType string) (*Config, error)
LoadFragment(name string, relativePath string, baseDir string) (*Fragment, error)
ListPrompts() ([]string, error)
SavePrompt(config *Config) error
}

Spec contains the actual prompt configuration

type Spec struct {
TaskType string `yaml:"task_type" json:"task_type"`
Version string `yaml:"version" json:"version"`
Description string `yaml:"description" json:"description"`
TemplateEngine *TemplateEngineInfo `yaml:"template_engine,omitempty" json:"template_engine,omitempty"` // Template engine configuration
Fragments []FragmentRef `yaml:"fragments,omitempty" json:"fragments,omitempty"` // New: fragment assembly
SystemTemplate string `yaml:"system_template" json:"system_template"`
Variables []VariableMetadata `yaml:"variables,omitempty" json:"variables,omitempty"` // Variable definitions with rich metadata
ModelOverrides map[string]ModelOverride `yaml:"model_overrides,omitempty" json:"model_overrides,omitempty"`
AllowedTools []string `yaml:"allowed_tools,omitempty" json:"allowed_tools,omitempty"` // Tools this prompt can use
MediaConfig *MediaConfig `yaml:"media,omitempty" json:"media,omitempty"` // Multimodal media configuration
Validators []ValidatorConfig `yaml:"validators,omitempty" json:"validators,omitempty"` // Validators/Guardrails for production runtime
TestedModels []ModelTestResultRef `yaml:"tested_models,omitempty" json:"tested_models,omitempty"` // Model testing metadata
Metadata *Metadata `yaml:"metadata,omitempty" json:"metadata,omitempty"` // Additional metadata for pack format
Compilation *CompilationInfo `yaml:"compilation,omitempty" json:"compilation,omitempty"` // Compilation information
}

TemplateEngineInfo describes the template engine used for variable substitution

type TemplateEngineInfo struct {
Version string `yaml:"version" json:"version"` // Template engine version (e.g., "v1")
Syntax string `yaml:"syntax" json:"syntax"` // Template syntax (e.g., "{{variable}}")
Features []string `yaml:"features,omitempty" json:"features,omitempty"` // Supported features
}

TestResultSummary contains summarized test execution data

type TestResultSummary struct {
Success bool
Cost float64
LatencyMs int
Tokens int
}

TimeProvider allows injecting time for deterministic tests

type TimeProvider interface {
Now() time.Time
}

ToolData holds raw tool configuration data for compilation

type ToolData struct {
FilePath string
Data []byte
}

ToolPolicyPack represents tool policy in pack format

type ToolPolicyPack struct {
ToolChoice string `json:"tool_choice,omitempty"`
MaxRounds int `json:"max_rounds,omitempty"`
MaxToolCallsPerTurn int `json:"max_tool_calls_per_turn,omitempty"`
Blocklist []string `json:"blocklist,omitempty"`
}

ValidatorConfig extends validators.ValidatorConfig with prompt-pack specific fields

type ValidatorConfig struct {
// Embed base config (Type, Params)
validators.ValidatorConfig `yaml:",inline" json:",inline"`
// Enable/disable validator (default: true)
Enabled *bool `yaml:"enabled,omitempty" json:"enabled,omitempty"`
// Fail execution on violation (default: true)
FailOnViolation *bool `yaml:"fail_on_violation,omitempty" json:"fail_on_violation,omitempty"`
}

VariableBinding defines how a variable binds to system resources. This enables automatic population from system resources and type-safe UI selection.

type VariableBinding struct {
// Kind specifies the type of resource to bind to.
Kind VariableBindingKind `yaml:"kind" json:"kind"`
// Field specifies which field of the resource to bind (e.g., "name", "model").
Field string `yaml:"field,omitempty" json:"field,omitempty"`
// AutoPopulate enables automatic population of this variable from the bound resource.
// When true, the variable may be auto-filled and optionally hidden from the wizard.
AutoPopulate bool `yaml:"autoPopulate,omitempty" json:"autoPopulate,omitempty"`
// Filter specifies criteria for filtering bound resources.
Filter *VariableBindingFilter `yaml:"filter,omitempty" json:"filter,omitempty"`
}

VariableBindingFilter specifies criteria for filtering bound resources.

type VariableBindingFilter struct {
// Capability filters resources by capability (e.g., "chat", "embeddings").
Capability string `yaml:"capability,omitempty" json:"capability,omitempty"`
// Labels filters resources by label selectors.
Labels map[string]string `yaml:"labels,omitempty" json:"labels,omitempty"`
}

VariableBindingKind defines the type of resource a variable binds to.

type VariableBindingKind string

const (
// BindingKindProject binds to project metadata (name, description, tags).
BindingKindProject VariableBindingKind = "project"
// BindingKindProvider binds to provider/model selection.
BindingKindProvider VariableBindingKind = "provider"
// BindingKindWorkspace binds to current workspace (name, namespace).
BindingKindWorkspace VariableBindingKind = "workspace"
// BindingKindSecret binds to Kubernetes Secret resources.
BindingKindSecret VariableBindingKind = "secret"
// BindingKindConfigMap binds to Kubernetes ConfigMap resources.
BindingKindConfigMap VariableBindingKind = "configmap"
)

VariableMetadata defines a template variable with validation rules and rich metadata. This struct matches the SDK Variable type for PromptPack spec compliance.

type VariableMetadata struct {
Name string `yaml:"name" json:"name"`
Type string `yaml:"type,omitempty" json:"type,omitempty"` // "string", "number", "boolean", "object", "array"
Required bool `yaml:"required" json:"required"`
Default interface{} `yaml:"default,omitempty" json:"default,omitempty"`
Description string `yaml:"description,omitempty" json:"description,omitempty"`
Example interface{} `yaml:"example,omitempty" json:"example,omitempty"`
Validation map[string]interface{} `yaml:"validation,omitempty" json:"validation,omitempty"`
// Binding enables automatic population from system resources and type-safe UI selection.
// This allows prompts to declare semantic meaning for variables beyond just their data type.
Binding *VariableBinding `yaml:"binding,omitempty" json:"binding,omitempty"`
}

VideoConfig contains video-specific configuration

type VideoConfig struct {
// Maximum video size in MB (0 = unlimited)
MaxSizeMB int `yaml:"max_size_mb,omitempty" json:"max_size_mb,omitempty"`
// Allowed formats: ["mp4", "webm", "ogg"]
AllowedFormats []string `yaml:"allowed_formats,omitempty" json:"allowed_formats,omitempty"`
// Max duration in seconds (0 = unlimited)
MaxDurationSec int `yaml:"max_duration_sec,omitempty" json:"max_duration_sec,omitempty"`
// Whether metadata (resolution, fps) is required
RequireMetadata bool `yaml:"require_metadata,omitempty" json:"require_metadata,omitempty"`
}

func GetVideoConfig(config *MediaConfig) *VideoConfig

GetVideoConfig returns the video configuration if video is supported

import "github.com/AltairaLabs/PromptKit/runtime/providers"

Package providers implements multi-LLM provider support with unified interfaces.

This package provides a common abstraction for predict-based LLM providers including OpenAI, Anthropic Claude, and Google Gemini. It handles:

  • Predict completion requests with streaming support
  • Tool/function calling with provider-specific formats
  • Cost tracking and token usage calculation
  • Rate limiting and error handling

All providers implement the Provider interface for basic prediction, and the ToolSupport interface for function-calling capabilities.

Package providers contains provider contract test helpers.

This file contains exported test helpers that can be used by provider implementations in subpackages to validate their contract compliance.

Common HTTP constants for embedding providers.

const (
ContentTypeHeader = "Content-Type"
AuthorizationHeader = "Authorization"
ApplicationJSON = "application/json"
BearerPrefix = "Bearer "
)

const (
// DefaultGeminiBaseURL is the default base URL for Gemini API (includes version path)
DefaultGeminiBaseURL = "https://generativelanguage.googleapis.com/v1beta"
)

func CheckHTTPError(resp *http.Response, url string) error

CheckHTTPError checks if HTTP response is an error and returns formatted error with body

func ExtractOrderedEmbeddings[T any](data []T, getIndex func(T) int, getEmbedding func(T) []float32, expectedCount int) ([][]float32, error)

ExtractOrderedEmbeddings extracts embeddings from indexed response data and places them in the correct order. Returns an error if count doesn’t match.
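
A hedged sketch of how a provider implementation might use this helper; the response item type and its values are hypothetical:

type indexedEmbedding struct { // hypothetical shape of one API response item
	Index     int
	Embedding []float32
}

items := []indexedEmbedding{
	{Index: 1, Embedding: []float32{0.3, 0.4}},
	{Index: 0, Embedding: []float32{0.1, 0.2}},
}
ordered, err := providers.ExtractOrderedEmbeddings(
	items,
	func(it indexedEmbedding) int { return it.Index },
	func(it indexedEmbedding) []float32 { return it.Embedding },
	2, // expectedCount
)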

func HasAudioSupport(p Provider) bool

HasAudioSupport checks if a provider supports audio inputs

func HasDocumentSupport(p Provider) bool

HasDocumentSupport checks if a provider supports document inputs

func HasImageSupport(p Provider) bool

HasImageSupport checks if a provider supports image inputs

func HasVideoSupport(p Provider) bool

HasVideoSupport checks if a provider supports video inputs

func IsFormatSupported(p Provider, contentType, mimeType string) bool

IsFormatSupported checks if a provider supports a specific media format (MIME type)

func IsValidationAbort(err error) bool

IsValidationAbort checks if an error is a validation abort

func LoadFileAsBase64(filePath string) (string, error)

LoadFileAsBase64 reads a file and returns its content as a base64-encoded string.

Deprecated: Use MediaLoader.GetBase64Data instead for better functionality including storage reference support, URL loading, and proper context handling.

This function is kept for backward compatibility but will be removed in a future version. It now delegates to the new MediaLoader implementation.

func LogEmbeddingRequest(provider, model string, textCount int, start time.Time)

LogEmbeddingRequest logs a completed embedding request with common fields.

func LogEmbeddingRequestWithTokens(provider, model string, textCount, tokens int, start time.Time)

LogEmbeddingRequestWithTokens logs a completed embedding request with token count.

func MarshalRequest(req any) ([]byte, error)

MarshalRequest marshals a request body to JSON with standardized error handling.

func RegisterProviderFactory(providerType string, factory ProviderFactory)

RegisterProviderFactory registers a factory function for a provider type

func RunProviderContractTests(t *testing.T, config ProviderContractTests)

RunProviderContractTests executes all contract tests against a provider. This should be called from each provider’s test file.

func SetErrorResponse(predictResp *PredictionResponse, respBody []byte, start time.Time)

SetErrorResponse sets latency and raw body on error responses

func SkipIfNoCredentials(t *testing.T, provider Provider)

SkipIfNoCredentials skips the test if API credentials are not available. This is a helper for integration tests that need real API access.

func StringPtr(s string) *string

StringPtr is a helper function that returns a pointer to a string. This is commonly used across provider implementations for optional fields.

func SupportsMultimodal(p Provider) bool

SupportsMultimodal checks if a provider implements multimodal support

func UnmarshalJSON(respBody []byte, v any, predictResp *PredictionResponse, start time.Time) error

UnmarshalJSON unmarshals JSON with error recovery that sets latency and raw response

func UnmarshalResponse(body []byte, resp any) error

UnmarshalResponse unmarshals a response body from JSON with standardized error handling.

func ValidateMultimodalMessage(p Provider, msg types.Message) error

ValidateMultimodalMessage checks if a message’s multimodal content is supported by the provider

func ValidateMultimodalRequest(p MultimodalSupport, req PredictionRequest) error

ValidateMultimodalRequest validates all messages in a predict request for multimodal compatibility. This is a helper function to reduce duplication across provider implementations.

func ValidatePredictReturnsLatency(t *testing.T, provider Provider)

ValidatePredictReturnsLatency verifies that Predict() returns a response with non-zero latency. This is the critical test that would have caught the production bug! Exported for use in provider-specific regression tests.

func ValidatePredictWithToolsReturnsLatency(t *testing.T, provider Provider)

ValidatePredictWithToolsReturnsLatency verifies that PredictWithTools() returns latency. This test is CRITICAL - it would have caught the production bug where PredictWithTools didn’t set latency! Exported for use in provider-specific regression tests.

AudioStreamingCapabilities describes audio streaming support.

type AudioStreamingCapabilities struct {
// SupportedEncodings lists supported audio encodings
// Common values: "pcm", "opus", "mp3", "aac"
SupportedEncodings []string `json:"supported_encodings"`
// SupportedSampleRates lists supported sample rates in Hz
// Common values: 8000, 16000, 24000, 44100, 48000
SupportedSampleRates []int `json:"supported_sample_rates"`
// SupportedChannels lists supported channel counts
// Common values: 1 (mono), 2 (stereo)
SupportedChannels []int `json:"supported_channels"`
// SupportedBitDepths lists supported bit depths
// Common values: 16, 24, 32
SupportedBitDepths []int `json:"supported_bit_depths,omitempty"`
// PreferredEncoding is the recommended encoding for best quality/latency
PreferredEncoding string `json:"preferred_encoding"`
// PreferredSampleRate is the recommended sample rate
PreferredSampleRate int `json:"preferred_sample_rate"`
}

BaseEmbeddingProvider provides common functionality for embedding providers. Embed this struct in provider-specific implementations to reduce duplication.

type BaseEmbeddingProvider struct {
ProviderModel string
BaseURL string
APIKey string
HTTPClient *http.Client
Dimensions int
ProviderID string
BatchSize int
}

func NewBaseEmbeddingProvider(providerID, defaultModel, defaultBaseURL string, defaultDimensions, defaultBatchSize int, defaultTimeout time.Duration) *BaseEmbeddingProvider

NewBaseEmbeddingProvider creates a base embedding provider with defaults.

func (b *BaseEmbeddingProvider) DoEmbeddingRequest(ctx context.Context, cfg HTTPRequestConfig) ([]byte, error)

DoEmbeddingRequest performs a common HTTP POST request for embeddings. Returns the response body and any error.

func (b *BaseEmbeddingProvider) EmbedWithEmptyCheck(ctx context.Context, req EmbeddingRequest, embedFn EmbedFunc) (EmbeddingResponse, error)

EmbedWithEmptyCheck wraps embedding logic with empty request handling.

func (b *BaseEmbeddingProvider) EmbeddingDimensions() int

EmbeddingDimensions returns the dimensionality of embedding vectors.

func (b *BaseEmbeddingProvider) EmptyResponseForModel(model string) EmbeddingResponse

EmptyResponseForModel returns an empty EmbeddingResponse with the given model. Use this for handling empty input cases.

func (b *BaseEmbeddingProvider) HandleEmptyRequest(req EmbeddingRequest) (EmbeddingResponse, bool)

HandleEmptyRequest checks if the request has no texts and returns early if so. Returns (response, true) if empty, (zero, false) if not empty.

func (b *BaseEmbeddingProvider) ID() string

ID returns the provider identifier.

func (b *BaseEmbeddingProvider) MaxBatchSize() int

MaxBatchSize returns the maximum texts per single API request.

func (b *BaseEmbeddingProvider) Model() string

Model returns the current embedding model.

func (b *BaseEmbeddingProvider) ResolveModel(reqModel string) string

ResolveModel returns the model to use, preferring the request model over the default.

BaseProvider provides common functionality shared across all provider implementations. It should be embedded in concrete provider structs to avoid code duplication.

type BaseProvider struct {
// contains filtered or unexported fields
}

func NewBaseProvider(id string, includeRawOutput bool, client *http.Client) BaseProvider

NewBaseProvider creates a new BaseProvider with common fields

func NewBaseProviderWithAPIKey(id string, includeRawOutput bool, primaryKey, fallbackKey string) (provider BaseProvider, apiKey string)

NewBaseProviderWithAPIKey creates a BaseProvider and retrieves the API key from the environment. It tries the primary key first, then falls back to the secondary key if the primary is empty.

func (b *BaseProvider) Close() error

Close closes the HTTP client’s idle connections

func (b *BaseProvider) GetHTTPClient() *http.Client

GetHTTPClient returns the underlying HTTP client for provider-specific use

func (b *BaseProvider) ID() string

ID returns the provider ID

func (b *BaseProvider) MakeJSONRequest(ctx context.Context, url string, request any, headers RequestHeaders, providerName string) ([]byte, error)

MakeJSONRequest performs a JSON HTTP POST request with common error handling. This reduces duplication across provider implementations. providerName is used for logging purposes.

func (b *BaseProvider) MakeRawRequest(ctx context.Context, url string, body []byte, headers RequestHeaders, providerName string) ([]byte, error)

MakeRawRequest performs an HTTP POST request with pre-marshaled body. Use this when you need to control the serialization yourself.

func (b *BaseProvider) ShouldIncludeRawOutput() bool

ShouldIncludeRawOutput returns whether to include raw API responses in output

func (b *BaseProvider) SupportsStreaming() bool

SupportsStreaming returns true by default (can be overridden by providers that don’t support streaming)

Credential applies authentication to HTTP requests. This is the interface that providers use to authenticate requests.

type Credential interface {
// Apply adds authentication to the HTTP request.
Apply(ctx context.Context, req *http.Request) error
// Type returns the credential type identifier.
Type() string
}

EmbedFunc is the signature for provider-specific embedding logic.

type EmbedFunc func(ctx context.Context, texts []string, model string) (EmbeddingResponse, error)

EmbeddingProvider generates text embeddings for semantic similarity operations. Implementations exist for OpenAI, Gemini, and other embedding APIs.

Embeddings are dense vector representations of text that capture semantic meaning. Similar texts will have embeddings with high cosine similarity scores.

Example usage:

provider, _ := openai.NewEmbeddingProvider()
resp, err := provider.Embed(ctx, providers.EmbeddingRequest{
Texts: []string{"Hello world", "Hi there"},
})
similarity := CosineSimilarity(resp.Embeddings[0], resp.Embeddings[1])

type EmbeddingProvider interface {
// Embed generates embeddings for the given texts.
// The response contains one embedding vector per input text, in the same order.
// Implementations should handle batching internally if the request exceeds MaxBatchSize.
Embed(ctx context.Context, req EmbeddingRequest) (EmbeddingResponse, error)
// EmbeddingDimensions returns the dimensionality of embedding vectors.
// Common values: 1536 (OpenAI ada-002/3-small), 768 (Gemini), 3072 (OpenAI 3-large)
EmbeddingDimensions() int
// MaxBatchSize returns the maximum number of texts per single API request.
// Callers should batch requests appropriately, or rely on the provider
// to handle splitting internally.
MaxBatchSize() int
// ID returns the provider identifier (e.g., "openai-embedding", "gemini-embedding")
ID() string
}
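
Where an implementation does not split oversized requests itself, a caller-side batching loop is straightforward. A hedged sketch:

func embedAll(ctx context.Context, p providers.EmbeddingProvider, texts []string) ([][]float32, error) {
	size := p.MaxBatchSize()
	if size <= 0 {
		size = len(texts) // guard against providers that report no limit
	}
	out := make([][]float32, 0, len(texts))
	for start := 0; start < len(texts); start += size {
		end := start + size
		if end > len(texts) {
			end = len(texts)
		}
		resp, err := p.Embed(ctx, providers.EmbeddingRequest{Texts: texts[start:end]})
		if err != nil {
			return nil, err
		}
		out = append(out, resp.Embeddings...)
	}
	return out, nil
}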

EmbeddingRequest represents a request for text embeddings.

type EmbeddingRequest struct {
// Texts to embed (batched for efficiency)
Texts []string
// Model override for embedding model (optional, uses provider default if empty)
Model string
}

EmbeddingResponse contains the embedding vectors from a provider.

type EmbeddingResponse struct {
// Embeddings contains one vector per input text, in the same order
Embeddings [][]float32
// Model is the model that was used for embedding
Model string
// Usage contains token consumption information (optional)
Usage *EmbeddingUsage
}

EmbeddingUsage tracks token consumption for embedding requests.

type EmbeddingUsage struct {
// TotalTokens is the total number of tokens processed
TotalTokens int
}

ExecutionResult is a forward declaration to avoid circular import.

type ExecutionResult interface{}

HTTPRequestConfig configures how to make an HTTP request.

type HTTPRequestConfig struct {
URL string
Body []byte
UseAPIKey bool // If true, adds Authorization: Bearer <APIKey> header
ContentType string // Defaults to application/json
}

ImageDetail specifies the level of detail for image processing

type ImageDetail string

Image detail levels for multimodal processing.

const (
ImageDetailLow ImageDetail = "low" // Faster, less detailed analysis
ImageDetailHigh ImageDetail = "high" // Slower, more detailed analysis
ImageDetailAuto ImageDetail = "auto" // Provider chooses automatically
)

MediaLoader handles loading media content from various sources (inline data, files, URLs, storage). It provides a unified interface for providers to access media regardless of the source.

type MediaLoader struct {
// contains filtered or unexported fields
}

func NewMediaLoader(config MediaLoaderConfig) *MediaLoader

NewMediaLoader creates a new MediaLoader with the given configuration.

func (ml *MediaLoader) GetBase64Data(ctx context.Context, media *types.MediaContent) (string, error)

GetBase64Data loads media content and returns it as base64-encoded data. It handles all media sources: inline data, file paths, URLs, and storage references.
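
A minimal sketch; the MediaContent value is assumed to come from a message part, and the limits shown are the documented defaults:

loader := providers.NewMediaLoader(providers.MediaLoaderConfig{
	HTTPTimeout:     30 * time.Second,
	MaxURLSizeBytes: 50 << 20, // 50MB
})
b64, err := loader.GetBase64Data(ctx, media) // media is a *types.MediaContent from a message part
if err != nil {
	return err
}
fmt.Println("payload length (base64 chars):", len(b64))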

MediaLoaderConfig configures the MediaLoader behavior.

type MediaLoaderConfig struct {
// StorageService is optional - required only for loading from storage references
StorageService storage.MediaStorageService
// HTTPTimeout for URL fetching (default: 30s)
HTTPTimeout time.Duration
// MaxURLSizeBytes is the maximum size for URL-based media (default: 50MB)
MaxURLSizeBytes int64
}

MultimodalCapabilities describes what types of multimodal content a provider supports

type MultimodalCapabilities struct {
SupportsImages bool // Provider can process image inputs
SupportsAudio bool // Provider can process audio inputs
SupportsVideo bool // Provider can process video inputs
SupportsDocuments bool // Provider can process document inputs (PDF, etc.)
ImageFormats []string // Supported image MIME types (e.g., "image/jpeg", "image/png")
AudioFormats []string // Supported audio MIME types (e.g., "audio/mpeg", "audio/wav")
VideoFormats []string // Supported video MIME types (e.g., "video/mp4")
DocumentFormats []string // Supported document MIME types (e.g., "application/pdf")
MaxImageSizeMB int // Maximum image size in megabytes (0 = unlimited/unknown)
MaxAudioSizeMB int // Maximum audio size in megabytes (0 = unlimited/unknown)
MaxVideoSizeMB int // Maximum video size in megabytes (0 = unlimited/unknown)
MaxDocumentSizeMB int // Maximum document size in megabytes (0 = unlimited/unknown)
}

MultimodalSupport interface for providers that support multimodal inputs

type MultimodalSupport interface {
Provider // Extends the base Provider interface
// GetMultimodalCapabilities returns what types of multimodal content this provider supports
GetMultimodalCapabilities() MultimodalCapabilities
// PredictMultimodal performs a predict request with multimodal message content
// Messages in the request can contain Parts with images, audio, or video
PredictMultimodal(ctx context.Context, req PredictionRequest) (PredictionResponse, error)
// PredictMultimodalStream performs a streaming predict request with multimodal content
PredictMultimodalStream(ctx context.Context, req PredictionRequest) (<-chan StreamChunk, error)
}

func GetMultimodalProvider(p Provider) MultimodalSupport

GetMultimodalProvider safely casts a provider to MultimodalSupport. It returns nil if the provider doesn't support multimodal input.
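
A hedged sketch of capability checking before sending image content; the "image" content-type string passed to IsFormatSupported is an assumption:

if mm := providers.GetMultimodalProvider(p); mm != nil {
	caps := mm.GetMultimodalCapabilities()
	if caps.SupportsImages && providers.IsFormatSupported(p, "image", "image/png") { // content-type value assumed
		resp, err := mm.PredictMultimodal(ctx, req)
		if err != nil {
			return err
		}
		fmt.Println(resp.Content)
	}
}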

MultimodalToolSupport interface for providers that support both multimodal and tools

type MultimodalToolSupport interface {
MultimodalSupport // Extends multimodal support
ToolSupport // Extends tool support
// PredictMultimodalWithTools performs a predict request with both multimodal content and tools
PredictMultimodalWithTools(ctx context.Context, req PredictionRequest, tools interface{}, toolChoice string) (PredictionResponse, []types.MessageToolCall, error)
}

PlatformConfig holds platform-specific settings from config.

type PlatformConfig struct {
Type string
Region string
Project string
Endpoint string
AdditionalConfig map[string]interface{}
}

PredictionRequest represents a request to a predict provider

type PredictionRequest struct {
System string `json:"system"`
Messages []types.Message `json:"messages"`
Temperature float32 `json:"temperature"`
TopP float32 `json:"top_p"`
MaxTokens int `json:"max_tokens"`
Seed *int `json:"seed,omitempty"`
ResponseFormat *ResponseFormat `json:"response_format,omitempty"` // Optional response format (JSON mode)
Metadata map[string]any `json:"metadata,omitempty"` // Provider-specific context
}

PredictionResponse represents a response from a predict provider

type PredictionResponse struct {
Content string `json:"content"`
Parts []types.ContentPart `json:"parts,omitempty"` // Multimodal content parts (text, image, audio, video)
CostInfo *types.CostInfo `json:"cost_info,omitempty"` // Cost breakdown for this response (includes token counts)
Latency time.Duration `json:"latency"`
Raw []byte `json:"raw,omitempty"`
RawRequest any `json:"raw_request,omitempty"` // Raw API request (for debugging)
ToolCalls []types.MessageToolCall `json:"tool_calls,omitempty"` // Tools called in this response
}

Pricing defines cost per 1K tokens for input and output

type Pricing struct {
InputCostPer1K float64
OutputCostPer1K float64
}

Provider interface defines the contract for predict providers

type Provider interface {
ID() string
// Model returns the model name/identifier used by this provider
Model() string
Predict(ctx context.Context, req PredictionRequest) (PredictionResponse, error)
// Streaming support
PredictStream(ctx context.Context, req PredictionRequest) (<-chan StreamChunk, error)
SupportsStreaming() bool
ShouldIncludeRawOutput() bool
Close() error // Close cleans up provider resources (e.g., HTTP connections)
// CalculateCost calculates cost breakdown for given token counts
CalculateCost(inputTokens, outputTokens, cachedTokens int) types.CostInfo
}

func CreateProviderFromSpec(spec ProviderSpec) (Provider, error)

CreateProviderFromSpec creates a provider implementation from a spec. Returns an error if the provider type is unsupported.
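
A hedged sketch of registering a custom factory and building a provider from a spec; the "mock" type and newMockProvider constructor are hypothetical:

providers.RegisterProviderFactory("mock", func(spec providers.ProviderSpec) (providers.Provider, error) {
	return newMockProvider(spec.Model), nil // hypothetical constructor
})

p, err := providers.CreateProviderFromSpec(providers.ProviderSpec{
	ID:    "mock-1",
	Type:  "mock",
	Model: "mock-small",
	Defaults: providers.ProviderDefaults{
		Temperature: 0.2,
		MaxTokens:   512,
	},
})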

ProviderContractTests defines a comprehensive test suite that validates the Provider interface contract. All provider implementations should pass these tests to ensure consistent behavior across the system.

Usage:

func TestOpenAIProviderContract(t *testing.T) {
provider := NewProvider(...)
RunProviderContractTests(t, ProviderContractTests{Provider: provider})
}

type ProviderContractTests struct {
// Provider instance to test
Provider Provider
// SupportsToolsExpected indicates whether this provider should support tools
SupportsToolsExpected bool
// SupportsStreamingExpected indicates whether this provider should support streaming
SupportsStreamingExpected bool
}

ProviderDefaults holds default parameters for providers

type ProviderDefaults struct {
Temperature float32
TopP float32
MaxTokens int
Pricing Pricing
}

ProviderFactory is a function that creates a provider from a spec

type ProviderFactory func(spec ProviderSpec) (Provider, error)

ProviderSpec holds the configuration needed to create a provider instance

type ProviderSpec struct {
ID string
Type string
Model string
BaseURL string
Defaults ProviderDefaults
IncludeRawOutput bool
AdditionalConfig map[string]interface{} // Flexible key-value pairs for provider-specific configuration
// Credential holds the resolved credential for this provider.
// If nil, providers fall back to environment variable lookup.
Credential Credential
// Platform identifies the hosting platform (e.g., "bedrock", "vertex", "azure").
// Empty string means direct API access to the provider.
Platform string
// PlatformConfig holds platform-specific configuration.
// Only set when Platform is non-empty.
PlatformConfig *PlatformConfig
}

ProviderTools represents provider-specific tool configuration. Each provider returns its own native format:

  • OpenAI: []openAITool
  • Claude: []claudeTool
  • Gemini: geminiToolWrapper
  • Ollama: []ollamaTool
  • vLLM: []vllmTool
  • Mock: []*ToolDescriptor

The value returned by BuildTooling should be passed directly to PredictWithTools.

type ProviderTools = any

Registry manages available providers

type Registry struct {
// contains filtered or unexported fields
}

func NewRegistry() *Registry

NewRegistry creates a new provider registry

func (r *Registry) Close() error

Close closes all registered providers and cleans up their resources. Returns the first error encountered, if any.

func (r *Registry) Get(id string) (Provider, bool)

Get retrieves a provider by ID, returning the provider and a boolean indicating if it was found.

func (r *Registry) List() []string

List returns all registered provider IDs

func (r *Registry) Register(provider Provider)

Register adds a provider to the registry using its ID as the key.
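
A minimal sketch of registry usage; the provider values and the "openai" ID are assumptions:

reg := providers.NewRegistry()
reg.Register(openaiProvider) // pre-constructed Provider implementations (assumed)
reg.Register(claudeProvider)
defer reg.Close()

if p, ok := reg.Get("openai"); ok {
	resp, err := p.Predict(ctx, req)
	if err != nil {
		return err
	}
	fmt.Println(resp.Content, resp.Latency)
}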

RequestHeaders is a map of HTTP header key-value pairs

type RequestHeaders map[string]string

ResponseFormat specifies the format of the model’s response

type ResponseFormat struct {
// Type specifies the response format type
Type ResponseFormatType `json:"type"`
// JSONSchema is the schema to use when Type is ResponseFormatJSONSchema
// This should be a valid JSON Schema object
JSONSchema json.RawMessage `json:"json_schema,omitempty"`
// SchemaName is an optional name for the schema (used by OpenAI)
SchemaName string `json:"schema_name,omitempty"`
// Strict enables strict schema validation (OpenAI-specific)
Strict bool `json:"strict,omitempty"`
}

ResponseFormatType defines the type of response format

type ResponseFormatType string

const (
// ResponseFormatText is the default text response format
ResponseFormatText ResponseFormatType = "text"
// ResponseFormatJSON requests JSON output from the model
ResponseFormatJSON ResponseFormatType = "json_object"
// ResponseFormatJSONSchema requests JSON output conforming to a schema
ResponseFormatJSONSchema ResponseFormatType = "json_schema"
)
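
A minimal sketch of requesting schema-constrained JSON output via PredictionRequest; the schema and field names are illustrative:

schema := json.RawMessage(`{"type":"object","properties":{"sentiment":{"type":"string"}},"required":["sentiment"]}`)
req := providers.PredictionRequest{
	System:   "Classify the sentiment of the user's message.",
	Messages: msgs, // []types.Message prepared elsewhere
	ResponseFormat: &providers.ResponseFormat{
		Type:       providers.ResponseFormatJSONSchema,
		JSONSchema: schema,
		SchemaName: "sentiment_result",
		Strict:     true,
	},
}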

SSEScanner scans Server-Sent Events (SSE) streams

type SSEScanner struct {
// contains filtered or unexported fields
}

func NewSSEScanner(r io.Reader) *SSEScanner

NewSSEScanner creates a new SSE scanner

func (s *SSEScanner) Data() string

Data returns the current event data

func (s *SSEScanner) Err() error

Err returns any scanning error

func (s *SSEScanner) Scan() bool

Scan advances to the next SSE event
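
A minimal sketch of scanning an SSE body; the "[DONE]" sentinel is a convention used by some providers, not something the scanner itself enforces:

scanner := providers.NewSSEScanner(resp.Body) // resp is an *http.Response from a streaming endpoint
for scanner.Scan() {
	data := scanner.Data()
	if data == "[DONE]" { // provider-specific sentinel (assumption)
		break
	}
	fmt.Println(data)
}
if err := scanner.Err(); err != nil {
	return err
}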

StreamChunk represents a batch of tokens with metadata

type StreamChunk struct {
// Content is the accumulated content so far
Content string `json:"content"`
// Delta is the new content in this chunk
Delta string `json:"delta"`
// MediaDelta contains new media content in this chunk (audio, video, images)
// Uses the same MediaContent type as non-streaming messages for API consistency.
MediaDelta *types.MediaContent `json:"media_delta,omitempty"`
// TokenCount is the total number of tokens so far
TokenCount int `json:"token_count"`
// DeltaTokens is the number of tokens in this delta
DeltaTokens int `json:"delta_tokens"`
// ToolCalls contains accumulated tool calls (for assistant messages that invoke tools)
ToolCalls []types.MessageToolCall `json:"tool_calls,omitempty"`
// FinishReason is nil until stream is complete
// Values: "stop", "length", "content_filter", "tool_calls", "error", "validation_failed", "cancelled"
FinishReason *string `json:"finish_reason,omitempty"`
// Interrupted indicates the response was interrupted (e.g., user started speaking)
// When true, clients should clear any buffered audio and prepare for a new response
Interrupted bool `json:"interrupted,omitempty"`
// Error is set if an error occurred during streaming
Error error `json:"error,omitempty"`
// Metadata contains provider-specific metadata
Metadata map[string]interface{} `json:"metadata,omitempty"`
// FinalResult contains the complete execution result (only set in the final chunk)
FinalResult ExecutionResult `json:"final_result,omitempty"`
// CostInfo contains cost breakdown (only present in final chunk when FinishReason != nil)
CostInfo *types.CostInfo `json:"cost_info,omitempty"`
}
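
A minimal sketch of consuming StreamChunk values from a provider's PredictStream; error handling is simplified:

chunks, err := p.PredictStream(ctx, req)
if err != nil {
	return err
}
for chunk := range chunks {
	if chunk.Error != nil {
		return chunk.Error
	}
	fmt.Print(chunk.Delta)
	if chunk.FinishReason != nil { // final chunk: FinishReason (and CostInfo, when present) are set here
		log.Printf("finish=%s tokens=%d", *chunk.FinishReason, chunk.TokenCount)
		break
	}
}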

StreamEvent is sent to observers for monitoring

type StreamEvent struct {
// Type is the event type: "chunk", "complete", "error"
Type string `json:"type"`
// Chunk contains the stream chunk data
Chunk *StreamChunk `json:"chunk,omitempty"`
// Error is set for error events
Error error `json:"error,omitempty"`
// Timestamp is when the event occurred
Timestamp time.Time `json:"timestamp"`
}

StreamInputSession manages a bidirectional streaming session with a provider. The session allows sending media chunks (e.g., audio from a microphone) and receiving streaming responses from the LLM.

Example usage:

session, err := provider.CreateStreamSession(ctx, StreamInputRequest{
Config: types.StreamingMediaConfig{
Type: types.ContentTypeAudio,
ChunkSize: 8192,
SampleRate: 16000,
Encoding: "pcm",
Channels: 1,
},
})
if err != nil {
return err
}
defer session.Close()
// Send audio chunks in a goroutine
go func() {
for chunk := range micInput {
if err := session.SendChunk(ctx, chunk); err != nil {
log.Printf("send error: %v", err)
break
}
}
}()
// Receive responses
for chunk := range session.Response() {
if chunk.Error != nil {
log.Printf("response error: %v", chunk.Error)
break
}
fmt.Print(chunk.Delta)
}
type StreamInputSession interface {
// SendChunk sends a media chunk to the provider.
// Returns an error if the chunk cannot be sent or the session is closed.
// This method is safe to call from multiple goroutines.
SendChunk(ctx context.Context, chunk *types.MediaChunk) error
// SendText sends a text message to the provider during the streaming session.
// This is useful for sending text prompts or instructions during audio streaming.
// Note: This marks the turn as complete, triggering a response.
SendText(ctx context.Context, text string) error
// SendSystemContext sends a text message as context without completing the turn.
// Use this for system prompts that provide context but shouldn't trigger an immediate response.
// The audio/text that follows will be processed with this context in mind.
SendSystemContext(ctx context.Context, text string) error
// Response returns a receive-only channel for streaming responses.
// The channel is closed when the session ends or encounters an error.
// Consumers should read from this channel in a separate goroutine.
Response() <-chan StreamChunk
// Close ends the streaming session and releases resources.
// After calling Close, SendChunk and SendText will return errors.
// The Response channel will be closed.
// Close is safe to call multiple times.
Close() error
// Error returns any error that occurred during the session.
// Returns nil if no error has occurred.
Error() error
// Done returns a channel that's closed when the session ends.
// This is useful for select statements to detect session completion.
Done() <-chan struct{}
}

StreamInputSupport extends the Provider interface for bidirectional streaming. Providers that implement this interface can handle streaming media input (e.g., real-time audio) and provide streaming responses.

type StreamInputSupport interface {
Provider // Extends the base Provider interface
// CreateStreamSession creates a new bidirectional streaming session.
// The session remains active until Close() is called or an error occurs.
// Returns an error if the provider doesn't support the requested media type.
CreateStreamSession(ctx context.Context, req *StreamingInputConfig) (StreamInputSession, error)
// SupportsStreamInput returns the media types supported for streaming input.
// Common values: types.ContentTypeAudio, types.ContentTypeVideo
SupportsStreamInput() []string
// GetStreamingCapabilities returns detailed information about streaming support.
// This includes supported codecs, sample rates, and other constraints.
GetStreamingCapabilities() StreamingCapabilities
}
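A sketch of detecting streaming support on a provider via type assertion (the field values are illustrative):

if sp, ok := provider.(StreamInputSupport); ok {
	caps := sp.GetStreamingCapabilities()
	if caps.BidirectionalSupport && caps.Audio != nil {
		session, err := sp.CreateStreamSession(ctx, &StreamingInputConfig{
			Config: types.StreamingMediaConfig{
				Type:       types.ContentTypeAudio,
				SampleRate: 16000,
				Encoding:   "pcm",
				Channels:   1,
			},
		})
		if err != nil {
			return err
		}
		defer session.Close()
	}
}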

StreamObserver receives stream events for monitoring

type StreamObserver interface {
OnChunk(chunk StreamChunk)
OnComplete(totalTokens int, duration time.Duration)
OnError(err error)
}
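A minimal observer implementation might look like the following (loggingObserver is a hypothetical name):

type loggingObserver struct{}

func (loggingObserver) OnChunk(chunk StreamChunk) {
	log.Printf("delta=%q tokens=%d", chunk.Delta, chunk.DeltaTokens)
}

func (loggingObserver) OnComplete(totalTokens int, duration time.Duration) {
	log.Printf("stream complete: %d tokens in %s", totalTokens, duration)
}

func (loggingObserver) OnError(err error) {
	log.Printf("stream error: %v", err)
}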

StreamingCapabilities describes what streaming features a provider supports.

type StreamingCapabilities struct {
// SupportedMediaTypes lists the media types that can be streamed
// Values: types.ContentTypeAudio, types.ContentTypeVideo
SupportedMediaTypes []string `json:"supported_media_types"`
// Audio capabilities
Audio *AudioStreamingCapabilities `json:"audio,omitempty"`
// Video capabilities
Video *VideoStreamingCapabilities `json:"video,omitempty"`
// BidirectionalSupport indicates if the provider supports full bidirectional streaming
BidirectionalSupport bool `json:"bidirectional_support"`
// MaxSessionDuration is the maximum duration for a streaming session (in seconds)
// Zero means no limit
MaxSessionDuration int `json:"max_session_duration,omitempty"`
// MinChunkSize is the minimum chunk size in bytes
MinChunkSize int `json:"min_chunk_size,omitempty"`
// MaxChunkSize is the maximum chunk size in bytes
MaxChunkSize int `json:"max_chunk_size,omitempty"`
}

StreamingInputConfig configures a new streaming input session.

type StreamingInputConfig struct {
// Config specifies the media streaming configuration (codec, sample rate, etc.)
Config types.StreamingMediaConfig `json:"config"`
// SystemInstruction is the system prompt to configure the model's behavior.
// For Gemini Live API, this is included in the setup message.
SystemInstruction string `json:"system_instruction,omitempty"`
// Tools defines functions the model can call during the session.
// When configured, the model returns structured tool calls instead of
// speaking them as text. Supported by Gemini Live API.
Tools []StreamingToolDefinition `json:"tools,omitempty"`
// Metadata contains provider-specific session configuration
// Example: {"response_modalities": ["TEXT", "AUDIO"]} for Gemini
Metadata map[string]interface{} `json:"metadata,omitempty"`
}

func (r *StreamingInputConfig) Validate() error

Validate checks that the StreamingInputConfig is valid.

StreamingToolDefinition represents a function/tool available in streaming sessions.

type StreamingToolDefinition struct {
Name string `json:"name"`
Description string `json:"description,omitempty"`
Parameters map[string]interface{} `json:"parameters,omitempty"` // JSON Schema
}

ToolDescriptor represents a tool that can be used by providers

type ToolDescriptor struct {
Name string `json:"name"`
Description string `json:"description"`
InputSchema json.RawMessage `json:"input_schema"`
OutputSchema json.RawMessage `json:"output_schema"`
}

ToolResponse represents a single tool execution result.

type ToolResponse struct {
ToolCallID string `json:"tool_call_id"`
Result string `json:"result"`
IsError bool `json:"is_error,omitempty"` // True if the tool execution failed
}

ToolResponseSupport is an optional interface for streaming sessions that support tool calling. When the model returns a tool call, the caller can execute the tool and send the result back using this interface. The session will then continue generating a response based on the tool result.

Use type assertion to check if a StreamInputSession supports this interface:

if toolSession, ok := session.(ToolResponseSupport); ok {
err := toolSession.SendToolResponse(ctx, toolCallID, result)
}
type ToolResponseSupport interface {
// SendToolResponse sends the result of a tool execution back to the model.
// The toolCallID must match the ID from the MessageToolCall.
// The result is typically JSON-encoded but the format depends on the tool.
// After receiving the tool response, the model will continue generating.
SendToolResponse(ctx context.Context, toolCallID string, result string) error
// SendToolResponses sends multiple tool results at once (for parallel tool calls).
// This is more efficient than sending individual responses for providers that
// support batched tool responses.
SendToolResponses(ctx context.Context, responses []ToolResponse) error
}

ToolResult represents the result of a tool execution. It is an alias for types.MessageToolResult, used in provider-specific contexts.

type ToolResult = types.MessageToolResult

ToolSupport interface for providers that support tool/function calling

type ToolSupport interface {
Provider // Extends the base Provider interface
// BuildTooling converts tool descriptors to provider-native format.
// Returns a provider-specific type that should be passed to PredictWithTools.
BuildTooling(descriptors []*ToolDescriptor) (ProviderTools, error)
// PredictWithTools performs a predict request with tool support.
// The tools parameter should be the value returned by BuildTooling.
PredictWithTools(
ctx context.Context,
req PredictionRequest,
tools ProviderTools,
toolChoice string,
) (PredictionResponse, []types.MessageToolCall, error)
// PredictStreamWithTools performs a streaming predict request with tool support.
// The tools parameter should be the value returned by BuildTooling.
PredictStreamWithTools(
ctx context.Context,
req PredictionRequest,
tools ProviderTools,
toolChoice string,
) (<-chan StreamChunk, error)
}
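A hedged sketch of the tool-calling flow, assuming descriptors ([]*ToolDescriptor) and req (PredictionRequest) are built elsewhere and that "auto" is an accepted tool choice value:

if ts, ok := provider.(ToolSupport); ok {
	tools, err := ts.BuildTooling(descriptors)
	if err != nil {
		return err
	}
	resp, toolCalls, err := ts.PredictWithTools(ctx, req, tools, "auto")
	if err != nil {
		return err
	}
	if len(toolCalls) > 0 {
		// Execute the calls with your tool registry, then issue a follow-up request.
	}
	_ = resp
}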

UnsupportedContentError is returned when a provider doesn’t support certain content types

type UnsupportedContentError struct {
Provider string // Provider ID
ContentType string // "image", "audio", "video", or "multimodal"
Message string // Human-readable error message
PartIndex int // Index of the unsupported content part (if applicable)
MIMEType string // Specific MIME type that's unsupported (if applicable)
}

func (e *UnsupportedContentError) Error() string
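Callers can detect this error with errors.As, for example:

var unsupported *UnsupportedContentError
if errors.As(err, &unsupported) {
	log.Printf("provider %s cannot handle %s content (mime=%s, part=%d)",
		unsupported.Provider, unsupported.ContentType, unsupported.MIMEType, unsupported.PartIndex)
}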

UnsupportedProviderError is returned when a provider type is not recognized

type UnsupportedProviderError struct {
ProviderType string
}

func (e *UnsupportedProviderError) Error() string

Error returns the error message for this unsupported provider error.

ValidationAbortError is returned when a streaming validator aborts a stream

type ValidationAbortError struct {
Reason string
Chunk StreamChunk
}

func (e *ValidationAbortError) Error() string

Error returns the error message for this validation abort error.

VideoResolution represents a video resolution.

type VideoResolution struct {
Width int `json:"width"`
Height int `json:"height"`
}

func (r VideoResolution) String() string

String returns a string representation of the resolution (e.g., “1920x1080”)

VideoStreamingCapabilities describes video streaming support.

type VideoStreamingCapabilities struct {
// SupportedEncodings lists supported video encodings
// Common values: "h264", "vp8", "vp9", "av1"
SupportedEncodings []string `json:"supported_encodings"`
// SupportedResolutions lists supported resolutions (width x height)
SupportedResolutions []VideoResolution `json:"supported_resolutions"`
// SupportedFrameRates lists supported frame rates
// Common values: 15, 24, 30, 60
SupportedFrameRates []int `json:"supported_frame_rates"`
// PreferredEncoding is the recommended encoding
PreferredEncoding string `json:"preferred_encoding"`
// PreferredResolution is the recommended resolution
PreferredResolution VideoResolution `json:"preferred_resolution"`
// PreferredFrameRate is the recommended frame rate
PreferredFrameRate int `json:"preferred_frame_rate"`
}
import "github.com/AltairaLabs/PromptKit/runtime/recording"

Package recording provides session recording export, import, and replay for analysis.

EventIterator provides iteration over events in time order.

type EventIterator struct {
// contains filtered or unexported fields
}

func (it *EventIterator) Next() (RecordedEvent, bool)

Next returns the next event and true, or false if no more events.

ExportOptions configures the export process.

type ExportOptions struct {
// ProviderName to include in metadata.
ProviderName string
// Model to include in metadata.
Model string
// Custom metadata to include.
Custom map[string]any
}

Format specifies the recording file format.

type Format string

const (
// FormatJSON uses JSON encoding (human-readable, larger files).
FormatJSON Format = "json"
// FormatJSONLines uses JSON Lines encoding (streamable, one event per line).
FormatJSONLines Format = "jsonl"
)

MessageSnapshot represents a message at a point in time.

type MessageSnapshot struct {
Role string
Content string
Timestamp time.Time
Offset time.Duration
}

Metadata contains session-level information.

type Metadata struct {
// SessionID is the unique identifier for this session.
SessionID string `json:"session_id"`
// ConversationID groups related turns within a session.
ConversationID string `json:"conversation_id,omitempty"`
// StartTime is when the session began.
StartTime time.Time `json:"start_time"`
// EndTime is when the session ended.
EndTime time.Time `json:"end_time"`
// Duration is the total session length.
Duration time.Duration `json:"duration"`
// EventCount is the total number of events.
EventCount int `json:"event_count"`
// ProviderName is the LLM provider used (e.g., "openai", "gemini").
ProviderName string `json:"provider_name,omitempty"`
// Model is the model identifier used.
Model string `json:"model,omitempty"`
// Version is the recording format version.
Version string `json:"version"`
// CreatedAt is when this recording was exported.
CreatedAt time.Time `json:"created_at"`
// Custom allows arbitrary metadata to be attached.
Custom map[string]any `json:"custom,omitempty"`
}

PlaybackState represents the state at a given playback position.

type PlaybackState struct {
// Position is the current offset from session start.
Position time.Duration
// Timestamp is the absolute timestamp at this position.
Timestamp time.Time
// CurrentEvents are events occurring at exactly this position (within tolerance).
CurrentEvents []RecordedEvent
// RecentEvents are events that occurred in the last `window` duration.
RecentEvents []RecordedEvent
// ActiveAnnotations are annotations whose time range includes this position.
ActiveAnnotations []*annotations.Annotation
// Messages accumulated up to this point.
Messages []MessageSnapshot
// AudioInputActive indicates if user audio is present at this position.
AudioInputActive bool
// AudioOutputActive indicates if assistant audio is present at this position.
AudioOutputActive bool
}

RecordedEvent wraps an event with additional recording-specific data.

type RecordedEvent struct {
// Sequence is the event's position in the recording.
Sequence int64 `json:"seq"`
// ParentSequence links to a parent event (for causality).
ParentSequence int64 `json:"parent_seq,omitempty"`
// Type is the event type.
Type events.EventType `json:"type"`
// Timestamp is when the event occurred.
Timestamp time.Time `json:"timestamp"`
// Offset is the time since session start.
Offset time.Duration `json:"offset"`
// SessionID identifies the session.
SessionID string `json:"session_id"`
// ConversationID identifies the conversation within the session.
ConversationID string `json:"conversation_id,omitempty"`
// RunID identifies the specific run/request.
RunID string `json:"run_id,omitempty"`
// DataType is the Go type name of the original data.
DataType string `json:"data_type,omitempty"`
// Data is the event payload as raw JSON.
Data json.RawMessage `json:"data,omitempty"`
}

ReplayPlayer provides synchronized playback of session recordings with event correlation. It allows seeking to any position and retrieving events/annotations at that time.

type ReplayPlayer struct {
// contains filtered or unexported fields
}

func NewReplayPlayer(rec *SessionRecording) (*ReplayPlayer, error)

NewReplayPlayer creates a new replay player for the given recording.

func (rp *ReplayPlayer) Advance(duration time.Duration) []RecordedEvent

Advance moves the position forward by the specified duration and returns any events that occurred during that interval.
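A minimal playback loop, assuming rec is a previously loaded *SessionRecording:

player, err := NewReplayPlayer(rec)
if err != nil {
	return err
}
for player.Position() < player.Duration() {
	for _, ev := range player.Advance(1 * time.Second) {
		fmt.Printf("%v %v\n", ev.Offset, ev.Type)
	}
}
state := player.GetState()
fmt.Printf("%d messages at %s\n", len(state.Messages), player.FormatPosition())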

func (rp *ReplayPlayer) AdvanceTo(target time.Duration) []RecordedEvent

AdvanceTo moves to the specified position and returns events encountered.

func (rp *ReplayPlayer) Duration() time.Duration

Duration returns the total recording duration.

func (rp *ReplayPlayer) FormatPosition() string

FormatPosition returns a human-readable position string.

func (rp *ReplayPlayer) GetEventsByType(eventType events.EventType) []RecordedEvent

GetEventsByType returns all events of the specified type.

func (rp *ReplayPlayer) GetEventsInRange(start, end time.Duration) []RecordedEvent

GetEventsInRange returns all events within the specified time range.

func (rp *ReplayPlayer) GetState() *PlaybackState

GetState returns the playback state at the current position.

func (rp *ReplayPlayer) GetStateAt(offset time.Duration) *PlaybackState

GetStateAt returns the playback state at the specified offset.

func (rp *ReplayPlayer) NewEventIterator(start, end time.Duration) *EventIterator

NewEventIterator creates an iterator over events from start to end.

func (rp *ReplayPlayer) Position() time.Duration

Position returns the current playback position.

func (rp *ReplayPlayer) Recording() *SessionRecording

Recording returns the underlying session recording.

func (rp *ReplayPlayer) Seek(offset time.Duration)

Seek moves the playback position to the specified offset from session start.

func (rp *ReplayPlayer) SetAnnotations(anns []*annotations.Annotation)

SetAnnotations adds annotations for correlation during playback.

func (rp *ReplayPlayer) Timeline() *events.MediaTimeline

Timeline returns the media timeline for audio access.

SessionRecording is a self-contained artifact for replay and analysis. It contains all information needed to replay a session without access to the original event store.

type SessionRecording struct {
// Metadata about the recording
Metadata Metadata `json:"metadata"`
// Events in chronological order
Events []RecordedEvent `json:"events"`
}

func Export(ctx context.Context, store events.EventStore, sessionID string) (*SessionRecording, error)

Export creates a SessionRecording from stored events.

func ExportWithOptions(ctx context.Context, store events.EventStore, sessionID string, opts ExportOptions) (*SessionRecording, error)

ExportWithOptions creates a SessionRecording with additional metadata.

func Load(path string) (*SessionRecording, error)

Load reads a recording from a file. Supports multiple formats:

  • JSON: Full SessionRecording struct
  • JSONL (SessionRecording): First line is {"type":"metadata",…}, subsequent lines are {"type":"event",…}
  • JSONL (EventStore): Lines are {"seq":N,"event":{…}} format from FileEventStore

func (r *SessionRecording) Duration() time.Duration

Duration returns the total duration of the recording.

func (r *SessionRecording) SaveTo(path string, format Format) error

SaveTo writes the recording to a file.
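A hedged export/import round trip, assuming store is an events.EventStore containing the session (the file path is illustrative):

rec, err := Export(ctx, store, sessionID)
if err != nil {
	return err
}
if err := rec.SaveTo("session.jsonl", FormatJSONLines); err != nil {
	return err
}
loaded, err := Load("session.jsonl")
if err != nil {
	return err
}
fmt.Println(loaded.String(), loaded.Duration())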

func (r *SessionRecording) String() string

String returns a human-readable summary of the recording.

func (r *SessionRecording) ToEvents() []*events.Event

ToEvents converts recorded events back to Event objects. Note: Data is left as nil since concrete types cannot be recovered without deserialization. Use ToTypedEvents() for full data recovery.

func (r *SessionRecording) ToMediaTimeline(blobStore events.BlobStore) (*events.MediaTimeline, error)

ToMediaTimeline creates a MediaTimeline from this recording for audio/video reconstruction. The blobStore is optional and used for loading external blob references (nil for inline data only).

func (r *SessionRecording) ToTypedEvents() ([]*events.Event, error)

ToTypedEvents converts recorded events back to Event objects with properly typed Data fields. This enables reconstruction of audio/video tracks via MediaTimeline.

import "github.com/AltairaLabs/PromptKit/runtime/statestore"

Package statestore provides conversation state persistence and management.

ErrInvalidID is returned when an invalid conversation ID is provided.

var ErrInvalidID = errors.New("invalid conversation ID")

ErrInvalidState is returned when a conversation state is invalid.

var ErrInvalidState = errors.New("invalid conversation state")

ErrNotFound is returned when a conversation doesn’t exist in the store.

var ErrNotFound = errors.New("conversation not found")

ConversationState represents stored conversation state in the state store. This is the primary data structure for persisting and loading conversation history.

type ConversationState struct {
ID string // Unique conversation identifier
UserID string // User who owns this conversation
Messages []types.Message // Message history (using unified types.Message)
SystemPrompt string // System prompt for this conversation
Summaries []Summary // Compressed summaries of old turns
TokenCount int // Total tokens in messages
LastAccessedAt time.Time // Last time conversation was accessed
Metadata map[string]interface{} // Arbitrary metadata (e.g., extracted context)
}

ListOptions provides filtering and pagination options for listing conversations.

type ListOptions struct {
// UserID filters conversations by the user who owns them.
// If empty, all conversations are returned (subject to pagination).
UserID string
// Limit is the maximum number of conversation IDs to return.
// If 0, a default limit (e.g., 100) should be applied.
Limit int
// Offset is the number of conversations to skip (for pagination).
Offset int
// SortBy specifies the field to sort by (e.g., "created_at", "updated_at").
// If empty, implementation-specific default sorting is used.
SortBy string
// SortOrder specifies sort direction: "asc" or "desc".
// If empty, defaults to "desc" (newest first).
SortOrder string
}

MemoryStore provides an in-memory implementation of the Store interface. It is thread-safe and suitable for development, testing, and single-instance deployments. For distributed systems, use RedisStore or a database-backed implementation.

type MemoryStore struct {
// contains filtered or unexported fields
}

func NewMemoryStore() *MemoryStore

NewMemoryStore creates a new in-memory state store.

func (s *MemoryStore) Delete(ctx context.Context, id string) error

Delete removes a conversation state by ID.

func (s *MemoryStore) Fork(ctx context.Context, sourceID, newID string) error

Fork creates a copy of an existing conversation state with a new ID.

func (s *MemoryStore) List(ctx context.Context, opts ListOptions) ([]string, error)

List returns conversation IDs matching the given criteria.

func (s *MemoryStore) Load(ctx context.Context, id string) (*ConversationState, error)

Load retrieves a conversation state by ID. Returns a deep copy to prevent external mutations.

func (s *MemoryStore) Save(ctx context.Context, state *ConversationState) error

Save persists a conversation state. If it already exists, it will be updated.
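A small sketch of the store lifecycle (the IDs and prompt text are illustrative):

store := NewMemoryStore()
err := store.Save(ctx, &ConversationState{
	ID:           "conv-1",
	UserID:       "user-1",
	SystemPrompt: "You are a helpful assistant.",
})
if err != nil {
	return err
}
state, err := store.Load(ctx, "conv-1")
if err != nil {
	return err
}
// Branch the conversation without mutating the original.
if err := store.Fork(ctx, "conv-1", "conv-1-experiment"); err != nil {
	return err
}
_ = state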

RedisOption configures a RedisStore.

type RedisOption func(*RedisStore)

func WithPrefix(prefix string) RedisOption

WithPrefix sets the key prefix for Redis keys. Default is “promptkit”.

func WithTTL(ttl time.Duration) RedisOption

WithTTL sets the time-to-live for conversation states. After this duration, conversations will be automatically deleted. Default is 24 hours. Set to 0 for no expiration.

RedisStore provides a Redis-backed implementation of the Store interface. It uses JSON serialization for state storage and supports automatic TTL-based cleanup. This implementation is suitable for distributed systems and production deployments.

type RedisStore struct {
// contains filtered or unexported fields
}

func NewRedisStore(client *redis.Client, opts ...RedisOption) *RedisStore

NewRedisStore creates a new Redis-backed state store.

Example:

store := NewRedisStore(
redis.NewClient(&redis.Options{Addr: "localhost:6379"}),
WithTTL(24 * time.Hour),
WithPrefix("myapp"),
)

func (s *RedisStore) Delete(ctx context.Context, id string) error

Delete removes a conversation state from Redis.

func (s *RedisStore) Fork(ctx context.Context, sourceID, newID string) error

Fork creates a copy of an existing conversation state with a new ID.

func (s *RedisStore) List(ctx context.Context, opts ListOptions) ([]string, error)

List returns conversation IDs matching the given criteria.

func (s *RedisStore) Load(ctx context.Context, id string) (*ConversationState, error)

Load retrieves a conversation state by ID from Redis.

func (s *RedisStore) Save(ctx context.Context, state *ConversationState) error

Save persists a conversation state to Redis with TTL.

Store defines the interface for persistent conversation state storage.

type Store interface {
// Load retrieves conversation state by ID
Load(ctx context.Context, id string) (*ConversationState, error)
// Save persists conversation state
Save(ctx context.Context, state *ConversationState) error
// Fork creates a copy of an existing conversation state with a new ID
// The original conversation is left unchanged. Returns ErrNotFound if sourceID doesn't exist.
Fork(ctx context.Context, sourceID, newID string) error
}

Summary represents a compressed version of conversation turns. Used to maintain context while reducing token count for older conversations.

type Summary struct {
StartTurn int // First turn included in this summary
EndTurn int // Last turn included in this summary
Content string // Summarized content
TokenCount int // Token count of the summary
CreatedAt time.Time // When this summary was created
}
import "github.com/AltairaLabs/PromptKit/runtime/storage"

MediaMetadata contains metadata about stored media for organization and policy enforcement. This metadata is used to organize media files in storage and apply retention policies.

type MediaMetadata struct {
// RunID identifies the test run that generated this media
RunID string `json:"run_id"`
// ConversationID identifies the conversation containing this media
ConversationID string `json:"conversation_id,omitempty"`
// SessionID identifies the session (for streaming sessions)
SessionID string `json:"session_id,omitempty"`
// MessageIdx is the index of the message containing this media (0-based)
MessageIdx int `json:"message_idx"`
// PartIdx is the index of the content part containing this media (0-based)
PartIdx int `json:"part_idx"`
// MIMEType is the media MIME type (e.g., "image/jpeg", "audio/mp3")
MIMEType string `json:"mime_type"`
// SizeBytes is the size of the media content in bytes
SizeBytes int64 `json:"size_bytes"`
// ProviderID identifies the provider that generated this media
ProviderID string `json:"provider_id,omitempty"`
// Timestamp is when the media was stored
Timestamp time.Time `json:"timestamp"`
// PolicyName is the retention policy to apply to this media
PolicyName string `json:"policy_name,omitempty"`
}

MediaStorageService defines the interface for storing and retrieving media content. Implementations may store media in local filesystem, cloud storage, or other backends.

Example usage:

storage := local.NewFileStore("/var/promptkit/media")
ref, err := storage.StoreMedia(ctx, mediaContent, metadata)
if err != nil {
return err
}
// Later...
content, err := storage.RetrieveMedia(ctx, ref)

Implementations should be safe for concurrent use by multiple goroutines.

type MediaStorageService interface {
// StoreMedia stores media content and returns a storage reference.
// The reference can be used to retrieve the media later.
//
// Parameters:
// - ctx: Context for cancellation and timeouts
// - content: The media content to store (must have Data, FilePath, or URL set)
// - metadata: Metadata about the media for organization and policies
//
// Returns:
// - Reference that can be used to retrieve the media
// - Error if storage fails
//
// The implementation should:
// - Validate the content and metadata
// - Store the media content durably
// - Apply any configured policies (e.g., retention)
// - Return a reference that uniquely identifies the stored media
StoreMedia(ctx context.Context, content *types.MediaContent, metadata *MediaMetadata) (Reference, error)
// RetrieveMedia retrieves media content by its storage reference.
//
// Parameters:
// - ctx: Context for cancellation and timeouts
// - reference: The storage reference returned by StoreMedia
//
// Returns:
// - MediaContent with FilePath set (Data should NOT be loaded into memory)
// - Error if retrieval fails or reference is invalid
//
// The implementation should:
// - Validate the reference
// - Return MediaContent with FilePath pointing to the stored media
// - NOT load the full media data into memory (caller can use GetBase64Data if needed)
RetrieveMedia(ctx context.Context, reference Reference) (*types.MediaContent, error)
// DeleteMedia deletes media content by its storage reference.
//
// Parameters:
// - ctx: Context for cancellation and timeouts
// - reference: The storage reference to delete
//
// Returns:
// - Error if deletion fails or reference is invalid
//
// The implementation should:
// - Validate the reference
// - Delete the media content if not referenced elsewhere (for dedup)
// - Clean up any associated metadata
// - Handle concurrent deletions safely
DeleteMedia(ctx context.Context, reference Reference) error
// GetURL returns a URL that can be used to access the media.
// For local storage, this returns a file:// URL.
// For cloud storage, this may return a signed URL with expiration.
//
// Parameters:
// - ctx: Context for cancellation and timeouts
// - reference: The storage reference
// - expiry: How long the URL should be valid (ignored for local storage)
//
// Returns:
// - URL string that can be used to access the media
// - Error if URL generation fails or reference is invalid
GetURL(ctx context.Context, reference Reference, expiry time.Duration) (string, error)
}

OrganizationMode defines how media files are organized in storage.

type OrganizationMode string

const (
// OrganizationBySession organizes media by session ID
OrganizationBySession OrganizationMode = "by-session"
// OrganizationByConversation organizes media by conversation ID
OrganizationByConversation OrganizationMode = "by-conversation"
// OrganizationByRun organizes media by run ID
OrganizationByRun OrganizationMode = "by-run"
)

PolicyHandler defines the interface for applying and enforcing storage policies. Policies control media retention, cleanup, and other lifecycle management.

Example usage:

policy := policy.NewTimeBasedPolicy()
err := policy.ApplyPolicy(ctx, "/path/to/media.jpg", "delete-after-10min")
if err != nil {
return err
}
// Background enforcement
go func() {
ticker := time.NewTicker(1 * time.Minute)
for range ticker.C {
policy.EnforcePolicy(ctx)
}
}()
type PolicyHandler interface {
// ApplyPolicy applies a named policy to a media file.
// This typically stores policy metadata alongside the media.
//
// Parameters:
// - ctx: Context for cancellation and timeouts
// - filePath: Path to the media file
// - policyName: Name of the policy to apply (e.g., "delete-after-10min", "retain-30days")
//
// Returns:
// - Error if policy application fails or policy is unknown
ApplyPolicy(ctx context.Context, filePath string, policyName string) error
// EnforcePolicy scans stored media and enforces policies.
// This is typically called periodically in the background.
//
// Parameters:
// - ctx: Context for cancellation and timeouts
//
// Returns:
// - Error if enforcement fails (should log but not crash on individual file errors)
//
// The implementation should:
// - Scan media directories for policy metadata
// - Apply policies (e.g., delete expired files)
// - Log enforcement actions
// - Handle errors gracefully (don't stop on permission denied, etc.)
EnforcePolicy(ctx context.Context) error
}

Reference is a reference to media stored in a backend. The format and meaning are backend-specific.

type Reference string
import "github.com/AltairaLabs/PromptKit/runtime/streaming"

Package streaming provides generic utilities for bidirectional streaming communication with LLM providers.

This package extracts common patterns used in duplex (bidirectional) streaming conversations, including:

  • Response processing state machine for handling provider responses
  • Tool execution interface for streaming tool calls
  • Audio streaming utilities for sending audio chunks to providers
  • Response collection patterns for managing streaming responses

The package is designed to be provider-agnostic, working with any provider that implements the runtime/providers streaming interfaces.

The response state machine (ProcessResponseElement) analyzes stream elements and determines appropriate actions:

  • Continue: informational element, keep waiting
  • Complete: turn finished with valid response
  • Error: error or unexpected empty response
  • ToolCalls: tool calls need execution

The ToolExecutor interface allows custom tool registry implementations to be plugged in. The package provides helpers for sending tool results back through the streaming pipeline.

AudioStreamer provides utilities for streaming audio data in either burst mode (all at once) or real-time mode (paced to match playback speed).

ResponseCollector manages the goroutine pattern for collecting streaming responses from a provider session, with optional tool call handling.

Default audio configuration constants

const (
// DefaultChunkSize is the default audio chunk size in bytes.
// 640 bytes = 20ms at 16kHz 16-bit mono (16000 * 2 * 0.02)
DefaultChunkSize = 640
// DefaultSampleRate is the default audio sample rate in Hz.
// 16kHz is required by Gemini Live API.
DefaultSampleRate = 16000
// DefaultChunkIntervalMs is the default interval between chunks in milliseconds
// when streaming in real-time mode.
DefaultChunkIntervalMs = 20
)

Default image streaming configuration constants.

const (
// DefaultTargetFPS is the default target frame rate for image streaming.
// 1 FPS is suitable for most LLM vision scenarios.
DefaultTargetFPS = 1.0
// DefaultImageQuality is the default JPEG quality (1-100).
DefaultImageQuality = 85
)

Default video streaming configuration constants.

const (
// DefaultChunkDurationMs is the default video chunk duration in milliseconds.
// 1000ms (1 second) chunks provide good balance between latency and efficiency.
DefaultChunkDurationMs = 1000
)

ErrEmptyResponse is returned when a response element has no content. This typically indicates an interrupted response that wasn’t properly handled.

var ErrEmptyResponse = errors.New("empty response, likely interrupted")

ErrSessionEnded is returned when the streaming session has ended. This is not necessarily an error, just indicates the session is complete.

var ErrSessionEnded = errors.New("session ended")

func BuildToolResponseElement(result *ToolExecutionResult) stage.StreamElement

BuildToolResponseElement creates a stream element containing tool results. This element can be sent through the pipeline to:

  1. Forward tool responses to the provider (via metadata["tool_responses"])
  2. Capture tool results in the state store (via metadata["tool_result_messages"])

func DrainStaleMessages(outputChan <-chan stage.StreamElement) (int, error)

DrainStaleMessages removes any buffered messages from the output channel. This is useful for clearing state between turns.

Returns the number of messages drained, or an error if the session ended.

func ExecuteAndSend(ctx context.Context, executor ToolExecutor, toolCalls []types.MessageToolCall, inputChan chan<- stage.StreamElement) error

ExecuteAndSend is a convenience function that executes tool calls and sends the results through the pipeline in one operation.

If the executor is nil, this function returns nil (no-op).

func SendEndOfStream(ctx context.Context, inputChan chan<- stage.StreamElement) error

SendEndOfStream signals that audio input is complete for the current turn. This triggers the provider to generate a response.

func SendImageEndOfStream(ctx context.Context, output chan<- stage.StreamElement) error

SendImageEndOfStream signals that image/frame input is complete for the current turn. This triggers the provider to generate a response.

func SendToolResults(ctx context.Context, result *ToolExecutionResult, inputChan chan<- stage.StreamElement) error

SendToolResults sends tool execution results back through the pipeline to the provider, and includes tool result messages for state store capture.

This matches the behavior of non-streaming mode where tool results are stored as messages. The tool result messages are sent via inputChan with metadata, and DuplexProviderStage forwards them to output for state store capture.

func SendVideoEndOfStream(ctx context.Context, output chan<- stage.StreamElement) error

SendVideoEndOfStream signals that video input is complete for the current turn. This triggers the provider to generate a response.

func WaitForResponse(ctx context.Context, responseDone <-chan error) error

WaitForResponse waits for the response collection to complete. This is a convenience function for blocking until a response is received.

AudioStreamer provides utilities for streaming audio data through a pipeline.

type AudioStreamer struct {
// ChunkSize is the number of bytes per chunk.
ChunkSize int
// ChunkIntervalMs is the interval between chunks in milliseconds
// when streaming in real-time mode.
ChunkIntervalMs int
}

func NewAudioStreamer() *AudioStreamer

NewAudioStreamer creates a new audio streamer with default settings.

func (a *AudioStreamer) SendChunk(ctx context.Context, chunk []byte, sampleRate int, inputChan chan<- stage.StreamElement) error

SendChunk sends a single audio chunk through the pipeline.

func (a *AudioStreamer) StreamBurst(ctx context.Context, audioData []byte, sampleRate int, inputChan chan<- stage.StreamElement) error

StreamBurst sends all audio data as fast as possible without pacing. This is preferred for pre-recorded audio to avoid false turn detections from natural speech pauses.

The provider receives all audio before detecting any turn boundaries, which prevents “user interrupted” signals from arriving mid-utterance.

func (a *AudioStreamer) StreamRealtime(ctx context.Context, audioData []byte, sampleRate int, inputChan chan<- stage.StreamElement) error

StreamRealtime sends audio data paced to match real-time playback. Each chunk is sent with a delay matching its duration.

Note: This mode can cause issues with some providers (like Gemini) that detect speech pauses mid-utterance. Use StreamBurst for pre-recorded audio.
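A hedged sketch of streaming pre-recorded audio, assuming audioData holds 16 kHz mono PCM and inputChan is the pipeline's input channel:

streamer := NewAudioStreamer()
if err := streamer.StreamBurst(ctx, audioData, DefaultSampleRate, inputChan); err != nil {
	return err
}
// Signal end of the turn so the provider generates a response.
if err := SendEndOfStream(ctx, inputChan); err != nil {
	return err
}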

ImageStreamer provides utilities for streaming image frames through a pipeline. Use this for realtime video scenarios like webcam feeds or screen sharing.

type ImageStreamer struct {
// TargetFPS is the target frame rate for realtime streaming.
// Default: 1.0 (1 frame per second).
TargetFPS float64
}

func NewImageStreamer(targetFPS float64) *ImageStreamer

NewImageStreamer creates a new image streamer with the specified target FPS. Use targetFPS of 0 or less for default (1.0 FPS).

func (s *ImageStreamer) SendFrame(ctx context.Context, data []byte, mimeType string, frameNum int64, timestamp time.Time, output chan<- stage.StreamElement) error

SendFrame sends a single image frame through the pipeline without pacing. This is the burst mode equivalent - sends immediately without delay.

Parameters:

  • data: Raw image data (JPEG, PNG, etc.)
  • mimeType: MIME type of the image (e.g., “image/jpeg”)
  • frameNum: Sequence number for ordering
  • timestamp: When the frame was captured
  • output: Pipeline input channel


func (s *ImageStreamer) SendFrameWithDimensions(ctx context.Context, data []byte, mimeType string, width, height int, frameNum int64, timestamp time.Time, output chan<- stage.StreamElement) error

SendFrameWithDimensions sends a frame with explicit width and height. Use this when dimensions are known to avoid decoding overhead downstream.

func (s *ImageStreamer) StreamFramesBurst(ctx context.Context, frames [][]byte, mimeType string, output chan<- stage.StreamElement) error

StreamFramesBurst sends all frames as fast as possible without pacing. Use this for pre-recorded frame sequences where real-time pacing isn’t needed.

func (s *ImageStreamer) StreamFramesRealtime(ctx context.Context, frames [][]byte, mimeType string, output chan<- stage.StreamElement) error

StreamFramesRealtime sends frames paced to match the target FPS. Use this for simulating real-time playback of pre-recorded frames.

ResponseAction indicates what action to take after processing a response element.

type ResponseAction int

const (
// ResponseActionContinue means the element was informational (e.g., interruption signal),
// and we should continue waiting for the final response.
ResponseActionContinue ResponseAction = iota
// ResponseActionComplete means we received a complete response.
ResponseActionComplete
// ResponseActionError means an error occurred or the response was empty.
ResponseActionError
// ResponseActionToolCalls means the response contains tool calls that need to be executed.
ResponseActionToolCalls
)

func ProcessResponseElement(elem *stage.StreamElement, logPrefix string) (ResponseAction, error)

ProcessResponseElement handles a response element from the pipeline, determining the appropriate action based on interruption signals, turn completion, and errors.

This is the core state machine for duplex streaming response handling. It consolidates the response handling logic needed for bidirectional streaming.

Returns:

  • ResponseAction: what action to take
  • error: any error to return (only set when action is ResponseActionError)

func (a ResponseAction) String() string

String returns a human-readable representation of the action.

ResponseCollector manages response collection from a streaming session. It processes streaming elements, handles tool calls, and signals completion.

type ResponseCollector struct {
// contains filtered or unexported fields
}

func NewResponseCollector(config ResponseCollectorConfig) *ResponseCollector

NewResponseCollector creates a new response collector with the given configuration.

func (c *ResponseCollector) Start(ctx context.Context, outputChan <-chan stage.StreamElement, inputChan chan<- stage.StreamElement) <-chan error

Start begins collecting responses in a goroutine. Returns a channel that receives nil on success or an error on failure.

The collector will:

  1. Process incoming stream elements
  2. Execute tool calls via the ToolExecutor (if configured)
  3. Send tool results back through inputChan
  4. Signal completion or error through the returned channel
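A minimal sketch of wiring the collector, assuming outputChan and inputChan are the pipeline channels and myExecutor implements ToolExecutor:

collector := NewResponseCollector(ResponseCollectorConfig{
	ToolExecutor: myExecutor, // optional; nil causes tool calls to error
	LogPrefix:    "[voice]",
})
done := collector.Start(ctx, outputChan, inputChan)
if err := WaitForResponse(ctx, done); err != nil {
	return err
}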

ResponseCollectorConfig configures response collection behavior.

type ResponseCollectorConfig struct {
// ToolExecutor is called when tool calls are received.
// If nil, tool calls will result in an error.
ToolExecutor ToolExecutor
// LogPrefix is prepended to log messages for identification.
LogPrefix string
}

ToolExecutionResult contains the results of executing tool calls.

type ToolExecutionResult struct {
// ProviderResponses are formatted for sending back to the streaming provider.
ProviderResponses []providers.ToolResponse
// ResultMessages are formatted for state store capture,
// matching the behavior of non-streaming tool execution.
ResultMessages []types.Message
}

ToolExecutor executes tool calls and returns results. Implementations provide the actual tool registry integration.

type ToolExecutor interface {
// Execute runs the given tool calls and returns their results.
// The implementation is responsible for handling execution errors
// and formatting them appropriately in the result.
Execute(ctx context.Context, toolCalls []types.MessageToolCall) (*ToolExecutionResult, error)
}
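A sketch of a ToolExecutor implementation; the MessageToolCall field name and the JSON result are assumptions for illustration:

type echoExecutor struct{}

func (echoExecutor) Execute(ctx context.Context, toolCalls []types.MessageToolCall) (*ToolExecutionResult, error) {
	result := &ToolExecutionResult{}
	for _, call := range toolCalls {
		result.ProviderResponses = append(result.ProviderResponses, providers.ToolResponse{
			ToolCallID: call.ID, // assumes MessageToolCall exposes its identifier as ID
			Result:     `{"ok":true}`,
		})
	}
	return result, nil
}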

VideoChunk represents a video chunk with metadata for batch streaming.

type VideoChunk struct {
Data []byte
IsKeyFrame bool
Timestamp time.Time
Duration time.Duration
}

VideoStreamer provides utilities for streaming video chunks through a pipeline. Use this for encoded video segments (H.264, VP8, etc.) rather than individual frames. For individual image frames, use ImageStreamer instead.

type VideoStreamer struct {
// ChunkDurationMs is the target duration of each video chunk in milliseconds.
// Default: 1000 (1 second).
ChunkDurationMs int
}

func NewVideoStreamer(chunkDurationMs int) *VideoStreamer

NewVideoStreamer creates a new video streamer with the specified chunk duration. Use chunkDurationMs of 0 or less for default (1000ms).

func (s *VideoStreamer) SendChunk(ctx context.Context, data []byte, mimeType string, chunkIndex int, isKeyFrame bool, timestamp time.Time, output chan<- stage.StreamElement) error

SendChunk sends a single video chunk through the pipeline.

Parameters:

  • data: Encoded video data (H.264, VP8, etc.)
  • mimeType: MIME type of the video (e.g., “video/h264”, “video/webm”)
  • chunkIndex: Sequence number for ordering
  • isKeyFrame: True if this chunk contains a keyframe (important for decoding)
  • timestamp: When the chunk was captured/created
  • output: Pipeline input channel


func (s *VideoStreamer) SendChunkWithDimensions(ctx context.Context, data []byte, mimeType string, width, height int, frameRate float64, chunkIndex int, isKeyFrame bool, timestamp time.Time, duration time.Duration, output chan<- stage.StreamElement) error

SendChunkWithDimensions sends a video chunk with explicit dimensions and frame rate. Use this when video metadata is known to avoid parsing overhead downstream.

func (s *VideoStreamer) StreamChunksBurst(ctx context.Context, chunks []VideoChunk, mimeType string, output chan<- stage.StreamElement) error

StreamChunksBurst sends all video chunks as fast as possible without pacing. Use this for pre-recorded video where real-time pacing isn’t needed.

func (s *VideoStreamer) StreamChunksRealtime(ctx context.Context, chunks []VideoChunk, mimeType string, output chan<- stage.StreamElement) error

StreamChunksRealtime sends video chunks paced according to their duration. Use this for simulating real-time playback of pre-recorded video.

import "github.com/AltairaLabs/PromptKit/runtime/stt"

Package stt provides speech-to-text services for converting audio to text.

The package defines a common Service interface that abstracts STT providers, enabling voice AI applications to transcribe speech from users.

The package provides:

  • Service interface for STT providers
  • TranscriptionConfig for audio format configuration
  • Multiple provider implementations (OpenAI Whisper, etc.)

Basic usage with OpenAI Whisper:

service := stt.NewOpenAI(os.Getenv("OPENAI_API_KEY"))
text, err := service.Transcribe(ctx, audioData, stt.TranscriptionConfig{
Format: "pcm",
SampleRate: 16000,
Channels: 1,
Language: "en",
})
if err != nil {
log.Fatal(err)
}
fmt.Println("User said:", text)

The package includes implementations for:

  • OpenAI Whisper (whisper-1 model)
  • More providers can be added following the Service interface

const (
// Default audio settings.
DefaultSampleRate = 16000
DefaultChannels = 1
DefaultBitDepth = 16
// Common audio formats.
FormatPCM = "pcm"
FormatWAV = "wav"
FormatMP3 = "mp3"
)

const (
// ModelWhisper1 is the OpenAI Whisper model for transcription.
ModelWhisper1 = "whisper-1"
)

Common errors for STT services.

var (
// ErrEmptyAudio is returned when audio data is empty.
ErrEmptyAudio = errors.New("audio data is empty")
// ErrRateLimited is returned when the provider rate limits requests.
ErrRateLimited = errors.New("rate limited by provider")
// ErrInvalidFormat is returned when the audio format is not supported.
ErrInvalidFormat = errors.New("unsupported audio format")
// ErrAudioTooShort is returned when audio is too short to transcribe.
ErrAudioTooShort = errors.New("audio too short to transcribe")
)

func WrapPCMAsWAV(pcmData []byte, sampleRate, channels, bitsPerSample int) []byte

WrapPCMAsWAV wraps raw PCM audio data in a WAV header. This is necessary for APIs like OpenAI Whisper that expect file uploads.

Parameters:

  • pcmData: Raw PCM audio bytes (little-endian, signed)
  • sampleRate: Sample rate in Hz (e.g., 16000)
  • channels: Number of channels (1=mono, 2=stereo)
  • bitsPerSample: Bits per sample (typically 16)

Returns a byte slice containing WAV-formatted audio.
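A short sketch combining WrapPCMAsWAV with transcription, assuming pcmData is raw 16 kHz mono 16-bit PCM and service is an stt.Service:

wav := stt.WrapPCMAsWAV(pcmData, 16000, 1, 16)
text, err := service.Transcribe(ctx, wav, stt.TranscriptionConfig{
	Format:     stt.FormatWAV,
	SampleRate: 16000,
	Channels:   1,
})
if err != nil {
	return err
}
fmt.Println(text)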

OpenAIOption configures the OpenAI STT service.

type OpenAIOption func(*OpenAIService)

func WithOpenAIBaseURL(url string) OpenAIOption

WithOpenAIBaseURL sets a custom base URL (for testing or proxies).

func WithOpenAIClient(client *http.Client) OpenAIOption

WithOpenAIClient sets a custom HTTP client.

func WithOpenAIModel(model string) OpenAIOption

WithOpenAIModel sets the STT model to use.

OpenAIService implements STT using OpenAI’s Whisper API.

type OpenAIService struct {
// contains filtered or unexported fields
}

func NewOpenAI(apiKey string, opts ...OpenAIOption) *OpenAIService

NewOpenAI creates an OpenAI STT service using Whisper.

func (s *OpenAIService) Name() string

Name returns the provider identifier.

func (s *OpenAIService) SupportedFormats() []string

SupportedFormats returns audio formats supported by OpenAI Whisper.

func (s *OpenAIService) Transcribe(ctx context.Context, audio []byte, config TranscriptionConfig) (string, error)

Transcribe converts audio to text using OpenAI’s Whisper API.

Service transcribes audio to text. This interface abstracts different STT providers (OpenAI Whisper, Google, etc.) enabling voice AI applications to use any provider interchangeably.

type Service interface {
// Name returns the provider identifier (for logging/debugging).
Name() string
// Transcribe converts audio to text.
// Returns the transcribed text or an error if transcription fails.
Transcribe(ctx context.Context, audio []byte, config TranscriptionConfig) (string, error)
// SupportedFormats returns supported audio input formats.
// Common values: "pcm", "wav", "mp3", "m4a", "webm"
SupportedFormats() []string
}

TranscriptionConfig configures speech-to-text transcription.

type TranscriptionConfig struct {
// Format is the audio format ("pcm", "wav", "mp3").
// Default: "pcm"
Format string
// SampleRate is the audio sample rate in Hz.
// Default: 16000
SampleRate int
// Channels is the number of audio channels (1=mono, 2=stereo).
// Default: 1
Channels int
// BitDepth is the bits per sample for PCM audio.
// Default: 16
BitDepth int
// Language is a hint for the transcription language (e.g., "en", "es").
// Optional - improves accuracy if provided.
Language string
// Model is the STT model to use (provider-specific).
// For OpenAI: "whisper-1"
Model string
// Prompt is a text prompt to guide transcription (provider-specific).
// Can improve accuracy for domain-specific vocabulary.
Prompt string
}

func DefaultTranscriptionConfig() TranscriptionConfig

DefaultTranscriptionConfig returns sensible defaults for transcription.

TranscriptionError represents an error during transcription.

type TranscriptionError struct {
// Provider is the STT provider name.
Provider string
// Code is the provider-specific error code.
Code string
// Message is a human-readable error message.
Message string
// Cause is the underlying error, if any.
Cause error
// Retryable indicates whether the request can be retried.
Retryable bool
}

func NewTranscriptionError(provider, code, message string, cause error, retryable bool) *TranscriptionError

NewTranscriptionError creates a new TranscriptionError.

func (e *TranscriptionError) Error() string

Error implements the error interface.

func (e *TranscriptionError) Is(target error) bool

Is implements error matching for errors.Is.

func (e *TranscriptionError) Unwrap() error

Unwrap returns the underlying error.

import "github.com/AltairaLabs/PromptKit/runtime/telemetry"

Package telemetry provides OpenTelemetry export for session recordings. This enables exporting session events as distributed traces to observability platforms.

EventConverter converts runtime events to OTLP spans.

type EventConverter struct {
// Resource is the resource to attach to spans.
Resource *Resource
}

func NewEventConverter(resource *Resource) *EventConverter

NewEventConverter creates a new event converter.

func (c *EventConverter) ConvertSession(sessionID string, sessionEvents []events.Event) ([]*Span, error)

ConvertSession converts a session’s events to spans. The session becomes the root span, with pipeline/middleware/provider calls as child spans.
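A hedged export sketch; the endpoint, headers, and the source of sessionEvents are assumptions:

converter := NewEventConverter(DefaultResource())
spans, err := converter.ConvertSession(sessionID, sessionEvents)
if err != nil {
	return err
}
exporter := NewOTLPExporter("http://localhost:4318/v1/traces",
	WithHeaders(map[string]string{"Authorization": "Bearer <token>"}),
)
defer exporter.Shutdown(ctx)
if err := exporter.Export(ctx, spans); err != nil {
	return err
}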

Exporter exports session events to an observability backend.

type Exporter interface {
// Export sends events to the backend.
Export(ctx context.Context, spans []*Span) error
// Shutdown performs cleanup and flushes any pending data.
Shutdown(ctx context.Context) error
}

HTTPClient is a minimal HTTP client interface used for testing and custom transports.

type HTTPClient interface {
Do(req *http.Request) (*http.Response, error)
}

OTLPExporter exports spans to an OTLP endpoint over HTTP.

type OTLPExporter struct {
// contains filtered or unexported fields
}

func NewOTLPExporter(endpoint string, opts ...OTLPExporterOption) *OTLPExporter

NewOTLPExporter creates a new OTLP exporter.

func (e *OTLPExporter) Export(ctx context.Context, spans []*Span) error

Export sends spans to the OTLP endpoint.

func (e *OTLPExporter) Shutdown(ctx context.Context) error

Shutdown flushes pending spans and closes the exporter.

OTLPExporterOption configures an OTLPExporter.

type OTLPExporterOption func(*OTLPExporter)

func WithBatchSize(size int) OTLPExporterOption

WithBatchSize sets the batch size for exports.

func WithHTTPClient(client HTTPClient) OTLPExporterOption

WithHTTPClient sets a custom HTTP client.

func WithHeaders(headers map[string]string) OTLPExporterOption

WithHeaders sets custom headers for OTLP requests.

func WithResource(resource *Resource) OTLPExporterOption

WithResource sets the resource for exported spans.

Resource represents the entity producing telemetry.

type Resource struct {
// Attributes are key-value pairs describing the resource.
Attributes map[string]interface{} `json:"attributes"`
}

func DefaultResource() *Resource

DefaultResource returns a default resource for PromptKit.

Span represents a trace span in OpenTelemetry format.

type Span struct {
// TraceID is the unique identifier for the trace (16 bytes, hex-encoded).
TraceID string `json:"traceId"`
// SpanID is the unique identifier for this span (8 bytes, hex-encoded).
SpanID string `json:"spanId"`
// ParentSpanID is the ID of the parent span (empty for root spans).
ParentSpanID string `json:"parentSpanId,omitempty"`
// Name is the operation name.
Name string `json:"name"`
// Kind is the span kind (client, server, producer, consumer, internal).
Kind SpanKind `json:"kind"`
// StartTime is when the span started.
StartTime time.Time `json:"startTimeUnixNano"`
// EndTime is when the span ended.
EndTime time.Time `json:"endTimeUnixNano"`
// Attributes are key-value pairs associated with the span.
Attributes map[string]interface{} `json:"attributes,omitempty"`
// Status is the span status.
Status *SpanStatus `json:"status,omitempty"`
// Events are timestamped events within the span.
Events []*SpanEvent `json:"events,omitempty"`
}

SpanEvent represents an event within a span.

type SpanEvent struct {
// Name is the event name.
Name string `json:"name"`
// Time is when the event occurred.
Time time.Time `json:"timeUnixNano"`
// Attributes are key-value pairs associated with the event.
Attributes map[string]interface{} `json:"attributes,omitempty"`
}

SpanKind represents the type of span.

type SpanKind int

Span kinds.

const (
SpanKindUnspecified SpanKind = 0
SpanKindInternal SpanKind = 1
SpanKindServer SpanKind = 2
SpanKindClient SpanKind = 3
SpanKindProducer SpanKind = 4
SpanKindConsumer SpanKind = 5
)

SpanStatus represents the status of a span.

type SpanStatus struct {
// Code is the status code (0=Unset, 1=Ok, 2=Error).
Code StatusCode `json:"code"`
// Message is the status message.
Message string `json:"message,omitempty"`
}

StatusCode represents the status of a span.

type StatusCode int

Status codes.

const (
StatusCodeUnset StatusCode = 0
StatusCodeOk StatusCode = 1
StatusCodeError StatusCode = 2
)
import "github.com/AltairaLabs/PromptKit/runtime/template"

Package template provides template rendering and variable substitution.

This package implements a flexible template system that can be used by both prompts and personas. It supports:

  • Variable substitution with {{variable}} syntax
  • Recursive template resolution (variables can contain other variables)
  • Validation of required variables
  • Detection of unresolved placeholders

Future versions may support more advanced templating engines like Go templates (similar to Helm charts) for conditional logic, loops, and functions.

func GetUsedVars(vars map[string]string) []string

GetUsedVars returns a list of variable names that had non-empty values. This is useful for debugging and logging which variables were actually used.

Renderer handles variable substitution in templates

type Renderer struct {
}

func NewRenderer() *Renderer

NewRenderer creates a new template renderer

func (r *Renderer) MergeVars(varMaps ...map[string]string) map[string]string

MergeVars merges multiple variable maps with later maps taking precedence. This is useful for combining default values, context variables, and overrides.

Example:

defaults := map[string]string{"color": "blue", "size": "medium"}
overrides := map[string]string{"color": "red"}
result := MergeVars(defaults, overrides)
// result = {"color": "red", "size": "medium"}

func (r *Renderer) Render(templateText string, vars map[string]string) (string, error)

Render applies variable substitution to the template with recursive resolution.

The renderer performs multiple passes (up to maxPasses) to handle nested variable substitution. For example, if var1=“{{var2}}” and var2=“value”, the final result will correctly resolve to “value”.

Returns an error if any placeholders remain unresolved after all passes.
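
For example, a variable that itself contains another placeholder resolves across passes (the variable names below are illustrative):

r := NewRenderer()
vars := map[string]string{
    "greeting": "Hello, {{name}}!",
    "name":     "Ada",
}
out, err := r.Render("{{greeting}}", vars)
if err != nil {
    // e.g., a placeholder remained unresolved after all passes
    log.Fatal(err)
}
fmt.Println(out) // "Hello, Ada!"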

func (r *Renderer) ValidateRequiredVars(requiredVars []string, vars map[string]string) error

ValidateRequiredVars checks that all required variables are provided and non-empty. Returns an error listing any missing variables.

import "github.com/AltairaLabs/PromptKit/runtime/tokenizer"

Package tokenizer provides token counting functionality for LLM context management.

Token counting is essential for managing context windows and ensuring prompts fit within model limits. This package provides:

  • TokenCounter interface for pluggable implementations
  • HeuristicTokenCounter with model-aware word-to-token ratios
  • Support for different model families (GPT, Claude, Gemini, etc.)

The heuristic approach is suitable for context truncation decisions where approximate counts are sufficient. For exact token counts (billing, etc.), use provider-specific CostInfo from API responses.

DefaultTokenCounter is a package-level counter using the default model family. Use this when you don’t need model-specific tokenization.

var DefaultTokenCounter = NewHeuristicTokenCounter(ModelFamilyDefault)

func CountTokens(text string) int

CountTokens is a convenience function using the default token counter.

HeuristicTokenCounter estimates token counts using word-based heuristics. This is fast and suitable for context management decisions where exact counts are not required. For accurate counts, use a tokenizer library like tiktoken-go.

type HeuristicTokenCounter struct {
// contains filtered or unexported fields
}

func NewHeuristicTokenCounter(family ModelFamily) *HeuristicTokenCounter

NewHeuristicTokenCounter creates a token counter for the specified model family.

func NewHeuristicTokenCounterWithRatio(ratio float64) *HeuristicTokenCounter

NewHeuristicTokenCounterWithRatio creates a token counter with a custom ratio. Use this when you have measured the actual token ratio for your specific use case.

func (h *HeuristicTokenCounter) CountMultiple(texts []string) int

CountMultiple returns the total token count for multiple text segments.

func (h *HeuristicTokenCounter) CountTokens(text string) int

CountTokens estimates token count for the given text. Returns 0 for empty text.

func (h *HeuristicTokenCounter) Ratio() float64

Ratio returns the current token ratio. Thread-safe.

func (h *HeuristicTokenCounter) SetRatio(ratio float64)

SetRatio updates the token ratio. Thread-safe.

ModelFamily represents a family of LLM models with similar tokenization.

type ModelFamily string

const (
// ModelFamilyGPT covers OpenAI GPT models (GPT-3.5, GPT-4, etc.)
// Uses cl100k_base tokenizer - approximately 1.3 tokens per word for English.
ModelFamilyGPT ModelFamily = "gpt"
// ModelFamilyClaude covers Anthropic Claude models.
// Similar to GPT tokenization - approximately 1.3 tokens per word.
ModelFamilyClaude ModelFamily = "claude"
// ModelFamilyGemini covers Google Gemini models.
// Uses SentencePiece tokenizer - approximately 1.4 tokens per word.
ModelFamilyGemini ModelFamily = "gemini"
// ModelFamilyLlama covers Meta Llama models.
// Uses SentencePiece tokenizer - approximately 1.4 tokens per word.
ModelFamilyLlama ModelFamily = "llama"
// ModelFamilyDefault is used when the model family is unknown.
// Uses a conservative estimate of 1.35 tokens per word.
ModelFamilyDefault ModelFamily = "default"
)

func GetModelFamily(modelName string) ModelFamily

GetModelFamily returns the appropriate ModelFamily for a model name. This performs prefix matching to categorize models.

TokenCounter provides token counting functionality. Implementations may use heuristics or actual tokenization.

type TokenCounter interface {
// CountTokens returns the estimated or actual token count for the given text.
CountTokens(text string) int
// CountMultiple returns the total token count for multiple text segments.
CountMultiple(texts []string) int
}

func NewTokenCounterForModel(modelName string) TokenCounter

NewTokenCounterForModel creates a token counter appropriate for the given model.
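
As a rough sketch of model-aware counting for context-budget decisions (the model name and the token budget below are illustrative):

counter := tokenizer.NewTokenCounterForModel("gpt-4o")
system := "You are a helpful assistant."
user := "Summarize the attached report in three bullet points."
total := counter.CountMultiple([]string{system, user})
if total > 4000 { // illustrative budget, not a real model limit
    // truncate or summarize context before calling the provider
}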

import "github.com/AltairaLabs/PromptKit/runtime/tools"

Package tools provides tool/function calling infrastructure for LLM testing.

This package implements a flexible tool execution system with:

  • Tool descriptor registry with JSON Schema validation
  • Mock executors for testing (static and template-based)
  • HTTP executor for live API calls
  • Type coercion and result validation
  • Adapter for prompt registry integration

Tools can be loaded from YAML/JSON files and executed with argument validation, result schema checking, and automatic type coercion for common mismatches.
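
As a rough sketch, registering and executing a statically mocked tool might look like the following (the tool name, schemas, and mock payload are illustrative, and the example assumes the static mock executor serves tools with Mode "mock"):

reg := tools.NewRegistry()
reg.RegisterExecutor(tools.NewMockStaticExecutor()) // assumed to back Mode "mock"

err := reg.Register(&tools.ToolDescriptor{
    Name:         "get_weather",
    Description:  "Returns the current weather for a city",
    InputSchema:  json.RawMessage(`{"type":"object","properties":{"city":{"type":"string"}},"required":["city"]}`),
    OutputSchema: json.RawMessage(`{"type":"object","properties":{"temp_c":{"type":"number"}}}`),
    Mode:         "mock",
    TimeoutMs:    1000,
    MockResult:   json.RawMessage(`{"temp_c": 21.5}`),
})
if err != nil {
    log.Fatal(err)
}

res, err := reg.Execute("get_weather", json.RawMessage(`{"city":"Paris"}`))
if err != nil {
    log.Fatal(err)
}
fmt.Println(string(res.Result)) // {"temp_c": 21.5}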

Sentinel errors for tool operations.

var (
// ErrToolNotFound is returned when a requested tool is not found in the registry.
ErrToolNotFound = errors.New("tool not found")
// ErrToolNameRequired is returned when registering a tool without a name.
ErrToolNameRequired = errors.New("tool name is required")
// ErrToolDescriptionRequired is returned when registering a tool without a description.
ErrToolDescriptionRequired = errors.New("tool description is required")
// ErrInputSchemaRequired is returned when registering a tool without an input schema.
ErrInputSchemaRequired = errors.New("input schema is required")
// ErrOutputSchemaRequired is returned when registering a tool without an output schema.
ErrOutputSchemaRequired = errors.New("output schema is required")
// ErrInvalidToolMode is returned when a tool has an invalid mode.
ErrInvalidToolMode = errors.New("mode must be 'mock', 'live', 'mcp', or a registered executor name")
// ErrMockExecutorOnly is returned when a non-mock tool is passed to a mock executor.
ErrMockExecutorOnly = errors.New("executor can only execute mock tools")
// ErrMCPExecutorOnly is returned when a non-mcp tool is passed to an MCP executor.
ErrMCPExecutorOnly = errors.New("MCP executor can only execute mcp tools")
)

AsyncToolExecutor is an executor that can return a pending status instead of blocking. Tools that require human approval or other asynchronous external operations should implement this interface.

type AsyncToolExecutor interface {
Executor // Still implements the basic Executor interface
// ExecuteAsync may return immediately with a pending status
ExecuteAsync(descriptor *ToolDescriptor, args json.RawMessage) (*ToolExecutionResult, error)
}
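
A hypothetical implementation for approval-gated tools might look like the sketch below; the approvalExecutor type and its behavior are invented for illustration:

type approvalExecutor struct{}

func (a *approvalExecutor) Name() string { return "approval" }

// Execute is the synchronous path; approval-gated tools should be invoked via ExecuteAsync.
func (a *approvalExecutor) Execute(d *tools.ToolDescriptor, args json.RawMessage) (json.RawMessage, error) {
    return nil, errors.New("approval executor requires ExecuteAsync")
}

// ExecuteAsync returns immediately with a pending result that middleware can act on.
func (a *approvalExecutor) ExecuteAsync(d *tools.ToolDescriptor, args json.RawMessage) (*tools.ToolExecutionResult, error) {
    return &tools.ToolExecutionResult{
        Status: tools.ToolStatusPending,
        PendingInfo: &tools.PendingToolInfo{
            Reason:   "requires_approval",
            Message:  "Waiting for a human to approve this call",
            ToolName: d.Name,
            Args:     args,
        },
    }, nil
}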

Coercion represents a type coercion that was performed

type Coercion struct {
Path string `json:"path"`
From any `json:"from"`
To any `json:"to"`
}

Executor interface defines how tools are executed

type Executor interface {
Execute(descriptor *ToolDescriptor, args json.RawMessage) (json.RawMessage, error)
Name() string
}

FileToolResponseRepository implements ToolResponseRepository using the provider’s MockConfig YAML structure. This allows Arena scenarios to define tool responses alongside LLM responses.

type FileToolResponseRepository struct {
// contains filtered or unexported fields
}

func NewFileToolResponseRepository(scenarioID string, toolResponses map[string][]MockToolResponseConfig) *FileToolResponseRepository

NewFileToolResponseRepository creates a repository from scenario tool responses. This is typically used by Arena to provide tool mocking from YAML scenarios.

func (r *FileToolResponseRepository) GetToolResponse(toolName string, args map[string]any, contextKey string) (*ToolResponseData, error)

GetToolResponse implements ToolResponseRepository. It finds the first matching response based on argument comparison.

HTTPConfig defines configuration for live HTTP tool execution

type HTTPConfig struct {
URL string `json:"url" yaml:"url"`
Method string `json:"method" yaml:"method"`
HeadersFromEnv []string `json:"headers_from_env,omitempty" yaml:"headers_from_env,omitempty"`
TimeoutMs int `json:"timeout_ms" yaml:"timeout_ms"`
Redact []string `json:"redact,omitempty" yaml:"redact,omitempty"`
Headers map[string]string `json:"headers,omitempty" yaml:"headers,omitempty"`
}
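
For example, a live tool descriptor might embed an HTTPConfig like this (the URL, environment variable name, and timeout are placeholders):

desc := &tools.ToolDescriptor{
    Name:         "get_weather",
    Description:  "Fetches weather from an external API",
    InputSchema:  json.RawMessage(`{"type":"object"}`),
    OutputSchema: json.RawMessage(`{"type":"object"}`),
    Mode:         "live",
    TimeoutMs:    5000,
    HTTPConfig: &tools.HTTPConfig{
        URL:            "https://api.example.com/weather",
        Method:         "POST",
        HeadersFromEnv: []string{"WEATHER_API_KEY"},
        TimeoutMs:      5000,
    },
}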

InMemoryToolResponseRepository implements ToolResponseRepository using in-memory storage. This is useful for SDK unit tests and programmatic configuration of tool responses.

type InMemoryToolResponseRepository struct {
// contains filtered or unexported fields
}

func NewInMemoryToolResponseRepository() *InMemoryToolResponseRepository

NewInMemoryToolResponseRepository creates a new in-memory tool response repository.

func (r *InMemoryToolResponseRepository) AddResponse(contextKey, toolName string, response *ToolResponseData)

AddResponse adds a tool response for a specific context and tool name. This method supports simple responses where argument matching is not needed.

func (r *InMemoryToolResponseRepository) GetToolResponse(toolName string, args map[string]any, contextKey string) (*ToolResponseData, error)

GetToolResponse implements ToolResponseRepository. For simplicity, this implementation only matches by tool name and context, not by arguments. For argument-based matching, use FileToolResponseRepository or implement a custom repository.

MCPExecutor executes tools using MCP (Model Context Protocol) servers

type MCPExecutor struct {
// contains filtered or unexported fields
}

func NewMCPExecutor(registry mcp.Registry) *MCPExecutor

NewMCPExecutor creates a new MCP executor

func (e *MCPExecutor) Execute(descriptor *ToolDescriptor, args json.RawMessage) (json.RawMessage, error)

Execute executes a tool using an MCP server

func (e *MCPExecutor) Name() string

Name returns the executor name

MockScriptedExecutor executes tools using templated mock data

type MockScriptedExecutor struct{}

func NewMockScriptedExecutor() *MockScriptedExecutor

NewMockScriptedExecutor creates a new scripted mock executor

func (e *MockScriptedExecutor) Execute(descriptor *ToolDescriptor, args json.RawMessage) (json.RawMessage, error)

Execute executes a tool using templated mock data

func (e *MockScriptedExecutor) Name() string

Name returns the executor name

MockStaticExecutor executes tools using static mock data

type MockStaticExecutor struct{}

func NewMockStaticExecutor() *MockStaticExecutor

NewMockStaticExecutor creates a new static mock executor

func (e *MockStaticExecutor) Execute(descriptor *ToolDescriptor, args json.RawMessage) (json.RawMessage, error)

Execute executes a tool using static mock data

func (e *MockStaticExecutor) Name() string

Name returns the executor name

MockToolErrorConfig represents an error configuration.

type MockToolErrorConfig struct {
Type string `yaml:"type"`
Message string `yaml:"message"`
}

MockToolResponseConfig represents a single tool response configuration.

type MockToolResponseConfig struct {
CallArgs map[string]any `yaml:"call_args"`
Result any `yaml:"result,omitempty"`
Error *MockToolErrorConfig `yaml:"error,omitempty"`
}

PendingToolInfo provides context for middleware (email templates, notifications)

type PendingToolInfo struct {
// Reason for pending (e.g., "requires_approval", "waiting_external_api")
Reason string `json:"reason"`
// Human-readable description
Message string `json:"message"`
// Tool details (for middleware to use in notifications)
ToolName string `json:"tool_name"`
Args json.RawMessage `json:"args"`
// Optional: expiration, callback URL, etc.
ExpiresAt *time.Time `json:"expires_at,omitempty"`
CallbackURL string `json:"callback_url,omitempty"`
// Arbitrary metadata for custom middleware
Metadata map[string]any `json:"metadata,omitempty"`
}

PredictMessage represents a predict message (simplified version for tool context)

type PredictMessage struct {
Role string `json:"role"`
Content string `json:"content"`
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
ToolCallResponseID string `json:"tool_call_id,omitempty"` // For tool result messages
}

PredictionRequest represents a predict request (extending existing type)

type PredictionRequest struct {
System string `json:"system"`
Messages []PredictMessage `json:"messages"`
Temperature float32 `json:"temperature"`
TopP float32 `json:"top_p"`
MaxTokens int `json:"max_tokens"`
Seed *int `json:"seed,omitempty"`
}

PredictionResponse represents a predict response (extending existing type)

type PredictionResponse struct {
Content string `json:"content"`
TokensIn int `json:"tokens_in"`
TokensOut int `json:"tokens_out"`
Latency time.Duration `json:"latency"`
Raw []byte `json:"raw,omitempty"`
ToolCalls []ToolCall `json:"tool_calls,omitempty"` // Tools called in this response
}

Registry manages tool descriptors and provides access to executors

type Registry struct {
// contains filtered or unexported fields
}

func NewRegistry() *Registry

NewRegistry creates a new tool registry without a repository backend (legacy mode)

func NewRegistryWithRepository(repository ToolRepository) *Registry

NewRegistryWithRepository creates a new tool registry with a repository backend

func (r *Registry) Execute(toolName string, args json.RawMessage) (*ToolResult, error)

Execute executes a tool with the given arguments

func (r *Registry) ExecuteAsync(toolName string, args json.RawMessage) (*ToolExecutionResult, error)

ExecuteAsync executes a tool with async support, checking if it implements AsyncToolExecutor. Returns ToolExecutionResult with status (complete/pending/failed).
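
Callers can branch on the returned status; a rough sketch, assuming a registry with an async-capable executor already configured (the tool name and arguments are illustrative):

res, err := reg.ExecuteAsync("send_refund", json.RawMessage(`{"amount": 42}`))
if err != nil {
    log.Fatal(err)
}
switch res.Status {
case tools.ToolStatusComplete:
    fmt.Println(string(res.Content))
case tools.ToolStatusPending:
    fmt.Println("pending:", res.PendingInfo.Reason)
case tools.ToolStatusFailed:
    fmt.Println("failed:", res.Error)
}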

func (r *Registry) Get(name string) *ToolDescriptor

Get retrieves a tool descriptor by name with repository fallback

func (r *Registry) GetTool(name string) (*ToolDescriptor, error)

GetTool retrieves a tool descriptor by name

func (r *Registry) GetTools() map[string]*ToolDescriptor

GetTools returns all loaded tool descriptors

func (r *Registry) GetToolsByNames(names []string) ([]*ToolDescriptor, error)

GetToolsByNames returns tool descriptors for the specified names

func (r *Registry) List() []string

List returns all tool names from repository or cache

func (r *Registry) LoadToolFromBytes(filename string, data []byte) error

LoadToolFromBytes loads a tool descriptor from raw bytes data. This is useful when tool data has already been read from a file or received from another source, avoiding redundant file I/O. The filename parameter is used only for error reporting.

func (r *Registry) Register(descriptor *ToolDescriptor) error

Register adds a tool descriptor to the registry with validation

func (r *Registry) RegisterExecutor(executor Executor)

RegisterExecutor registers a tool executor

RepositoryToolExecutor wraps existing tool executors to provide repository-backed mock responses with fallback to real execution. This enables deterministic tool testing while maintaining the ability to fall back to real tool execution when needed.

type RepositoryToolExecutor struct {
// contains filtered or unexported fields
}

func NewRepositoryToolExecutor(baseExecutor Executor, repo ToolResponseRepository, contextKey string) *RepositoryToolExecutor

NewRepositoryToolExecutor creates a new repository-backed tool executor. The executor will first check the repository for configured responses, and fall back to the base executor if no match is found.

func (e *RepositoryToolExecutor) Execute(descriptor *ToolDescriptor, args json.RawMessage) (json.RawMessage, error)

Execute executes a tool, first checking the repository for mock responses. If a matching response is found in the repository, it returns that response. Otherwise, it falls back to the base executor for real execution.
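
A sketch combining the in-memory repository with a base executor; the context key, payload, and the previously built descriptor variable are illustrative:

repo := tools.NewInMemoryToolResponseRepository()
repo.AddResponse("scenario-1", "get_weather", &tools.ToolResponseData{
    Result: map[string]any{"temp_c": 21.5},
})

exec := tools.NewRepositoryToolExecutor(tools.NewMockStaticExecutor(), repo, "scenario-1")
out, err := exec.Execute(descriptor, json.RawMessage(`{"city":"Paris"}`)) // descriptor: a *tools.ToolDescriptor built elsewhere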

func (e *RepositoryToolExecutor) Name() string

Name returns the executor name with repository suffix.

SchemaValidator handles JSON schema validation for tool inputs and outputs

type SchemaValidator struct {
// contains filtered or unexported fields
}

func NewSchemaValidator() *SchemaValidator

NewSchemaValidator creates a new schema validator

func (sv *SchemaValidator) CoerceResult(descriptor *ToolDescriptor, result json.RawMessage) (json.RawMessage, []Coercion, error)

CoerceResult attempts to coerce simple type mismatches in tool results

func (sv *SchemaValidator) ValidateArgs(descriptor *ToolDescriptor, args json.RawMessage) error

ValidateArgs validates tool arguments against the input schema

func (sv *SchemaValidator) ValidateResult(descriptor *ToolDescriptor, result json.RawMessage) error

ValidateResult validates tool result against the output schema

ToolCall represents a tool invocation request

type ToolCall struct {
Name string `json:"name"`
Args json.RawMessage `json:"args"`
ID string `json:"id"` // Provider-specific call ID
}

ToolConfig represents a K8s-style tool configuration manifest

type ToolConfig struct {
APIVersion string `json:"apiVersion" yaml:"apiVersion"`
Kind string `json:"kind" yaml:"kind"`
Metadata metav1.ObjectMeta `json:"metadata,omitempty" yaml:"metadata,omitempty"`
Spec ToolDescriptor `json:"spec" yaml:"spec"`
}

ToolDescriptor represents a normalized tool definition

type ToolDescriptor struct {
Name string `json:"name" yaml:"name"`
Description string `json:"description" yaml:"description"`
InputSchema json.RawMessage `json:"input_schema" yaml:"input_schema"` // JSON Schema Draft-07
OutputSchema json.RawMessage `json:"output_schema" yaml:"output_schema"` // JSON Schema Draft-07
Mode string `json:"mode" yaml:"mode"` // "mock" | "live"
TimeoutMs int `json:"timeout_ms" yaml:"timeout_ms"`
// Static mock data (in-memory)
MockResult json.RawMessage `json:"mock_result,omitempty" yaml:"mock_result,omitempty"`
// Template for dynamic mocks (inline or file)
MockTemplate string `json:"mock_template,omitempty" yaml:"mock_template,omitempty"`
MockResultFile string `json:"mock_result_file,omitempty" yaml:"mock_result_file,omitempty"`
MockTemplateFile string `json:"mock_template_file,omitempty" yaml:"mock_template_file,omitempty"`
HTTPConfig *HTTPConfig `json:"http,omitempty" yaml:"http,omitempty"` // Live HTTP configuration
}

ToolErrorData represents an error response for tool execution.

type ToolErrorData struct {
Type string `json:"type"` // Error type/category
Message string `json:"message"` // Error message
}

ToolExecutionResult includes status and optional pending information

type ToolExecutionResult struct {
Status ToolExecutionStatus `json:"status"`
Content json.RawMessage `json:"content,omitempty"`
Error string `json:"error,omitempty"`
// Present when Status == ToolStatusPending
PendingInfo *PendingToolInfo `json:"pending_info,omitempty"`
}

ToolExecutionStatus represents whether a tool completed or needs external input

type ToolExecutionStatus string

const (
// ToolStatusComplete indicates the tool finished executing
ToolStatusComplete ToolExecutionStatus = "complete"
// ToolStatusPending indicates the tool is waiting for external input (e.g., human approval)
ToolStatusPending ToolExecutionStatus = "pending"
// ToolStatusFailed indicates the tool execution failed
ToolStatusFailed ToolExecutionStatus = "failed"
)

ToolGuidance provides hints for different interaction modes. This is a flexible structure that can be extended with task-specific guidance.

type ToolGuidance struct {
Support string `json:"support,omitempty"`
Assistant string `json:"assistant,omitempty"`
Generic string `json:"generic,omitempty"`
}

ToolPolicy defines constraints for tool usage in scenarios

type ToolPolicy struct {
ToolChoice string `json:"tool_choice"` // "auto" | "required" | "none"
MaxToolCallsPerTurn int `json:"max_tool_calls_per_turn"`
MaxTotalToolCalls int `json:"max_total_tool_calls"`
Blocklist []string `json:"blocklist,omitempty"`
}

ToolRepository provides abstract access to tool descriptors (local interface to avoid import cycles)

type ToolRepository interface {
LoadTool(name string) (*ToolDescriptor, error)
ListTools() ([]string, error)
SaveTool(descriptor *ToolDescriptor) error
}

ToolResponseData represents a configured tool response with optional error.

type ToolResponseData struct {
Result any `json:"result,omitempty"` // Successful response data
Error *ToolErrorData `json:"error,omitempty"` // Error response
}

ToolResponseRepository defines the interface for repositories that can provide mock tool responses based on tool name, arguments, and context.

type ToolResponseRepository interface {
// GetToolResponse retrieves a mock response for a tool execution.
// Returns nil if no matching response is configured (not an error).
GetToolResponse(toolName string, args map[string]any, contextKey string) (*ToolResponseData, error)
}

ToolResult represents the result of a tool execution

type ToolResult struct {
Name string `json:"name"`
ID string `json:"id"` // Matches ToolCall.ID
Result json.RawMessage `json:"result"`
LatencyMs int64 `json:"latency_ms"`
Error string `json:"error,omitempty"`
}

ToolStats tracks tool usage statistics

type ToolStats struct {
TotalCalls int `json:"total_calls"`
ByTool map[string]int `json:"by_tool"`
}

ValidationError represents a tool validation failure

type ValidationError struct {
Type string `json:"type"` // "args_invalid" | "result_invalid" | "policy_violation"
Tool string `json:"tool"`
Detail string `json:"detail"`
Path string `json:"path,omitempty"`
}

func (e *ValidationError) Error() string

Error implements the error interface

import "github.com/AltairaLabs/PromptKit/runtime/tts"

Package tts provides text-to-speech services for converting text responses to audio.

The package defines a common Service interface that abstracts TTS providers, enabling voice AI applications to convert text-only LLM responses to speech.

The package provides:

  • Service interface for TTS providers
  • SynthesisConfig for voice/format configuration
  • Voice and AudioFormat types for provider capabilities
  • Multiple provider implementations (OpenAI, ElevenLabs, etc.)

Basic usage with OpenAI TTS:

service := tts.NewOpenAI(os.Getenv("OPENAI_API_KEY"))
reader, err := service.Synthesize(ctx, "Hello world", tts.SynthesisConfig{
Voice: "alloy",
Format: tts.FormatMP3,
})
if err != nil {
log.Fatal(err)
}
defer reader.Close()
// Stream audio to speaker or save to file
io.Copy(audioOutput, reader)

For low-latency applications, use StreamingService:

streamer := tts.NewCartesia(os.Getenv("CARTESIA_API_KEY"))
chunks, err := streamer.SynthesizeStream(ctx, "Hello world", config)
for chunk := range chunks {
// Play audio chunk immediately
speaker.Write(chunk)
}

The package includes implementations for:

  • OpenAI TTS (tts-1, tts-1-hd models)
  • ElevenLabs (high-quality voice cloning)
  • Cartesia (ultra-low latency streaming)
  • Google Cloud Text-to-Speech (multi-language)

const (
// ElevenLabsModelMultilingual is the multilingual v2 model.
ElevenLabsModelMultilingual = "eleven_multilingual_v2"
// ElevenLabsModelTurbo is the fast turbo v2.5 model.
ElevenLabsModelTurbo = "eleven_turbo_v2_5"
// ElevenLabsModelEnglish is the English monolingual v1 model.
ElevenLabsModelEnglish = "eleven_monolingual_v1"
// ElevenLabsModelMultilingualV1 is the older multilingual v1 model.
ElevenLabsModelMultilingualV1 = "eleven_multilingual_v1"
)

const (
// ModelTTS1 is the OpenAI TTS model optimized for speed.
ModelTTS1 = "tts-1"
// ModelTTS1HD is the OpenAI TTS model optimized for quality.
ModelTTS1HD = "tts-1-hd"
)

OpenAI voices.

const (
VoiceAlloy = "alloy" // Neutral voice.
VoiceEcho = "echo" // Male voice.
VoiceFable = "fable" // British accent.
VoiceOnyx = "onyx" // Deep male voice.
VoiceNova = "nova" // Female voice.
VoiceShimmer = "shimmer" // Soft female voice.
)

const (
// CartesiaModelSonic is the latest Sonic model for Cartesia TTS.
CartesiaModelSonic = "sonic-2024-10-01"
)

Common TTS errors.

var (
// ErrInvalidVoice is returned when the requested voice is not available.
ErrInvalidVoice = errors.New("invalid or unsupported voice")
// ErrInvalidFormat is returned when the requested format is not supported.
ErrInvalidFormat = errors.New("invalid or unsupported audio format")
// ErrEmptyText is returned when attempting to synthesize empty text.
ErrEmptyText = errors.New("text cannot be empty")
// ErrSynthesisFailed is returned when TTS synthesis fails.
ErrSynthesisFailed = errors.New("speech synthesis failed")
// ErrRateLimited is returned when API rate limits are exceeded.
ErrRateLimited = errors.New("rate limit exceeded")
// ErrQuotaExceeded is returned when account quota is exceeded.
ErrQuotaExceeded = errors.New("quota exceeded")
// ErrServiceUnavailable is returned when the TTS service is unavailable.
ErrServiceUnavailable = errors.New("TTS service unavailable")
)

Common audio formats.

var (
// FormatMP3 is MP3 format (most compatible).
FormatMP3 = AudioFormat{
Name: "mp3",
MIMEType: "audio/mpeg",
SampleRate: sampleRateDefault,
BitDepth: 0,
Channels: 1,
}
// FormatOpus is Opus format (best for streaming).
FormatOpus = AudioFormat{
Name: "opus",
MIMEType: "audio/opus",
SampleRate: sampleRateDefault,
BitDepth: 0,
Channels: 1,
}
// FormatAAC is AAC format.
FormatAAC = AudioFormat{
Name: "aac",
MIMEType: "audio/aac",
SampleRate: sampleRateDefault,
BitDepth: 0,
Channels: 1,
}
// FormatFLAC is FLAC format (lossless).
FormatFLAC = AudioFormat{
Name: "flac",
MIMEType: "audio/flac",
SampleRate: sampleRateDefault,
BitDepth: bitDepthDefault,
Channels: 1,
}
// FormatPCM16 is raw 16-bit PCM (for processing).
FormatPCM16 = AudioFormat{
Name: "pcm",
MIMEType: "audio/pcm",
SampleRate: sampleRateDefault,
BitDepth: bitDepthDefault,
Channels: 1,
}
// FormatWAV is WAV format (PCM with header).
FormatWAV = AudioFormat{
Name: "wav",
MIMEType: "audio/wav",
SampleRate: sampleRateDefault,
BitDepth: bitDepthDefault,
Channels: 1,
}
)

AudioChunk represents a chunk of synthesized audio data.

type AudioChunk struct {
// Data is the raw audio bytes.
Data []byte
// Index is the chunk sequence number (0-indexed).
Index int
// Final indicates this is the last chunk.
Final bool
// Error is set if an error occurred during synthesis.
Error error
}

AudioFormat describes an audio output format.

type AudioFormat struct {
// Name is the format identifier ("mp3", "opus", "pcm", "aac", "flac").
Name string
// MIMEType is the content type (e.g., "audio/mpeg").
MIMEType string
// SampleRate is the audio sample rate in Hz.
SampleRate int
// BitDepth is the bits per sample (for PCM formats).
BitDepth int
// Channels is the number of audio channels (1=mono, 2=stereo).
Channels int
}

func (f AudioFormat) String() string

String returns the format name.

CartesiaOption configures the Cartesia TTS service.

type CartesiaOption func(*CartesiaService)

func WithCartesiaBaseURL(url string) CartesiaOption

WithCartesiaBaseURL sets a custom base URL.

func WithCartesiaClient(client *http.Client) CartesiaOption

WithCartesiaClient sets a custom HTTP client.

func WithCartesiaModel(model string) CartesiaOption

WithCartesiaModel sets the TTS model.

func WithCartesiaWSURL(url string) CartesiaOption

WithCartesiaWSURL sets a custom WebSocket URL.

CartesiaService implements TTS using Cartesia’s ultra-low latency API. Cartesia specializes in real-time streaming TTS with <100ms first-byte latency.

type CartesiaService struct {
// contains filtered or unexported fields
}

func NewCartesia(apiKey string, opts ...CartesiaOption) *CartesiaService

NewCartesia creates a Cartesia TTS service.

func (s *CartesiaService) Name() string

Name returns the provider identifier.

func (s *CartesiaService) SupportedFormats() []AudioFormat

SupportedFormats returns audio formats supported by Cartesia.

func (s *CartesiaService) SupportedVoices() []Voice

SupportedVoices returns a sample of available Cartesia voices.

func (s *CartesiaService) Synthesize(ctx context.Context, text string, config SynthesisConfig) (io.ReadCloser, error)

Synthesize converts text to audio using Cartesia’s REST API. For streaming output, use SynthesizeStream instead.

func (s *CartesiaService) SynthesizeStream(ctx context.Context, text string, config SynthesisConfig) (<-chan AudioChunk, error)

SynthesizeStream converts text to audio with streaming output via WebSocket. This provides ultra-low latency (<100ms first-byte) for real-time applications.

ElevenLabsOption configures the ElevenLabs TTS service.

type ElevenLabsOption func(*ElevenLabsService)

func WithElevenLabsBaseURL(url string) ElevenLabsOption

WithElevenLabsBaseURL sets a custom base URL.

func WithElevenLabsClient(client *http.Client) ElevenLabsOption

WithElevenLabsClient sets a custom HTTP client.

func WithElevenLabsModel(model string) ElevenLabsOption

WithElevenLabsModel sets the TTS model.

ElevenLabsService implements TTS using ElevenLabs’ API. ElevenLabs specializes in high-quality voice cloning and natural-sounding speech.

type ElevenLabsService struct {
// contains filtered or unexported fields
}

func NewElevenLabs(apiKey string, opts ...ElevenLabsOption) *ElevenLabsService

NewElevenLabs creates an ElevenLabs TTS service.

func (s *ElevenLabsService) Name() string

Name returns the provider identifier.

func (s *ElevenLabsService) SupportedFormats() []AudioFormat

SupportedFormats returns audio formats supported by ElevenLabs.

func (s *ElevenLabsService) SupportedVoices() []Voice

SupportedVoices returns a sample of available ElevenLabs voices. Note: ElevenLabs has many more voices including custom cloned voices. Use the ElevenLabs API to get a complete list of available voices.

func (s *ElevenLabsService) Synthesize(ctx context.Context, text string, config SynthesisConfig) (io.ReadCloser, error)

Synthesize converts text to audio using ElevenLabs’ TTS API.

OpenAIOption configures the OpenAI TTS service.

type OpenAIOption func(*OpenAIService)

func WithOpenAIBaseURL(url string) OpenAIOption

WithOpenAIBaseURL sets a custom base URL (for testing or proxies).

func WithOpenAIClient(client *http.Client) OpenAIOption

WithOpenAIClient sets a custom HTTP client.

func WithOpenAIModel(model string) OpenAIOption

WithOpenAIModel sets the TTS model to use.

OpenAIService implements TTS using OpenAI’s text-to-speech API.

type OpenAIService struct {
// contains filtered or unexported fields
}

func NewOpenAI(apiKey string, opts ...OpenAIOption) *OpenAIService

NewOpenAI creates an OpenAI TTS service.

func (s *OpenAIService) Name() string

Name returns the provider identifier.

func (s *OpenAIService) SupportedFormats() []AudioFormat

SupportedFormats returns audio formats supported by OpenAI TTS.

func (s *OpenAIService) SupportedVoices() []Voice

SupportedVoices returns available OpenAI voices.

func (s *OpenAIService) Synthesize(ctx context.Context, text string, config SynthesisConfig) (io.ReadCloser, error)

Synthesize converts text to audio using OpenAI’s TTS API.

Service converts text to speech audio. This interface abstracts different TTS providers (OpenAI, ElevenLabs, etc.) enabling voice AI applications to use any provider interchangeably.

type Service interface {
// Name returns the provider identifier (for logging/debugging).
Name() string
// Synthesize converts text to audio.
// Returns a reader for streaming audio data.
// The caller is responsible for closing the reader.
Synthesize(ctx context.Context, text string, config SynthesisConfig) (io.ReadCloser, error)
// SupportedVoices returns available voices for this provider.
SupportedVoices() []Voice
// SupportedFormats returns supported audio output formats.
SupportedFormats() []AudioFormat
}

StreamingService extends Service with streaming synthesis capabilities. Streaming TTS provides lower latency by returning audio chunks as they’re generated.

type StreamingService interface {
Service
// SynthesizeStream converts text to audio with streaming output.
// Returns a channel that receives audio chunks as they're generated.
// The channel is closed when synthesis completes or an error occurs.
SynthesizeStream(ctx context.Context, text string, config SynthesisConfig) (<-chan AudioChunk, error)
}

SynthesisConfig configures text-to-speech synthesis.

type SynthesisConfig struct {
// Voice is the voice ID to use for synthesis.
// Available voices vary by provider - use SupportedVoices() to list options.
Voice string
// Format is the output audio format.
// Default is MP3 for most providers.
Format AudioFormat
// Speed is the speech rate multiplier (0.25-4.0, default 1.0).
// Not all providers support speed adjustment.
Speed float64
// Pitch adjusts the voice pitch (-20 to 20 semitones, default 0).
// Not all providers support pitch adjustment.
Pitch float64
// Language is the language code for synthesis (e.g., "en-US").
// Required for some providers, optional for others.
Language string
// Model is the TTS model to use (provider-specific).
// For OpenAI: "tts-1" (fast) or "tts-1-hd" (high quality).
Model string
}

func DefaultSynthesisConfig() SynthesisConfig

DefaultSynthesisConfig returns sensible defaults for synthesis.

SynthesisError provides detailed error information from TTS providers.

type SynthesisError struct {
// Provider is the TTS provider that returned the error.
Provider string
// Code is the provider-specific error code.
Code string
// Message is the error message.
Message string
// Cause is the underlying error (if any).
Cause error
// Retryable indicates if the error is transient and retry may succeed.
Retryable bool
}

func NewSynthesisError(provider, code, message string, cause error, retryable bool) *SynthesisError

NewSynthesisError creates a new SynthesisError.

func (e *SynthesisError) Error() string

Error implements the error interface.

func (e *SynthesisError) Unwrap() error

Unwrap returns the underlying error.
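
Because SynthesisError exposes Retryable and unwraps its cause, callers can combine errors.As with the package's sentinel errors. A sketch, assuming a Service created as in the examples above and that the provider wraps the sentinels shown earlier:

reader, err := service.Synthesize(ctx, "Hello world", cfg)
if err != nil {
    var synthErr *tts.SynthesisError
    if errors.As(err, &synthErr) && synthErr.Retryable {
        // transient failure: back off and retry
    }
    if errors.Is(err, tts.ErrRateLimited) {
        // rate limited: wait before retrying
    }
    return err
}
defer reader.Close()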

Voice describes a TTS voice available from a provider.

type Voice struct {
// ID is the provider-specific voice identifier.
ID string
// Name is a human-readable voice name.
Name string
// Language is the primary language code (e.g., "en", "es", "fr").
Language string
// Gender is the voice gender ("male", "female", "neutral").
Gender string
// Description provides additional voice characteristics.
Description string
// Preview is a URL to a voice sample (if available).
Preview string
}
import "github.com/AltairaLabs/PromptKit/runtime/types"

ContentType constants for different content part types

const (
ContentTypeText = "text"
ContentTypeImage = "image"
ContentTypeAudio = "audio"
ContentTypeVideo = "video"
ContentTypeDocument = "document"
)

Common MIME types

const (
MIMETypeImageJPEG = "image/jpeg"
MIMETypeImagePNG = "image/png"
MIMETypeImageGIF = "image/gif"
MIMETypeImageWebP = "image/webp"
MIMETypeAudioMP3 = "audio/mpeg"
MIMETypeAudioWAV = "audio/wav"
MIMETypeAudioOgg = "audio/ogg"
MIMETypeAudioWebM = "audio/webm"
MIMETypeVideoMP4 = "video/mp4"
MIMETypeVideoWebM = "video/webm"
MIMETypeVideoOgg = "video/ogg"
MIMETypePDF = "application/pdf"
MIMETypeDocx = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
MIMETypeDoc = "application/msword"
MIMETypeMarkdown = "text/markdown"
MIMETypePlainText = "text/plain"
MIMETypeCSV = "text/csv"
MIMETypeJSON = "application/json"
MIMETypeXML = "application/xml"
)

func CountMediaParts(msg Message) int

CountMediaParts returns the number of media parts (image, audio, video) in a message

func CountPartsByType(msg Message, contentType string) int

CountPartsByType returns the number of parts of a specific type in a message

func ExtractTextContent(msg Message) string

ExtractTextContent extracts all text content from a message, regardless of format. This is useful for backward compatibility when you need just the text.

func HasOnlyTextContent(msg Message) bool

HasOnlyTextContent returns true if the message contains only text (no media)

func MigrateMessagesToLegacy(messages []Message) error

MigrateMessagesToLegacy converts a slice of multimodal messages to legacy format in-place. Returns an error if any message contains media content.

func MigrateMessagesToMultimodal(messages []Message)

MigrateMessagesToMultimodal converts a slice of legacy messages to multimodal format in-place

func MigrateToLegacy(msg *Message) error

MigrateToLegacy converts a multimodal message back to legacy text-only format. This is useful for backward compatibility with systems that don’t support multimodal. Returns an error if the message contains non-text content.

func MigrateToMultimodal(msg *Message)

MigrateToMultimodal converts a legacy text-only message to use the Parts structure. This is useful when transitioning existing code to the new multimodal API.

ChunkReader reads from an io.Reader and produces MediaChunks. Useful for converting continuous streams (e.g., microphone input) into chunks.

Example usage:

reader := NewChunkReader(micInput, config)
for {
chunk, err := reader.NextChunk(ctx)
if err == io.EOF {
break
}
if err != nil {
return err
}
session.SendChunk(ctx, chunk)
}
type ChunkReader struct {
// contains filtered or unexported fields
}

func NewChunkReader(r io.Reader, config StreamingMediaConfig) *ChunkReader

NewChunkReader creates a new ChunkReader that reads from the given reader and produces MediaChunks according to the config.

func (cr *ChunkReader) NextChunk(ctx context.Context) (*MediaChunk, error)

NextChunk reads the next chunk from the reader. Returns io.EOF when the stream is complete. The returned chunk’s IsLast field will be true on the final chunk.

ChunkWriter writes MediaChunks to an io.Writer. Useful for converting chunks back into continuous streams (e.g., speaker output).

Example usage:

writer := NewChunkWriter(speakerOutput)
for chunk := range session.Response() {
if chunk.MediaDelta != nil {
err := writer.WriteChunk(chunk.MediaDelta)
if err != nil {
return err
}
}
}
type ChunkWriter struct {
// contains filtered or unexported fields
}

func NewChunkWriter(w io.Writer) *ChunkWriter

NewChunkWriter creates a new ChunkWriter that writes to the given writer.

func (cw *ChunkWriter) Flush() error

Flush flushes any buffered data to the underlying writer (if it supports flushing).

func (cw *ChunkWriter) WriteChunk(chunk *MediaChunk) (int, error)

WriteChunk writes a MediaChunk to the underlying writer. Returns the number of bytes written and any error encountered.

ContentPart represents a single piece of content in a multimodal message. A message can contain multiple parts: text, images, audio, video, etc.

type ContentPart struct {
Type string `json:"type"` // "text", "image", "audio", "video"
// For text content
Text *string `json:"text,omitempty"`
// For media content (image, audio, video)
Media *MediaContent `json:"media,omitempty"`
}

func NewAudioPart(filePath string) (ContentPart, error)

NewAudioPart creates a ContentPart with audio content from a file path

func NewAudioPartFromData(base64Data, mimeType string) ContentPart

NewAudioPartFromData creates a ContentPart with base64-encoded audio data

func NewDocumentPart(filePath string) (ContentPart, error)

NewDocumentPart creates a ContentPart with document content from a file path

func NewDocumentPartFromData(base64Data, mimeType string) ContentPart

NewDocumentPartFromData creates a ContentPart with base64-encoded document data

func NewImagePart(filePath string, detail *string) (ContentPart, error)

NewImagePart creates a ContentPart with image content from a file path

func NewImagePartFromData(base64Data, mimeType string, detail *string) ContentPart

NewImagePartFromData creates a ContentPart with base64-encoded image data

func NewImagePartFromURL(url string, detail *string) ContentPart

NewImagePartFromURL creates a ContentPart with image content from a URL

func NewTextPart(text string) ContentPart

NewTextPart creates a ContentPart with text content

func NewVideoPart(filePath string) (ContentPart, error)

NewVideoPart creates a ContentPart with video content from a file path

func NewVideoPartFromData(base64Data, mimeType string) ContentPart

NewVideoPartFromData creates a ContentPart with base64-encoded video data

func SplitMultimodalMessage(msg Message) (text string, mediaParts []ContentPart)

SplitMultimodalMessage splits a multimodal message into separate text and media parts. Returns the text content and a slice of media content parts.

func (cp *ContentPart) Validate() error

Validate checks if the ContentPart is valid

CostInfo tracks token usage and associated costs for LLM operations. All cost values are in USD. Used for both individual messages and aggregated tracking.

type CostInfo struct {
InputTokens int `json:"input_tokens"` // Number of input tokens consumed
OutputTokens int `json:"output_tokens"` // Number of output tokens generated
CachedTokens int `json:"cached_tokens,omitempty"` // Number of cached tokens used (reduces cost)
InputCostUSD float64 `json:"input_cost_usd"` // Cost of input tokens in USD
OutputCostUSD float64 `json:"output_cost_usd"` // Cost of output tokens in USD
CachedCostUSD float64 `json:"cached_cost_usd,omitempty"` // Cost savings from cached tokens
TotalCost float64 `json:"total_cost_usd"` // Total cost in USD
}

MediaChunk represents a chunk of streaming media data. Used for bidirectional streaming where media is sent or received in chunks.

Example usage:

chunk := &MediaChunk{
Data: audioData,
SequenceNum: 1,
Timestamp: time.Now(),
IsLast: false,
Metadata: map[string]string{"mime_type": "audio/pcm"},
}
type MediaChunk struct {
// Data contains the raw media bytes for this chunk
Data []byte `json:"data"`
// SequenceNum is the sequence number for ordering chunks (starts at 0)
SequenceNum int64 `json:"sequence_num"`
// Timestamp indicates when this chunk was created
Timestamp time.Time `json:"timestamp"`
// IsLast indicates if this is the final chunk in the stream
IsLast bool `json:"is_last"`
// Metadata contains chunk-specific metadata (MIME type, encoding, etc.)
Metadata map[string]string `json:"metadata,omitempty"`
}

MediaContent represents media data (image, audio, video) in a message. Supports both inline base64 data and external file/URL references.

type MediaContent struct {
// Data source - exactly one should be set
Data *string `json:"data,omitempty"` // Base64-encoded media data
FilePath *string `json:"file_path,omitempty"` // Local file path
URL *string `json:"url,omitempty"` // External URL (http/https)
// Storage backend reference (used when media is externalized)
StorageReference *string `json:"storage_reference,omitempty"` // Backend-specific storage reference
// Media metadata
MIMEType string `json:"mime_type"` // e.g., "image/jpeg", "audio/mp3", "video/mp4"
Format *string `json:"format,omitempty"` // Optional format hint (e.g., "png", "mp3", "mp4")
SizeKB *int64 `json:"size_kb,omitempty"` // Optional size in kilobytes
Detail *string `json:"detail,omitempty"` // Optional detail level for images: "low", "high", "auto"
Caption *string `json:"caption,omitempty"` // Optional caption/description
Duration *int `json:"duration,omitempty"` // Optional duration in seconds (for audio/video)
BitRate *int `json:"bit_rate,omitempty"` // Optional bit rate in kbps (for audio/video)
Channels *int `json:"channels,omitempty"` // Optional number of channels (for audio)
Width *int `json:"width,omitempty"` // Optional width in pixels (for image/video)
Height *int `json:"height,omitempty"` // Optional height in pixels (for image/video)
FPS *int `json:"fps,omitempty"` // Optional frames per second (for video)
PolicyName *string `json:"policy_name,omitempty"` // Retention policy name
}

func (mc *MediaContent) GetBase64Data() (string, error)

GetBase64Data returns the base64-encoded data for this media content. If the data is already base64-encoded, it returns it directly. If the data is from a file, it reads and encodes the file. If the data is from a URL or StorageReference, it returns an error (caller should use MediaLoader).

Deprecated: For new code, use providers.MediaLoader.GetBase64Data which supports all sources including storage references and URLs with proper context handling.

func (mc *MediaContent) ReadData() (io.ReadCloser, error)

ReadData returns an io.Reader for the media content. For base64 data, it decodes and returns a reader. For file paths, it opens and returns the file. For URLs, it returns an error (caller should fetch separately).

func (mc *MediaContent) Validate() error

Validate checks if the MediaContent is valid

MediaItemSummary provides details about a single media item in a message.

type MediaItemSummary struct {
Type string `json:"type"` // Content type: "image", "audio", "video"
Source string `json:"source"` // Source description (file path, URL, or "inline data")
MIMEType string `json:"mime_type"` // MIME type
SizeBytes int `json:"size_bytes"` // Size in bytes (0 if unknown)
Detail string `json:"detail,omitempty"` // Detail level for images
Loaded bool `json:"loaded"` // Whether media was successfully loaded
Error string `json:"error,omitempty"` // Error message if loading failed
}

MediaSummary provides a high-level overview of media content in a message. This is included in JSON output to make multimodal messages more observable.

type MediaSummary struct {
TotalParts int `json:"total_parts"` // Total number of content parts
TextParts int `json:"text_parts"` // Number of text parts
ImageParts int `json:"image_parts"` // Number of image parts
AudioParts int `json:"audio_parts"` // Number of audio parts
VideoParts int `json:"video_parts"` // Number of video parts
DocumentParts int `json:"document_parts"` // Number of document parts
MediaItems []MediaItemSummary `json:"media_items,omitempty"` // Details of each media item
}

Message represents a single message in a conversation. This is the canonical message type used throughout the system.

type Message struct {
Role string `json:"role"` // "system", "user", "assistant", "tool"
Content string `json:"content"` // Message content (legacy text-only, maintained for backward compatibility)
// Multimodal content parts (text, images, audio, video)
// If Parts is non-empty, it takes precedence over Content.
// For backward compatibility, if Parts is empty, Content will be used.
Parts []ContentPart `json:"parts,omitempty"`
// Tool invocations (for assistant messages that call tools)
ToolCalls []MessageToolCall `json:"tool_calls,omitempty"`
// Tool result (for tool role messages)
// When Role="tool", this contains the tool execution result
ToolResult *MessageToolResult `json:"tool_result,omitempty"`
// Source indicates where this message originated (runtime-only, not persisted in JSON)
// Values: "statestore" (loaded from StateStore), "pipeline" (created during execution), "" (user input)
Source string `json:"-"`
// Metadata for observability and tracking
Timestamp time.Time `json:"timestamp,omitempty"` // When the message was created
LatencyMs int64 `json:"latency_ms,omitempty"` // Time taken to generate (for assistant messages)
CostInfo *CostInfo `json:"cost_info,omitempty"` // Token usage and cost tracking
Meta map[string]interface{} `json:"meta,omitempty"` // Custom metadata
// Validation results (for assistant messages)
Validations []ValidationResult `json:"validations,omitempty"`
}

func CloneMessage(msg Message) Message

CloneMessage creates a deep copy of a message

func CombineTextAndMedia(role, text string, mediaParts []ContentPart) Message

CombineTextAndMedia creates a multimodal message from separate text and media parts. This is the inverse of SplitMultimodalMessage.

func ConvertTextToMultimodal(role, content string) Message

ConvertTextToMultimodal is a convenience function that creates a multimodal message from a role and text content. This helps with code migration.

func NewAssistantMessage(content string) Message

NewAssistantMessage creates an assistant message with text content.

func NewMultimodalMessage(role string, parts []ContentPart) Message

NewMultimodalMessage creates a message with multimodal content parts. When using Parts, the Content field is intentionally left empty as GetContent() will extract text from Parts.
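
For example, a user message combining text and an image reference (the URL is a placeholder; passing nil omits the detail hint):

msg := NewMultimodalMessage("user", []ContentPart{
    NewTextPart("What is in this picture?"),
    NewImagePartFromURL("https://example.com/cat.png", nil),
})
fmt.Println(msg.GetContent()) // "What is in this picture?"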

func NewSystemMessage(content string) Message

NewSystemMessage creates a system message with text content.

func NewTextMessage(role, content string) Message

NewTextMessage creates a simple text message. Use this for user, assistant, or system messages with text-only content.

func NewToolResultMessage(result MessageToolResult) Message

NewToolResultMessage creates a properly normalized tool result message. This ensures Content and ToolResult.Content are always synchronized, which is required for provider compatibility and consistent behavior.

IMPORTANT: Always use this constructor instead of directly creating Message{Role: "tool", ToolResult: …} to avoid Content/ToolResult.Content synchronization issues.
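
A minimal sketch (the IDs, content, and latency are illustrative):

msg := NewToolResultMessage(MessageToolResult{
    ID:        "call_123",
    Name:      "get_weather",
    Content:   `{"temp_c": 21.5}`,
    LatencyMs: 48,
})
// msg.Role is "tool" and msg.Content mirrors msg.ToolResult.Content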

func NewUserMessage(content string) Message

NewUserMessage creates a user message with text content.

func (m *Message) AddAudioPart(filePath string) error

AddAudioPart adds an audio content part from a file path

func (m *Message) AddDocumentPart(filePath string) error

AddDocumentPart adds a document content part from a file path

func (m *Message) AddImagePart(filePath string, detail *string) error

AddImagePart adds an image content part from a file path

func (m *Message) AddImagePartFromURL(url string, detail *string)

AddImagePartFromURL adds an image content part from a URL

func (m *Message) AddPart(part ContentPart)

AddPart adds a content part to the message. If this is the first part added, it clears the legacy Content field.

func (m *Message) AddTextPart(text string)

AddTextPart adds a text content part to the message

func (m *Message) AddVideoPart(filePath string) error

AddVideoPart adds a video content part from a file path

func (m *Message) GetContent() string

GetContent returns the content of the message. This is the recommended way to access message content because it handles all cases:

  1. For tool messages (Role="tool"): returns ToolResult.Content (the authoritative source)
  2. For multimodal messages: returns the concatenated text parts
  3. For legacy messages: returns the Content field

func (m *Message) HasMediaContent() bool

HasMediaContent returns true if the message contains any media (image, audio, video, document)

func (m *Message) IsMultimodal() bool

IsMultimodal returns true if the message contains multimodal content (Parts)

func (m Message) MarshalJSON() ([]byte, error)

MarshalJSON implements custom JSON marshaling for Message. This enhances the output by:

  1. Populating the Content field with a human-readable summary when Parts exist
  2. Adding a MediaSummary field for observability of multimodal content
  3. Omitting the Content field when ToolResult is present to avoid duplication

func (m *Message) SetMultimodalContent(parts []ContentPart)

SetMultimodalContent sets the message content to multimodal parts. This clears the legacy Content field.

func (m *Message) SetTextContent(text string)

SetTextContent sets the message content to simple text. This clears any existing Parts and sets the legacy Content field.

func (m *Message) UnmarshalJSON(data []byte) error

UnmarshalJSON implements custom JSON unmarshaling for Message. After unmarshaling, if ToolResult is present, copy its Content to Message.Content for provider compatibility (providers expect Content field to be populated).

MessageToolCall represents a request to call a tool within a Message. The Args field contains the JSON-encoded arguments for the tool.

type MessageToolCall struct {
ID string `json:"id"` // Unique identifier for this tool call
Name string `json:"name"` // Name of the tool to invoke
Args json.RawMessage `json:"args"` // JSON-encoded tool arguments
}

MessageToolResult represents the result of a tool execution in a Message. When embedded in Message, the Message.Role should be “tool”.

type MessageToolResult struct {
ID string `json:"id"` // References the MessageToolCall.ID that triggered this result
Name string `json:"name"` // Tool name that was executed
Content string `json:"content"` // Result content or error message
Error string `json:"error,omitempty"` // Error message if tool execution failed
LatencyMs int64 `json:"latency_ms"` // Tool execution latency in milliseconds
}

StreamingMediaConfig configures streaming media input parameters. Used to configure audio/video streaming sessions with providers.

Example usage for audio streaming:

config := &StreamingMediaConfig{
Type: ContentTypeAudio,
ChunkSize: 8192, // 8KB chunks
SampleRate: 16000, // 16kHz audio
Encoding: "pcm", // Raw PCM audio
Channels: 1, // Mono
BufferSize: 10, // Buffer 10 chunks
}
type StreamingMediaConfig struct {
// Type specifies the media type being streamed
// Values: ContentTypeAudio, ContentTypeVideo
Type string `json:"type"`
// ChunkSize is the target size in bytes for each chunk
// Typical values: 4096-8192 for audio, 32768-65536 for video
ChunkSize int `json:"chunk_size"`
// SampleRate is the audio sample rate in Hz
// Common values: 8000 (phone quality), 16000 (wideband), 44100 (CD quality), 48000 (pro audio)
SampleRate int `json:"sample_rate,omitempty"`
// Encoding specifies the audio encoding format
// Values: "pcm" (raw), "opus", "mp3", "aac"
Encoding string `json:"encoding,omitempty"`
// Channels is the number of audio channels
// Values: 1 (mono), 2 (stereo)
Channels int `json:"channels,omitempty"`
// BitDepth is the audio bit depth in bits
// Common values: 16, 24, 32
BitDepth int `json:"bit_depth,omitempty"`
// Width is the video width in pixels
Width int `json:"width,omitempty"`
// Height is the video height in pixels
Height int `json:"height,omitempty"`
// FrameRate is the video frame rate (FPS)
// Common values: 24, 30, 60
FrameRate int `json:"frame_rate,omitempty"`
// BufferSize is the maximum number of chunks to buffer
// Larger values increase latency but provide more stability
// Typical values: 5-20
BufferSize int `json:"buffer_size,omitempty"`
// FlushInterval is how often to flush buffered data (if applicable)
FlushInterval time.Duration `json:"flush_interval,omitempty"`
// Metadata contains additional provider-specific configuration
Metadata map[string]interface{} `json:"metadata,omitempty"`
}

func (c *StreamingMediaConfig) Validate() error

Validate checks if the StreamingMediaConfig is valid

ToolDef represents a tool definition that can be provided to an LLM. The InputSchema and OutputSchema use JSON Schema format for validation.

type ToolDef struct {
Name string `json:"name"` // Unique tool name
Description string `json:"description"` // Human-readable description of what the tool does
InputSchema json.RawMessage `json:"input_schema"` // JSON Schema for input validation
OutputSchema json.RawMessage `json:"output_schema,omitempty"` // Optional JSON Schema for output validation
}

ToolStats tracks tool usage statistics across a conversation or run. Useful for monitoring which tools are being used and how frequently.

type ToolStats struct {
TotalCalls int `json:"total_calls"` // Total number of tool calls
ByTool map[string]int `json:"by_tool"` // Count of calls per tool name
}

ValidationError represents a validation failure in tool usage or message content. Used to provide structured error information when validation fails.

type ValidationError struct {
Type string `json:"type"` // Error type: "args_invalid" | "result_invalid" | "policy_violation"
Tool string `json:"tool"` // Name of the tool that failed validation
Detail string `json:"detail"` // Human-readable error details
}

ValidationResult represents the outcome of a validator check on a message. These are attached to assistant messages to show which validations passed or failed.

type ValidationResult struct {
ValidatorType string `json:"validator_type"` // Type of validator
Passed bool `json:"passed"` // Whether the validation passed
Details map[string]interface{} `json:"details,omitempty"` // Validator-specific details
Timestamp time.Time `json:"timestamp,omitempty"` // When validation was performed
}
import "github.com/AltairaLabs/PromptKit/runtime/validators"

Package validators provides content validation for LLM responses and user inputs.

This package implements various validators to ensure conversation quality:

  • Length and sentence count limits
  • Banned word detection
  • Role integrity (preventing role confusion)
  • Required field presence
  • Question and commit block validation

Validators are used during test execution to catch policy violations and ensure LLM responses meet quality standards.

DefaultRegistry is the global validator registry.

var DefaultRegistry = NewRegistry()

BannedWordsValidator checks for banned words

type BannedWordsValidator struct {
// contains filtered or unexported fields
}

func NewBannedWordsValidator(bannedWords []string) *BannedWordsValidator

NewBannedWordsValidator creates a new banned words validator

func (v *BannedWordsValidator) SupportsStreaming() bool

SupportsStreaming returns true as banned words can be detected incrementally

func (v *BannedWordsValidator) Validate(content string, params map[string]interface{}) ValidationResult

Validate checks for banned words in content

func (v *BannedWordsValidator) ValidateChunk(chunk providers.StreamChunk, params ...map[string]interface{}) error

ValidateChunk validates a stream chunk for banned words and aborts if found

CommitValidator checks for commit/decision blocks in conversation responses

type CommitValidator struct{}

func NewCommitValidator() *CommitValidator

NewCommitValidator creates a new commit validator

func (v *CommitValidator) SupportsStreaming() bool

SupportsStreaming returns false as commit validation requires complete content

func (v *CommitValidator) Validate(content string, params map[string]interface{}) ValidationResult

Validate checks for commit block with required fields

LengthValidator checks content length limits

type LengthValidator struct{}

func NewLengthValidator() *LengthValidator

NewLengthValidator creates a new length validator

func (v *LengthValidator) SupportsStreaming() bool

SupportsStreaming returns true as length can be checked incrementally

func (v *LengthValidator) Validate(content string, params map[string]interface{}) ValidationResult

Validate checks content length against limits

func (v *LengthValidator) ValidateChunk(chunk providers.StreamChunk, params ...map[string]interface{}) error

ValidateChunk validates stream chunk against length limits and aborts if exceeded

MaxSentencesValidator checks sentence count limits

type MaxSentencesValidator struct{}

func NewMaxSentencesValidator() *MaxSentencesValidator

NewMaxSentencesValidator creates a new sentence count validator

func (v *MaxSentencesValidator) SupportsStreaming() bool

SupportsStreaming returns false as sentence counting requires complete content

func (v *MaxSentencesValidator) Validate(content string, params map[string]interface{}) ValidationResult

Validate checks sentence count against max limit

Registry maps validator type names to factory functions. This allows dynamic instantiation of validators from configuration.

type Registry struct {
// contains filtered or unexported fields
}

func NewRegistry() *Registry

NewRegistry creates a new validator registry with built-in validators.

func (r *Registry) Get(validatorType string) (ValidatorFactory, bool)

Get retrieves a validator factory by type.

func (r *Registry) HasValidator(validatorType string) bool

HasValidator returns true if a validator type is registered.

func (r *Registry) Register(validatorType string, factory ValidatorFactory)

Register adds a validator factory to the registry.

RequiredFieldsValidator checks for required fields in content

type RequiredFieldsValidator struct{}

func NewRequiredFieldsValidator() *RequiredFieldsValidator

NewRequiredFieldsValidator creates a new required fields validator

func (v *RequiredFieldsValidator) SupportsStreaming() bool

SupportsStreaming returns false as required fields must be in complete content

func (v *RequiredFieldsValidator) Validate(content string, params map[string]interface{}) ValidationResult

Validate checks for required fields in content

StreamingValidator interface for validators that can check content incrementally and abort streaming early if validation fails

type StreamingValidator interface {
Validator
// ValidateChunk validates a stream chunk and returns error to abort stream
// Returns nil to continue, ValidationAbortError to abort stream
ValidateChunk(chunk providers.StreamChunk, params ...map[string]interface{}) error
// SupportsStreaming returns true if this validator can validate incrementally
SupportsStreaming() bool
}

ValidationResult holds the result of a validation check

type ValidationResult struct {
Passed bool `json:"passed"`
Details interface{} `json:"details,omitempty"`
}

Validator interface for all validation checks

type Validator interface {
Validate(content string, params map[string]interface{}) ValidationResult
}

ValidatorConfig defines a validator configuration from a prompt pack. This is just configuration data - validators are instantiated by the registry.

type ValidatorConfig struct {
Type string `json:"type" yaml:"type"`
Params map[string]interface{} `json:"params" yaml:"params"`
}

ValidatorFactory creates a validator instance from configuration params. Params from the config are passed at construction time to allow validators to pre-compile patterns, build state, etc.

type ValidatorFactory func(params map[string]interface{}) Validator
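
A minimal sketch of plugging a custom validator into the registry, using only the Validator, ValidationResult, Registry, and ValidatorFactory types above; the "no_marker" validator is a hypothetical example.

package main

import (
    "fmt"
    "strings"

    "github.com/AltairaLabs/PromptKit/runtime/validators"
)

// markerValidator fails when content contains a configured marker string (hypothetical example).
type markerValidator struct {
    marker string
}

func (v *markerValidator) Validate(content string, params map[string]interface{}) validators.ValidationResult {
    return validators.ValidationResult{
        Passed:  !strings.Contains(content, v.marker),
        Details: map[string]interface{}{"marker": v.marker},
    }
}

func main() {
    // Register a factory so the validator can be instantiated from ValidatorConfig params.
    validators.DefaultRegistry.Register("no_marker", func(params map[string]interface{}) validators.Validator {
        marker, _ := params["marker"].(string)
        return &markerValidator{marker: marker}
    })

    factory, ok := validators.DefaultRegistry.Get("no_marker")
    if !ok {
        panic("no_marker not registered")
    }
    v := factory(map[string]interface{}{"marker": "TODO"})
    fmt.Println(v.Validate("all done", nil).Passed) // true
}
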
import "github.com/AltairaLabs/PromptKit/runtime/variables"

Package variables provides dynamic variable resolution for prompt templates. Variable providers can inject context from external sources (databases, APIs, conversation state) before template rendering.

ChainProvider composes multiple providers into a single provider. Providers are called in order, with later providers overriding variables from earlier providers when keys conflict.

type ChainProvider struct {
// contains filtered or unexported fields
}

func Chain(providers ...Provider) *ChainProvider

Chain creates a ChainProvider from multiple providers. Providers are called in the order given. Later providers override variables from earlier providers.

func (c *ChainProvider) Add(p Provider) *ChainProvider

Add appends a provider to the chain.

func (c *ChainProvider) Name() string

Name returns the provider identifier.

func (c *ChainProvider) Provide(ctx context.Context) (map[string]string, error)

Provide calls all chained providers and merges their results. Returns an error if any provider fails.

func (c *ChainProvider) Providers() []Provider

Providers returns the list of providers in the chain.

Provider resolves variables dynamically at runtime. Variables returned override static variables with the same key. Providers are called before template rendering to inject dynamic context.

Providers that need access to conversation state (like StateProvider) should receive it via constructor injection rather than through Provide().

type Provider interface {
// Name returns the provider identifier (for logging/debugging)
Name() string
// Provide returns variables to inject into template context.
// Called before each template render.
Provide(ctx context.Context) (map[string]string, error)
}
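
A minimal sketch of a custom Provider composed with the TimeProvider documented below; the staticProvider type and its variables are hypothetical examples.

package main

import (
    "context"
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/variables"
)

// staticProvider returns a fixed set of variables (hypothetical example).
type staticProvider struct{ vars map[string]string }

func (p *staticProvider) Name() string { return "static" }

func (p *staticProvider) Provide(ctx context.Context) (map[string]string, error) {
    return p.vars, nil
}

func main() {
    chain := variables.Chain(
        &staticProvider{vars: map[string]string{"tenant": "acme", "current_date": "placeholder"}},
        variables.NewTimeProvider(), // later providers override earlier ones on key conflict
    )

    vars, err := chain.Provide(context.Background())
    if err != nil {
        fmt.Println("provider error:", err)
        return
    }
    fmt.Println(vars["tenant"])       // "acme", from staticProvider
    fmt.Println(vars["current_date"]) // from TimeProvider (overrides the placeholder)
}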

StateProvider resolves variables from conversation state metadata. It extracts key-value pairs from the state’s Metadata field and converts them to string variables for template substitution.

The StateStore is injected via constructor, allowing the provider to look up state for the current conversation.

type StateProvider struct {
// KeyPrefix filters metadata keys. Only keys with this prefix are included.
// If empty, all metadata keys are included.
KeyPrefix string
// StripPrefix removes the KeyPrefix from variable names when true.
// For example, if KeyPrefix="user_" and StripPrefix=true,
// metadata key "user_name" becomes variable "name".
StripPrefix bool
// contains filtered or unexported fields
}

func NewStatePrefixProvider(store statestore.Store, conversationID, prefix string, stripPrefix bool) *StateProvider

NewStatePrefixProvider creates a StateProvider that only extracts metadata keys with the given prefix. If stripPrefix is true, the prefix is removed from the resulting variable names.

func NewStateProvider(store statestore.Store, conversationID string) *StateProvider

NewStateProvider creates a StateProvider that extracts all metadata as variables from the given conversation’s state.

func (p *StateProvider) Name() string

Name returns the provider identifier.

func (p *StateProvider) Provide(ctx context.Context) (map[string]string, error)

Provide extracts variables from conversation state metadata. Returns nil if store is nil, conversation not found, or has no metadata.

TimeProvider provides current time and date variables. Useful for prompts that need temporal context like “What day is it?” or time-sensitive instructions.

type TimeProvider struct {
// Format is the time format string for current_time variable.
// Defaults to time.RFC3339 if empty.
Format string
// Location specifies the timezone. Defaults to UTC if nil.
Location *time.Location
// contains filtered or unexported fields
}

func NewTimeProvider() *TimeProvider

NewTimeProvider creates a TimeProvider with default settings (UTC, RFC3339 format).

func NewTimeProviderWithFormat(format string) *TimeProvider

NewTimeProviderWithFormat creates a TimeProvider with a custom time format.

func NewTimeProviderWithLocation(loc *time.Location) *TimeProvider

NewTimeProviderWithLocation creates a TimeProvider for a specific timezone.

func (p *TimeProvider) Name() string

Name returns the provider identifier.

func (p *TimeProvider) Provide(ctx context.Context) (map[string]string, error)

Provide returns time-related variables. Variables provided:

  • current_time: Full timestamp in configured format
  • current_date: Date in YYYY-MM-DD format
  • current_year: Four-digit year
  • current_month: Full month name (e.g., “January”)
  • current_weekday: Full weekday name (e.g., “Monday”)
  • current_hour: Hour in 24-hour format (00-23)

func (p *TimeProvider) WithNowFunc(fn func() time.Time) *TimeProvider

WithNowFunc sets a custom time source (primarily for testing).
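
A minimal sketch of pinning the provider to a fixed clock for deterministic tests, using only the constructors and methods listed above.

package main

import (
    "context"
    "fmt"
    "time"

    "github.com/AltairaLabs/PromptKit/runtime/variables"
)

func main() {
    // Fix the clock so the provided variables are deterministic.
    fixed := time.Date(2024, time.March, 1, 15, 4, 5, 0, time.UTC)

    tp := variables.NewTimeProviderWithFormat(time.RFC1123).
        WithNowFunc(func() time.Time { return fixed })

    vars, _ := tp.Provide(context.Background())
    fmt.Println(vars["current_date"])    // 2024-03-01
    fmt.Println(vars["current_weekday"]) // Friday
}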

import "github.com/AltairaLabs/PromptKit/runtime/version"

Package version provides version information for the PromptKit runtime. Version variables can be overridden at build time using ldflags:

go build -ldflags "-X github.com/AltairaLabs/PromptKit/runtime/version.version=1.0.0"

func GetBuildInfo() []any

GetBuildInfo returns version details as structured slog attributes. This is useful for including version info in log messages.

func GetVersion() string

GetVersion returns the current version string. Falls back to build info from go modules if version is “dev”.

func GetVersionInfo() string

GetVersionInfo returns detailed version information in the same format as promptarena.

func LogStartup()

LogStartup logs version information at debug level. This is called by the logger package after initialization.

import "github.com/AltairaLabs/PromptKit/runtime/metrics/prometheus"

Package prometheus provides Prometheus metrics exporters for PromptKit pipelines.

func RecordPipelineEnd(status string, durationSeconds float64)

RecordPipelineEnd records a pipeline completion.

func RecordPipelineStart()

RecordPipelineStart records a pipeline start.

func RecordProviderCost(provider, model string, cost float64)

RecordProviderCost records cost from a provider call.

func RecordProviderRequest(provider, model, status string, durationSeconds float64)

RecordProviderRequest records a provider API call.

func RecordProviderTokens(provider, model string, inputTokens, outputTokens, cachedTokens int)

RecordProviderTokens records token consumption.

func RecordStageDuration(stageName, stageType string, durationSeconds float64)

RecordStageDuration records the duration of a stage.

func RecordStageElement(stageName, status string)

RecordStageElement records a processed element.

func RecordToolCall(toolName, status string, durationSeconds float64)

RecordToolCall records a tool call.

func RecordValidation(validator, validatorType, status string, durationSeconds float64)

RecordValidation records a validation check.

Exporter serves Prometheus metrics over HTTP.

type Exporter struct {
// contains filtered or unexported fields
}

func NewExporter(addr string) *Exporter

NewExporter creates a new Prometheus exporter that serves metrics at the given address.

func NewExporterWithRegistry(addr string, registry *prometheus.Registry) *Exporter

NewExporterWithRegistry creates a new Prometheus exporter with a custom registry. This is useful for testing or when you want more control over metric registration.

func (e *Exporter) Handler() http.Handler

Handler returns an http.Handler for the metrics endpoint. This is useful when you want to integrate metrics into an existing HTTP server.

func (e *Exporter) MustRegister(cs ...prometheus.Collector)

MustRegister registers additional collectors with the exporter’s registry. Panics if registration fails.

func (e *Exporter) Register(c prometheus.Collector) error

Register registers additional collectors with the exporter’s registry. Returns an error if registration fails.

func (e *Exporter) Registry() *prometheus.Registry

Registry returns the underlying Prometheus registry.

func (e *Exporter) Shutdown(ctx context.Context) error

Shutdown gracefully stops the exporter with the given context.

func (e *Exporter) Start() error

Start begins serving metrics at /metrics endpoint. This method blocks until the server is stopped or encounters an error. Returns http.ErrServerClosed when shut down gracefully.
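
A minimal sketch of serving metrics alongside a pipeline; the listen address and the recorded values are arbitrary examples.

package main

import (
    "context"
    "log"
    "net/http"
    "time"

    promexp "github.com/AltairaLabs/PromptKit/runtime/metrics/prometheus"
)

func main() {
    exp := promexp.NewExporter(":9090")

    // Start blocks, so serve metrics in a goroutine.
    go func() {
        if err := exp.Start(); err != nil && err != http.ErrServerClosed {
            log.Fatalf("metrics exporter: %v", err)
        }
    }()

    // ... run the pipeline; stages record metrics via the Record* helpers ...
    promexp.RecordPipelineStart()
    promexp.RecordPipelineEnd("success", 1.23)

    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer cancel()
    if err := exp.Shutdown(ctx); err != nil {
        log.Printf("shutdown: %v", err)
    }
}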

MetricsListener records pipeline events as Prometheus metrics. It implements the events.Listener signature and should be registered with an EventBus using SubscribeAll.

type MetricsListener struct{}

func NewMetricsListener() *MetricsListener

NewMetricsListener creates a new MetricsListener.

func (l *MetricsListener) Handle(event *events.Event)

Handle processes an event and records relevant metrics. This method is designed to be used with EventBus.SubscribeAll.

func (l *MetricsListener) Listener() events.Listener

Listener returns an events.Listener function that can be registered with an EventBus.

import "github.com/AltairaLabs/PromptKit/runtime/persistence/common"

Package common provides shared functionality for persistence repositories.

File permission constants for persistence operations

const (
DirPerm = 0o750 // Directory permissions: rwxr-x---
FilePerm = 0o600 // File permissions: rw-------
)

func ValidatePromptConfig(config *prompt.Config) error

ValidatePromptConfig validates the prompt configuration structure

BasePromptRepository provides common prompt repository functionality

type BasePromptRepository struct {
BasePath string
TaskTypeToFile map[string]string
Cache map[string]*prompt.Config
Extensions []string
Unmarshal UnmarshalFunc
Marshal MarshalFunc
}

func NewBasePromptRepository(basePath string, taskTypeToFile map[string]string, extensions []string, unmarshal UnmarshalFunc) *BasePromptRepository

NewBasePromptRepository creates a new base repository

func (r *BasePromptRepository) HasMatchingTaskType(path, taskType string) bool

HasMatchingTaskType checks if a file contains the specified task type

func (r *BasePromptRepository) HasValidExtension(path string) bool

HasValidExtension checks if a file has a valid extension

func (r *BasePromptRepository) ListPrompts() ([]string, error)

ListPrompts returns all available prompt task types

func (r *BasePromptRepository) LoadPrompt(taskType string) (*prompt.Config, error)

LoadPrompt loads a prompt configuration by task type

func (r *BasePromptRepository) ResolveFilePath(taskType string) (string, error)

ResolveFilePath finds the file path for a given task type

func (r *BasePromptRepository) SavePrompt(config *prompt.Config) error

SavePrompt saves a prompt configuration to disk

func (r *BasePromptRepository) SearchByContent(taskType string) string

SearchByContent searches for files by parsing and checking task type

func (r *BasePromptRepository) SearchByFilename(taskType string) string

SearchByFilename searches for files by filename patterns

func (r *BasePromptRepository) SearchForPrompt(taskType string) (string, error)

SearchForPrompt searches for a file matching the task type

MarshalFunc is a function that marshals a prompt config to bytes

type MarshalFunc func(interface{}) ([]byte, error)

UnmarshalFunc is a function that unmarshals data into a prompt config

type UnmarshalFunc func([]byte, interface{}) error
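
A minimal sketch wiring encoding/json into a BasePromptRepository; the ./prompts directory, the task-type mapping, and the dot-prefixed extension form are hypothetical examples.

package main

import (
    "encoding/json"
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/persistence/common"
)

func main() {
    repo := common.NewBasePromptRepository(
        "./prompts",                                  // hypothetical base directory
        map[string]string{"support": "support.json"}, // task type -> file
        []string{".json"},                            // extensions searched (assumed dot-prefixed)
        json.Unmarshal,                               // satisfies common.UnmarshalFunc
    )
    repo.Marshal = json.Marshal // common.MarshalFunc, needed for SavePrompt

    tasks, err := repo.ListPrompts()
    if err != nil {
        fmt.Println("list error:", err)
        return
    }
    fmt.Println(tasks)
}
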
import "github.com/AltairaLabs/PromptKit/runtime/persistence/json"

Package json provides JSON file-based implementations of persistence repositories.

This package can be used for production environments where JSON is preferred over YAML.

PromptRepository loads prompts from JSON files on disk

type PromptRepository struct {
*common.BasePromptRepository
}

func NewJSONPromptRepository(basePath string, taskTypeToFile map[string]string) *PromptRepository

NewJSONPromptRepository creates a JSON file-based prompt repository

func (r *PromptRepository) LoadFragment(name, relativePath, baseDir string) (*prompt.Fragment, error)

LoadFragment loads a fragment by name

func (r *PromptRepository) SavePrompt(config *prompt.Config) error

SavePrompt saves a prompt configuration to a JSON file

ToolRepository loads tools from JSON files on disk

type ToolRepository struct {
// contains filtered or unexported fields
}

func NewJSONToolRepository(basePath string) *ToolRepository

NewJSONToolRepository creates a JSON file-based tool repository

func (r *ToolRepository) ListTools() ([]string, error)

ListTools returns all available tool names

func (r *ToolRepository) LoadDirectory(dirPath string) error

LoadDirectory recursively loads all JSON tool files from a directory

func (r *ToolRepository) LoadTool(name string) (*tools.ToolDescriptor, error)

LoadTool loads a tool descriptor by name

func (r *ToolRepository) LoadToolFromFile(filename string) error

LoadToolFromFile loads a tool from a JSON file

func (r *ToolRepository) RegisterTool(name string, descriptor *tools.ToolDescriptor)

RegisterTool adds a tool descriptor directly

func (r *ToolRepository) SaveTool(descriptor *tools.ToolDescriptor) error

SaveTool saves a tool descriptor to a JSON file using K8s manifest format. The file will be named <tool-name>.json in the repository’s base path.

import "github.com/AltairaLabs/PromptKit/runtime/persistence/memory"

Package memory provides in-memory implementations of persistence repositories.

This package is primarily for testing and SDK use, allowing prompts and tools to be registered programmatically without file system dependencies.

PromptRepository stores prompts in memory (for testing/SDK)

type PromptRepository struct {
// contains filtered or unexported fields
}

func NewPromptRepository() *PromptRepository

NewPromptRepository creates a new in-memory prompt repository

func (r *PromptRepository) ListPrompts() ([]string, error)

ListPrompts returns all available prompt task types

func (r *PromptRepository) LoadFragment(name, relativePath, baseDir string) (*prompt.Fragment, error)

LoadFragment loads a fragment by name

func (r *PromptRepository) LoadPrompt(taskType string) (*prompt.Config, error)

LoadPrompt loads a prompt configuration by task type

func (r *PromptRepository) RegisterFragment(name string, fragment *prompt.Fragment)

RegisterFragment adds a fragment to the in-memory store

func (r *PromptRepository) RegisterPrompt(taskType string, config *prompt.Config)

RegisterPrompt adds a prompt to the in-memory store

func (r *PromptRepository) SavePrompt(config *prompt.Config) error

SavePrompt saves a prompt configuration

ToolRepository stores tools in memory (for testing/SDK)

type ToolRepository struct {
// contains filtered or unexported fields
}

func NewToolRepository() *ToolRepository

NewToolRepository creates a new in-memory tool repository

func (r *ToolRepository) ListTools() ([]string, error)

ListTools returns all available tool names

func (r *ToolRepository) LoadTool(name string) (*tools.ToolDescriptor, error)

LoadTool loads a tool descriptor by name

func (r *ToolRepository) RegisterTool(name string, descriptor *tools.ToolDescriptor)

RegisterTool adds a tool to the in-memory store

func (r *ToolRepository) SaveTool(descriptor *tools.ToolDescriptor) error

SaveTool saves a tool descriptor

import "github.com/AltairaLabs/PromptKit/runtime/persistence/yaml"

Package yaml provides YAML file-based implementations of persistence repositories.

This package is primarily for Arena and development use, loading prompts and tools from YAML configuration files on disk.

PromptRepository loads prompts from YAML files on disk

type PromptRepository struct {
*common.BasePromptRepository
}

func NewYAMLPromptRepository(basePath string, taskTypeToFile map[string]string) *PromptRepository

NewYAMLPromptRepository creates a YAML file-based prompt repository. If taskTypeToFile mappings are provided, they will be used for lookups; otherwise, the repository searches the basePath directory.

func (r *PromptRepository) LoadFragment(name, relativePath, baseDir string) (*prompt.Fragment, error)

LoadFragment loads a fragment by name and optional path

func (r *PromptRepository) SavePrompt(config *prompt.Config) error

SavePrompt saves a prompt configuration to a YAML file

ToolRepository loads tools from YAML files on disk

type ToolRepository struct {
// contains filtered or unexported fields
}

func NewYAMLToolRepository(basePath string) *ToolRepository

NewYAMLToolRepository creates a YAML file-based tool repository

func (r *ToolRepository) ListTools() ([]string, error)

ListTools returns all available tool names

func (r *ToolRepository) LoadDirectory(dirPath string) error

LoadDirectory recursively loads all YAML tool files from a directory

func (r *ToolRepository) LoadTool(name string) (*tools.ToolDescriptor, error)

LoadTool loads a tool descriptor by name

func (r *ToolRepository) LoadToolFromFile(filename string) error

LoadToolFromFile loads a tool from a YAML file and registers it

func (r *ToolRepository) RegisterTool(name string, descriptor *tools.ToolDescriptor)

RegisterTool adds a tool descriptor directly to the repository

func (r *ToolRepository) SaveTool(descriptor *tools.ToolDescriptor) error

SaveTool saves a tool descriptor to a YAML file using K8s manifest format. The file will be named <tool-name>.yaml in the repository’s base path.

import "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"

Package stage provides the reactive streams architecture for pipeline execution, along with pipeline stages for media and audio processing.

The video frame extraction functions in this package depend on FFmpeg integration code; they require FFmpeg to be installed and cannot be unit tested without it.

const (
// DefaultChannelBufferSize is the default buffer size for channels between stages.
DefaultChannelBufferSize = 16
// DefaultMaxConcurrentPipelines is the default maximum number of concurrent pipeline executions.
DefaultMaxConcurrentPipelines = 100
// DefaultExecutionTimeoutSeconds is the default execution timeout in seconds.
DefaultExecutionTimeoutSeconds = 30
// DefaultGracefulShutdownTimeoutSeconds is the default graceful shutdown timeout in seconds.
DefaultGracefulShutdownTimeoutSeconds = 10
)

Metadata keys for media extraction correlation.

const (
// MediaExtractMessageIDKey tracks which message an extracted element belongs to.
MediaExtractMessageIDKey = "media_extract_message_id"
// MediaExtractPartIndexKey tracks the part index within the message.
MediaExtractPartIndexKey = "media_extract_part_index"
// MediaExtractTotalPartsKey tracks total media parts in the original message.
MediaExtractTotalPartsKey = "media_extract_total_parts"
// MediaExtractMediaTypeKey tracks the media type (image, video).
MediaExtractMediaTypeKey = "media_extract_media_type"
// MediaExtractOriginalMessageKey stores the original message for later composition.
MediaExtractOriginalMessageKey = "media_extract_original_message"
)

Metadata keys for video-to-frames correlation.

const (
// VideoFramesVideoIDKey uniquely identifies the source video.
VideoFramesVideoIDKey = "video_frames_video_id"
// VideoFramesFrameIndexKey tracks the frame index within the video.
VideoFramesFrameIndexKey = "video_frames_frame_index"
// VideoFramesTotalFramesKey tracks expected total frames.
VideoFramesTotalFramesKey = "video_frames_total_frames"
// VideoFramesTimestampKey tracks the frame's timestamp in the original video.
VideoFramesTimestampKey = "video_frames_timestamp"
// VideoFramesOriginalVideoKey stores reference to original VideoData.
VideoFramesOriginalVideoKey = "video_frames_original_video"
)

Default configuration values.

const (
// DefaultFrameInterval is the default time between extracted frames.
DefaultFrameInterval = time.Second
// DefaultTargetFPS is the default target frame rate.
DefaultTargetFPS = 1.0
// DefaultMaxFrames is the default maximum frames to extract.
DefaultMaxFrames = 30
// DefaultOutputFormat is the default output image format.
DefaultOutputFormat = "jpeg"
// OutputFormatPNG is the PNG output format.
OutputFormatPNG = "png"
// DefaultOutputQuality is the default JPEG quality.
DefaultOutputQuality = 85
// DefaultFFmpegPath is the default path to FFmpeg.
DefaultFFmpegPath = "ffmpeg"
// DefaultFFmpegTimeout is the default FFmpeg execution timeout.
DefaultFFmpegTimeout = 5 * time.Minute
// DefaultFramesCompletionTimeout is the default timeout for frame accumulation.
DefaultFramesCompletionTimeout = 30 * time.Second
// DefaultMaxFramesPerMessage is the default max frames in composed message.
DefaultMaxFramesPerMessage = 30
)

Default configuration values.

const (
// DefaultCompletionTimeout is the default timeout for waiting for all message parts.
DefaultCompletionTimeout = 30 * time.Second
)

Default frame rate limit configuration constants.

const (
// DefaultFrameRateLimitFPS is the default target frame rate.
// 1 FPS is suitable for most LLM vision scenarios.
DefaultFrameRateLimitFPS = 1.0
)

Common errors

var (
// ErrPipelineShuttingDown is returned when attempting to execute a pipeline that is shutting down.
ErrPipelineShuttingDown = errors.New("pipeline is shutting down")
// ErrShutdownTimeout is returned when pipeline shutdown times out.
ErrShutdownTimeout = errors.New("shutdown timeout exceeded")
// ErrInvalidPipeline is returned when building an invalid pipeline.
ErrInvalidPipeline = errors.New("invalid pipeline configuration")
// ErrCyclicDependency is returned when the pipeline DAG contains cycles.
ErrCyclicDependency = errors.New("cyclic dependency detected in pipeline")
// ErrStageNotFound is returned when a referenced stage doesn't exist.
ErrStageNotFound = errors.New("stage not found")
// ErrDuplicateStageName is returned when multiple stages have the same name.
ErrDuplicateStageName = errors.New("duplicate stage name")
// ErrNoStages is returned when trying to build a pipeline with no stages.
ErrNoStages = errors.New("pipeline must have at least one stage")
// ErrInvalidChannelBufferSize is returned for invalid buffer size.
ErrInvalidChannelBufferSize = errors.New("channel buffer size must be non-negative")
// ErrInvalidMaxConcurrentPipelines is returned for invalid max concurrent pipelines.
ErrInvalidMaxConcurrentPipelines = errors.New("max concurrent pipelines must be non-negative")
// ErrInvalidExecutionTimeout is returned for invalid execution timeout.
ErrInvalidExecutionTimeout = errors.New("execution timeout must be non-negative")
// ErrInvalidGracefulShutdownTimeout is returned for invalid graceful shutdown timeout.
ErrInvalidGracefulShutdownTimeout = errors.New("graceful shutdown timeout must be non-negative")
// ErrFFmpegNotFound is returned when FFmpeg binary cannot be found.
ErrFFmpegNotFound = errors.New("ffmpeg not found")
// ErrFFmpegFailed is returned when FFmpeg execution fails.
ErrFFmpegFailed = errors.New("ffmpeg execution failed")
// ErrFFmpegTimeout is returned when FFmpeg execution times out.
ErrFFmpegTimeout = errors.New("ffmpeg execution timeout")
// ErrInvalidVideoFormat is returned when video cannot be processed.
ErrInvalidVideoFormat = errors.New("invalid or unsupported video format")
// ErrNoFramesExtracted is returned when FFmpeg produces no output frames.
ErrNoFramesExtracted = errors.New("no frames extracted from video")
// ErrVideoDataRequired is returned when video data is required but missing.
ErrVideoDataRequired = errors.New("video data required but not available")
)

func BatchEmbeddingTexts(texts []string, batchSize int) [][]string

BatchEmbeddingTexts splits texts into batches of the given size. Useful for respecting embedding provider batch limits.

func CosineSimilarity(a, b []float32) float64

CosineSimilarity computes the cosine similarity between two embedding vectors. Returns a value between -1.0 and 1.0, where:

  • 1.0 means vectors are identical in direction
  • 0.0 means vectors are orthogonal (unrelated)
  • -1.0 means vectors are opposite

For text embeddings, values typically range from 0.0 to 1.0, with higher values indicating greater semantic similarity.

Returns 0.0 if vectors have different lengths, are empty, or have zero magnitude.

func DescribeCapabilities(stage Stage) string

DescribeCapabilities returns a human-readable description of a stage’s capabilities. Useful for debugging and logging.

func GetTraceInfo(elem *StreamElement) (traceID string, stageTimes map[string]time.Time)

GetTraceInfo extracts trace information from an element.

func NormalizeEmbedding(embedding []float32) []float32

NormalizeEmbedding normalizes an embedding vector to unit length. This can improve similarity comparisons by ensuring all vectors have the same magnitude.
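
A minimal sketch exercising the embedding helpers above; the vectors and batch size are arbitrary example values.

package main

import (
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    // Parallel vectors normalize to the same direction, so similarity is ~1.0.
    a := stage.NormalizeEmbedding([]float32{1, 2, 3})
    b := stage.NormalizeEmbedding([]float32{2, 4, 6})
    fmt.Printf("similarity: %.2f\n", stage.CosineSimilarity(a, b)) // ~1.00

    // Split texts into batches of two for an embedding provider with a batch limit.
    texts := []string{"one", "two", "three", "four", "five"}
    for _, batch := range stage.BatchEmbeddingTexts(texts, 2) {
        fmt.Println(batch)
    }
}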

func PutElement(elem *StreamElement)

PutElement returns a StreamElement to the pool for reuse. The element is reset before being returned to the pool to prevent data leaks. After calling PutElement, the caller must not use the element again.

func ValidateCapabilities(stages []Stage, edges map[string][]string)

ValidateCapabilities checks format compatibility between connected stages. It logs warnings for potential mismatches but does not return errors, as format compatibility can often only be fully determined at runtime.

This function is called during pipeline building to provide early feedback about potential issues.

AudioCapability describes audio format requirements for a stage.

type AudioCapability struct {
// Formats lists accepted audio formats. Empty slice means any format.
Formats []AudioFormat
// SampleRates lists accepted sample rates in Hz. Empty slice means any rate.
SampleRates []int
// Channels lists accepted channel counts. Empty slice means any.
Channels []int
}

func (ac *AudioCapability) AcceptsAudio(audio *AudioData) bool

AcceptsAudio returns true if this capability accepts the given audio data.

func (ac *AudioCapability) AcceptsChannels(channels int) bool

AcceptsChannels returns true if this capability accepts the given channel count. Returns true if Channels is empty (accepts any).

func (ac *AudioCapability) AcceptsFormat(format AudioFormat) bool

AcceptsFormat returns true if this capability accepts the given format. Returns true if Formats is empty (accepts any).

func (ac *AudioCapability) AcceptsSampleRate(rate int) bool

AcceptsSampleRate returns true if this capability accepts the given sample rate. Returns true if SampleRates is empty (accepts any).
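
A minimal sketch of declaring an audio capability and probing it with the Accepts* helpers above; the accepted formats, rates, and channel counts are arbitrary examples.

package main

import (
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    audioCap := stage.AudioCapability{
        Formats:     []stage.AudioFormat{stage.AudioFormatPCM16},
        SampleRates: []int{16000, 24000},
        Channels:    []int{1},
    }

    fmt.Println(audioCap.AcceptsFormat(stage.AudioFormatPCM16)) // true
    fmt.Println(audioCap.AcceptsSampleRate(44100))              // false
    fmt.Println(audioCap.AcceptsChannels(2))                    // false
}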

AudioData carries audio samples with metadata.

type AudioData struct {
Samples []byte // Raw audio samples
SampleRate int // Sample rate in Hz (e.g., 16000, 44100)
Channels int // Number of audio channels (1=mono, 2=stereo)
Format AudioFormat // Audio encoding format
Duration time.Duration // Duration of the audio segment
Encoding string // Encoding scheme (e.g., "pcm", "opus")
}

AudioFormat represents the encoding format of audio data.

type AudioFormat int

const (
// AudioFormatPCM16 is 16-bit PCM encoding
AudioFormatPCM16 AudioFormat = iota
// AudioFormatFloat32 is 32-bit floating point encoding
AudioFormatFloat32
// AudioFormatOpus is Opus codec encoding
AudioFormatOpus
// AudioFormatMP3 is MP3 encoding
AudioFormatMP3
// AudioFormatAAC is AAC encoding
AudioFormatAAC
)

func (af AudioFormat) String() string

String returns the string representation of the audio format.

AudioResampleConfig contains configuration for the audio resampling stage.

type AudioResampleConfig struct {
// TargetSampleRate is the desired output sample rate in Hz.
// Common values: 16000 (Gemini), 24000 (OpenAI TTS), 44100 (CD quality).
TargetSampleRate int
// PassthroughIfSameRate skips resampling if input rate matches target rate.
// Default: true.
PassthroughIfSameRate bool
}

func DefaultAudioResampleConfig() AudioResampleConfig

DefaultAudioResampleConfig returns sensible defaults for audio resampling.

AudioResampleStage resamples audio data to a target sample rate. This is useful for normalizing audio from different sources (TTS, files) to match provider requirements.

This is a Transform stage: audio element → resampled audio element (1:1)

type AudioResampleStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewAudioResampleStage(config AudioResampleConfig) *AudioResampleStage

NewAudioResampleStage creates a new audio resampling stage.

func (s *AudioResampleStage) GetConfig() AudioResampleConfig

GetConfig returns the stage configuration.

func (s *AudioResampleStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface. Resamples audio in each element to the target sample rate.
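
A minimal sketch of configuring the resample stage for a 16 kHz target, based on the config and constructor above.

package main

import (
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    cfg := stage.DefaultAudioResampleConfig()
    cfg.TargetSampleRate = 16000 // e.g., for providers that expect 16 kHz input

    resample := stage.NewAudioResampleStage(cfg)
    fmt.Println(resample.Name(), resample.GetConfig().TargetSampleRate)
}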

AudioTurnConfig configures the AudioTurnStage.

type AudioTurnConfig struct {
// VAD is the voice activity detector.
// If nil, a SimpleVAD with default params is created.
VAD audio.VADAnalyzer
// TurnDetector determines when user has finished speaking.
// If nil, VAD state transitions are used for turn detection.
TurnDetector audio.TurnDetector
// InterruptionHandler detects when user interrupts TTS output.
// This should be shared with TTSStageWithInterruption.
// If nil, interruption detection is disabled.
InterruptionHandler *audio.InterruptionHandler
// SilenceDuration is how long silence must persist to trigger turn complete.
// Default: 800ms
SilenceDuration time.Duration
// MinSpeechDuration is minimum speech before turn can complete.
// Default: 200ms
MinSpeechDuration time.Duration
// MaxTurnDuration is maximum turn length before forcing completion.
// Default: 30s
MaxTurnDuration time.Duration
// SampleRate is the audio sample rate for output AudioData.
// Default: 16000
SampleRate int
}

func DefaultAudioTurnConfig() AudioTurnConfig

DefaultAudioTurnConfig returns sensible defaults for AudioTurnStage.

AudioTurnStage detects voice activity and accumulates audio into complete turns. It outputs complete audio utterances when the user stops speaking.

This stage consolidates:

  • Voice Activity Detection (VAD)
  • Turn boundary detection
  • Audio accumulation
  • Interruption detection (shared with TTSStageWithInterruption)

This is an Accumulate stage: N audio chunks → 1 audio utterance

type AudioTurnStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewAudioTurnStage(config AudioTurnConfig) (*AudioTurnStage, error)

NewAudioTurnStage creates a new audio turn stage.

func (s *AudioTurnStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface. Accumulates audio chunks until turn complete, then emits audio utterance.
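
A minimal sketch of building an audio turn stage with the default VAD and a shorter silence threshold; the specific durations are arbitrary examples.

package main

import (
    "time"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    cfg := stage.DefaultAudioTurnConfig()
    cfg.SilenceDuration = 600 * time.Millisecond // end the turn a bit sooner than the 800ms default
    cfg.SampleRate = 16000

    turnStage, err := stage.NewAudioTurnStage(cfg)
    if err != nil {
        panic(err)
    }
    _ = turnStage // chain into a pipeline as with any other stage
}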

BaseStage provides common functionality for stage implementations. Stages can embed this to reduce boilerplate.

type BaseStage struct {
// contains filtered or unexported fields
}

func NewBaseStage(name string, stageType StageType) BaseStage

NewBaseStage creates a new BaseStage with the given name and type.

func (b *BaseStage) Name() string

Name returns the stage name.

func (b *BaseStage) Type() StageType

Type returns the stage type.

BroadcastRouter sends each element to ALL registered outputs. Useful for fan-out scenarios where all consumers need every element.

type BroadcastRouter struct {
BaseStage
// contains filtered or unexported fields
}

func NewBroadcastRouter(name string) *BroadcastRouter

NewBroadcastRouter creates a router that broadcasts to all outputs.

func (r *BroadcastRouter) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process broadcasts each element to all outputs.

func (r *BroadcastRouter) RegisterOutput(name string, output chan<- StreamElement)

RegisterOutput registers an output channel with a name.
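
A minimal sketch of fanning one stream out to two consumers; the output names and buffer sizes are arbitrary examples.

package main

import (
    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    router := stage.NewBroadcastRouter("fanout")

    recorderCh := make(chan stage.StreamElement, 16)
    metricsCh := make(chan stage.StreamElement, 16)
    router.RegisterOutput("recorder", recorderCh)
    router.RegisterOutput("metrics", metricsCh)

    // router.Process(ctx, input, output) is normally driven by the pipeline;
    // every element read from input is delivered to both registered outputs.
}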

ByOriginalIndex sorts ScoredMessages by their original index (ascending).

type ByOriginalIndex []ScoredMessage

func (s ByOriginalIndex) Len() int

func (s ByOriginalIndex) Less(i, j int) bool

func (s ByOriginalIndex) Swap(i, j int)

Capabilities describes what a stage accepts or produces.

type Capabilities struct {
// ContentTypes lists the content types handled. Empty means any.
ContentTypes []ContentType
// Audio specifies audio-specific requirements. Nil means N/A or any.
Audio *AudioCapability
}

func AnyCapabilities() Capabilities

AnyCapabilities returns capabilities that accept any content type.

func AudioCapabilities(formats []AudioFormat, sampleRates, channels []int) Capabilities

AudioCapabilities returns capabilities for audio content with optional format constraints.

func MessageCapabilities() Capabilities

MessageCapabilities returns capabilities for message content.

func TextCapabilities() Capabilities

TextCapabilities returns capabilities for text-only content.

func (c *Capabilities) AcceptsContentType(ct ContentType) bool

AcceptsContentType returns true if this capability accepts the given content type. Returns true if ContentTypes is empty (accepts any).

func (c *Capabilities) AcceptsElement(elem *StreamElement) bool

AcceptsElement returns true if this capability accepts the given stream element.

ContentRouter routes elements to different outputs based on predicate rules. Rules are evaluated in order; the first matching rule determines the destination. Elements that don’t match any rule are dropped with a warning log.

type ContentRouter struct {
BaseStage
// contains filtered or unexported fields
}

func NewContentRouter(name string, rules ...RoutingRule) *ContentRouter

NewContentRouter creates a new content-aware router with the given rules.

func (r *ContentRouter) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process routes elements based on the configured rules.

func (r *ContentRouter) RegisterOutput(name string, output chan<- StreamElement)

RegisterOutput registers an output channel with a name. This must be called before Process() to set up routing destinations.

ContentType describes the type of content a stage handles.

type ContentType int

const (
// ContentTypeAny indicates the stage accepts any content type.
ContentTypeAny ContentType = iota
// ContentTypeText indicates text content.
ContentTypeText
// ContentTypeAudio indicates audio content.
ContentTypeAudio
// ContentTypeVideo indicates video content.
ContentTypeVideo
// ContentTypeImage indicates image content.
ContentTypeImage
// ContentTypeMessage indicates a complete message.
ContentTypeMessage
// ContentTypeToolCall indicates a tool invocation.
ContentTypeToolCall
)

func (ct ContentType) String() string

String returns the string representation of the content type.

ContextBuilderPolicy defines token budget and truncation behavior.

type ContextBuilderPolicy struct {
TokenBudget int
ReserveForOutput int
Strategy TruncationStrategy
CacheBreakpoints bool
// RelevanceConfig for TruncateLeastRelevant strategy.
// Required when using TruncateLeastRelevant; must include EmbeddingProvider.
RelevanceConfig *RelevanceConfig
// TokenCounter provides token counting for budget management.
// If nil, a default heuristic counter is used with ModelFamilyDefault ratio (1.35).
// Use tokenizer.NewTokenCounterForModel(modelName) to create a model-aware counter.
TokenCounter tokenizer.TokenCounter
}

ContextBuilderStage manages token budget and truncates messages if needed.

This stage ensures the conversation context fits within the LLM’s token budget by applying truncation strategies when messages exceed the limit.

Token budget calculation:

available = TokenBudget - ReserveForOutput - systemPromptTokens

Truncation strategies (TruncationStrategy):

  • TruncateOldest: removes oldest messages first (keeps most recent context)
  • TruncateLeastRelevant: removes least relevant messages (requires RelevanceConfig with EmbeddingProvider)
  • TruncateSummarize: not yet implemented (returns error)
  • TruncateFail: returns error if budget exceeded (strict mode)

Configuration (ContextBuilderPolicy):

  • TokenBudget: total tokens allowed (0 = unlimited, pass-through mode)
  • ReserveForOutput: tokens reserved for LLM response
  • Strategy: truncation strategy to apply
  • CacheBreakpoints: enable prompt caching hints

Metadata added:

  • context_truncated: true if truncation was applied
  • enable_cache_breakpoints: copied from policy.CacheBreakpoints

This is an Accumulate stage: N input elements → N (possibly fewer) output elements

type ContextBuilderStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewContextBuilderStage(policy *ContextBuilderPolicy) *ContextBuilderStage

NewContextBuilderStage creates a context builder stage.

func (s *ContextBuilderStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process enforces token budget and truncates messages if needed.
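
A minimal sketch of a budget-limited context builder. The TruncateOldest constant name is taken from the strategy list above and assumed to be an exported constant in this package; the budget numbers are arbitrary examples.

package main

import (
    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    policy := &stage.ContextBuilderPolicy{
        TokenBudget:      8000,
        ReserveForOutput: 1024,
        Strategy:         stage.TruncateOldest, // assumed constant: drop oldest messages when over budget
        CacheBreakpoints: true,
    }

    builder := stage.NewContextBuilderStage(policy)
    _ = builder // chain into a pipeline before the provider stage
}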

DebugStage logs StreamElements for debugging pipeline state. Useful for development and troubleshooting.

type DebugStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewDebugStage(stageName string) *DebugStage

NewDebugStage creates a debug stage that logs elements at a specific pipeline location.

func (s *DebugStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process logs each element as it passes through (passthrough transform).

DropStrategy defines how frames are dropped when rate limiting.

type DropStrategy int

const (
// DropStrategyKeepLatest keeps the most recent frame and drops older ones.
// This ensures the model sees the most current state.
DropStrategyKeepLatest DropStrategy = iota
// DropStrategyUniform attempts to keep frames uniformly distributed.
// This provides a more representative sampling across time.
DropStrategyUniform
)

func (s DropStrategy) String() string

String returns the string representation of the drop strategy.

DuplexProviderStage handles bidirectional streaming through a session. It forwards elements from input to the provider’s session and forwards responses from the session to output.

This stage is PROVIDER-AGNOSTIC. Provider-specific behaviors (interruptions, reconnection, protocol quirks) are handled BY the provider internally.

System Prompt Handling: The first element received may contain a system prompt in metadata["system_prompt"]. This is sent to the session via SendSystemContext() before processing audio/text.

Response Accumulation: Streaming providers send text/audio responses in chunks. This stage accumulates content across chunks and creates a Message on turn completion (FinishReason).

Session Closure: When the session closes unexpectedly, any accumulated content is emitted as a partial response. The executor is responsible for session recreation if needed.

This is a Bidirectional stage: input elements ⟷ session ⟷ output elements

type DuplexProviderStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewDuplexProviderStage(provider providers.StreamInputSupport, baseConfig *providers.StreamingInputConfig) *DuplexProviderStage

NewDuplexProviderStage creates a new duplex provider stage. The session is created lazily when the first element arrives, using system_prompt from element metadata. This allows the pipeline to be the single source of truth for prompt assembly.

func NewDuplexProviderStageWithEmitter(provider providers.StreamInputSupport, baseConfig *providers.StreamingInputConfig, emitter *events.Emitter) *DuplexProviderStage

NewDuplexProviderStageWithEmitter creates a new duplex provider stage with event emission support. The emitter is used to emit audio.input and audio.output events for session recording.

func (s *DuplexProviderStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface. Handles bidirectional streaming between input channel and WebSocket session.

For duplex streaming (Gemini Live API), this runs until:

  • Context is canceled (user stops the session)
  • Session response channel is closed (server ends session)
  • Input channel is closed (upstream ends)

If no session is pre-configured, the session is created lazily when the first element arrives. The system_prompt from element metadata is used as the SystemInstruction for session creation.

EndInputter is an optional interface for sessions that support explicit end-of-input signaling. This is primarily used by mock sessions to trigger responses after all audio has been sent.

type EndInputter interface {
EndInput()
}

ExecutionResult represents the final result of a pipeline execution. This matches the existing pipeline.ExecutionResult for compatibility.

type ExecutionResult struct {
Messages []types.Message // All messages in the conversation
Response *Response // The final response
Trace ExecutionTrace // Execution trace
CostInfo types.CostInfo // Cost information
Metadata map[string]interface{} // Additional metadata
}

ExecutionTrace captures execution history (for compatibility).

type ExecutionTrace struct {
StartedAt time.Time
CompletedAt *time.Time
Duration time.Duration
}

FilterStage filters elements based on a predicate function.

type FilterStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewFilterStage(name string, predicate func(StreamElement) bool) *FilterStage

NewFilterStage creates a new filter stage.

func (fs *FilterStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process filters elements based on the predicate.
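
A minimal sketch of a filter stage that keeps only user messages; it can be chained into a pipeline just like the MapStage example below.

package main

import (
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    // Drop every element that is not a user message.
    userOnly := stage.NewFilterStage("user-only", func(elem stage.StreamElement) bool {
        return elem.Message != nil && elem.Message.Role == "user"
    })
    fmt.Println(userOnly.Name())
}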

FormatCapable is an optional interface that stages can implement to declare their input/output format requirements. Stages that don’t implement this are treated as accepting/producing any format.

type FormatCapable interface {
// InputCapabilities returns what formats/content types this stage accepts.
InputCapabilities() Capabilities
// OutputCapabilities returns what formats/content types this stage produces.
OutputCapabilities() Capabilities
}

FrameExtractionMode defines how frames are selected from video.

type FrameExtractionMode int

const (
// FrameExtractionInterval extracts frames at fixed time intervals.
FrameExtractionInterval FrameExtractionMode = iota
// FrameExtractionKeyframes extracts only keyframes (I-frames).
FrameExtractionKeyframes
// FrameExtractionFPS extracts at a specific frame rate.
FrameExtractionFPS
)

func (m FrameExtractionMode) String() string

String returns the string representation of the extraction mode.

FrameRateLimitConfig configures the FrameRateLimitStage behavior.

type FrameRateLimitConfig struct {
// TargetFPS is the target output frame rate.
// Frames exceeding this rate will be dropped.
// Default: 1.0 FPS.
TargetFPS float64
// DropStrategy determines which frames to drop when rate limiting.
// Default: DropStrategyKeepLatest.
DropStrategy DropStrategy
// PassthroughAudio allows audio elements to bypass rate limiting.
// This is important for maintaining audio quality in mixed streams.
// Default: true.
PassthroughAudio bool
// PassthroughNonMedia allows non-media elements (text, messages, etc.)
// to bypass rate limiting.
// Default: true.
PassthroughNonMedia bool
}

func DefaultFrameRateLimitConfig() FrameRateLimitConfig

DefaultFrameRateLimitConfig returns sensible defaults for frame rate limiting.

FrameRateLimitStage drops frames to maintain a target frame rate. This is useful for high-FPS video feeds (e.g., 30fps webcam) that need to be reduced to a rate suitable for LLM processing (e.g., 1fps).

This is a Transform stage that may drop elements (N:M where M <= N).

type FrameRateLimitStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewFrameRateLimitStage(config FrameRateLimitConfig) *FrameRateLimitStage

NewFrameRateLimitStage creates a new frame rate limiting stage.

func (s *FrameRateLimitStage) GetConfig() FrameRateLimitConfig

GetConfig returns the stage configuration.

func (s *FrameRateLimitStage) GetStats() (emitted, dropped int64)

GetStats returns the current frame statistics.

func (s *FrameRateLimitStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface. Drops video/image frames to maintain the target frame rate.
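
A minimal sketch of limiting a high-FPS feed to 1 frame per second, based on the config and constructor above.

package main

import (
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    cfg := stage.DefaultFrameRateLimitConfig()
    cfg.TargetFPS = 1.0
    cfg.DropStrategy = stage.DropStrategyKeepLatest

    limiter := stage.NewFrameRateLimitStage(cfg)

    // GetStats reports frames emitted vs. dropped so far
    // (zero here, since nothing has been processed yet).
    emitted, dropped := limiter.GetStats()
    fmt.Println(emitted, dropped)
}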

FrameSelectionStrategy defines how frames are selected when limiting.

type FrameSelectionStrategy int

const (
// FrameSelectionUniform selects frames uniformly distributed across the video.
FrameSelectionUniform FrameSelectionStrategy = iota
// FrameSelectionFirst selects the first N frames.
FrameSelectionFirst
// FrameSelectionLast selects the last N frames.
FrameSelectionLast
)

func (s FrameSelectionStrategy) String() string

String returns the string representation of the selection strategy.

FramesToMessageConfig configures the FramesToMessageStage behavior.

type FramesToMessageConfig struct {
// CompletionTimeout is how long to wait for all frames of a video.
// If timeout is reached, compose with available frames.
// Default: 30s.
CompletionTimeout time.Duration
// MaxFramesPerMessage limits frames included in the composed message.
// 0 means unlimited.
// Default: 30.
MaxFramesPerMessage int
// FrameSelectionStrategy determines which frames to include when limiting.
// Default: FrameSelectionUniform.
FrameSelectionStrategy FrameSelectionStrategy
// StorageService for externalizing composed images (optional).
StorageService storage.MediaStorageService
}

func DefaultFramesToMessageConfig() FramesToMessageConfig

DefaultFramesToMessageConfig returns sensible defaults for frame composition.

FramesToMessageStage collects extracted frames and composes them into Messages. Elements are correlated by video ID from VideoToFramesStage metadata.

Input: StreamElements with Image and video_frames metadata
Output: StreamElement with Message containing composed image Parts[]

Non-frame elements (those without video_frames metadata) are passed through unchanged.

This is an Accumulate stage (N:1 fan-in pattern).

type FramesToMessageStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewFramesToMessageStage(config FramesToMessageConfig) *FramesToMessageStage

NewFramesToMessageStage creates a new frame composition stage.

func (s *FramesToMessageStage) GetConfig() FramesToMessageConfig

GetConfig returns the stage configuration.

func (s *FramesToMessageStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface. Collects frames and composes them into messages.

HashRouter routes elements based on consistent hashing of a key. This ensures elements with the same key always go to the same destination.

type HashRouter struct {
BaseStage
// contains filtered or unexported fields
}

func NewHashRouter(name string, outputNames []string, keyFunc func(StreamElement) string) *HashRouter

NewHashRouter creates a router that uses consistent hashing. The keyFunc extracts a key from each element (e.g., session ID). Elements with the same key always route to the same destination.

func (r *HashRouter) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process routes elements based on hash of key.

func (r *HashRouter) RegisterOutput(name string, output chan<- StreamElement)

RegisterOutput registers an output channel with a name.

ImageData carries image data with metadata. Supports externalization to avoid holding large data in memory.

type ImageData struct {
Data []byte // Raw image data (encoded as JPEG, PNG, etc.)
MIMEType string // MIME type (e.g., "image/jpeg", "image/png")
Width int // Image width in pixels
Height int // Image height in pixels
Format string // Format identifier (e.g., "jpeg", "png", "webp")
// Streaming fields (for realtime video/image streaming)
FrameNum int64 // Frame sequence number (for ordering in streams)
Timestamp time.Time // Frame capture timestamp (for synchronization)
// StorageRef holds a reference to externalized data (when Data is nil).
// Use IsExternalized() to check, Load() to retrieve data.
StorageRef storage.Reference
}

func (d *ImageData) EnsureLoaded(ctx context.Context, store storage.MediaStorageService) ([]byte, error)

EnsureLoaded ensures the image data is loaded into memory. This is a convenience method that calls Load if externalized.

func (d *ImageData) Externalize(ctx context.Context, store storage.MediaStorageService, metadata *storage.MediaMetadata) error

Externalize stores the image data to external storage and clears in-memory data. The StorageRef is updated to point to the stored data.

func (d *ImageData) IsExternalized() bool

IsExternalized returns true if the image data has been externalized to storage. When externalized, Data is nil and StorageRef contains the storage reference.

func (d *ImageData) Load(ctx context.Context, store storage.MediaStorageService) error

Load retrieves externalized image data from storage. Returns immediately if data is already in memory.

ImagePreprocessConfig contains configuration for the image preprocessing stage.

type ImagePreprocessConfig struct {
// Resize configuration for image resizing.
Resize ImageResizeStageConfig
// EnableResize enables image resizing.
// Default: true.
EnableResize bool
}

func DefaultImagePreprocessConfig() ImagePreprocessConfig

DefaultImagePreprocessConfig returns sensible defaults for image preprocessing.

ImagePreprocessStage preprocesses images in messages before sending to providers. This stage processes images directly within Message.Parts[].Media, performing operations like resizing, format conversion, and size optimization.

This is a Transform stage: message with images → message with processed images (1:1)

type ImagePreprocessStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewImagePreprocessStage(config ImagePreprocessConfig) *ImagePreprocessStage

NewImagePreprocessStage creates a new image preprocessing stage.

func (s *ImagePreprocessStage) GetConfig() ImagePreprocessConfig

GetConfig returns the stage configuration.

func (s *ImagePreprocessStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface. Preprocesses images in messages that flow through the stage.

ImageResizeStage resizes images to fit within configured dimensions. This is useful for reducing image sizes before sending to providers or for normalizing images from different sources.

This is a Transform stage: image element → resized image element (1:1)

type ImageResizeStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewImageResizeStage(config ImageResizeStageConfig) *ImageResizeStage

NewImageResizeStage creates a new image resizing stage.

func (s *ImageResizeStage) GetConfig() ImageResizeStageConfig

GetConfig returns the stage configuration.

func (s *ImageResizeStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface. Resizes images in each element to fit within the configured dimensions.

ImageResizeStageConfig is an alias for media.ImageResizeConfig. This provides stage-specific naming while avoiding code duplication.

type ImageResizeStageConfig = media.ImageResizeConfig

func DefaultImageResizeStageConfig() ImageResizeStageConfig

DefaultImageResizeStageConfig returns sensible defaults for image resizing.

MapStage transforms elements using a mapping function.

type MapStage struct {
BaseStage
// contains filtered or unexported fields
}
Example

ExampleMapStage demonstrates using a map stage to transform elements.

package main

import (
    "context"
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
    "github.com/AltairaLabs/PromptKit/runtime/types"
)

func main() {
    // Create a map stage that prefixes message content
    uppercaseStage := stage.NewMapStage("uppercase", func(elem stage.StreamElement) (stage.StreamElement, error) {
        if elem.Message != nil {
            msg := *elem.Message
            msg.Content = "TRANSFORMED: " + msg.Content
            elem.Message = &msg
        }
        return elem, nil
    })

    // Build pipeline
    pipeline, _ := stage.NewPipelineBuilder().
        Chain(uppercaseStage).
        Build()

    // Execute
    input := make(chan stage.StreamElement, 1)
    input <- stage.NewMessageElement(&types.Message{
        Role:    "user",
        Content: "hello",
    })
    close(input)

    output, _ := pipeline.Execute(context.Background(), input)
    for elem := range output {
        if elem.Message != nil {
            fmt.Printf("%s\n", elem.Message.Content)
        }
    }
}
TRANSFORMED: hello

func NewMapStage(name string, mapFunc func(StreamElement) (StreamElement, error)) *MapStage

NewMapStage creates a new map stage.

func (ms *MapStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process transforms each element using the map function.

MediaComposeConfig configures the MediaComposeStage behavior.

type MediaComposeConfig struct {
// CompletionTimeout is how long to wait for all parts of a message.
// If timeout is reached, compose with available parts.
// Default: 30s.
CompletionTimeout time.Duration
// StorageService for externalizing composed media (optional).
StorageService storage.MediaStorageService
}

func DefaultMediaComposeConfig() MediaComposeConfig

DefaultMediaComposeConfig returns sensible defaults for media composition.

MediaComposeStage collects processed media and composes back into messages. Elements are correlated by message ID from MediaExtractStage metadata.

Input: StreamElements with Image or Video and extract metadata
Output: StreamElement with Message containing composed Parts[]

Non-media elements (those without extract metadata) are passed through unchanged.

This is an Accumulate stage (N:1 fan-in pattern).

type MediaComposeStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewMediaComposeStage(config MediaComposeConfig) *MediaComposeStage

NewMediaComposeStage creates a new media composition stage.

func (s *MediaComposeStage) GetConfig() MediaComposeConfig

GetConfig returns the stage configuration.

func (s *MediaComposeStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface. Collects media elements and composes them back into messages.

MediaConvertConfig configures the MediaConvertStage behavior.

type MediaConvertConfig struct {
// TargetAudioFormats lists accepted audio MIME types.
// Audio will be converted to the first format if not already supported.
TargetAudioFormats []string
// TargetImageFormats lists accepted image MIME types.
// Images will be converted to the first format if not already supported.
// Supported formats: image/jpeg, image/png
TargetImageFormats []string
// TargetVideoFormats lists accepted video MIME types.
// Video conversion is not yet implemented.
TargetVideoFormats []string
// AudioConverterConfig configures audio conversion.
AudioConverterConfig media.AudioConverterConfig
// ImageResizeConfig configures image conversion/resizing.
// Only the Format and Quality fields are used for format conversion.
ImageResizeConfig media.ImageResizeConfig
// PassthroughOnError passes through unconverted content if conversion fails.
// If false, errors are propagated to the pipeline.
// Default: true.
PassthroughOnError bool
}

func DefaultMediaConvertConfig() MediaConvertConfig

DefaultMediaConvertConfig returns sensible defaults for media conversion.

MediaConvertStage converts media content to match target format requirements. This is useful for normalizing media from various sources to match provider capabilities.

This is a Transform stage: element → converted element (1:1)

type MediaConvertStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewMediaConvertStage(config *MediaConvertConfig) *MediaConvertStage

NewMediaConvertStage creates a new media conversion stage.

func (s *MediaConvertStage) GetConfig() MediaConvertConfig

GetConfig returns the stage configuration.

func (s *MediaConvertStage) InputCapabilities() Capabilities

InputCapabilities implements FormatCapable interface.

func (s *MediaConvertStage) OutputCapabilities() Capabilities

OutputCapabilities implements FormatCapable interface.

func (s *MediaConvertStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface. Converts media elements to target formats as needed.
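
An illustrative configuration sketch; the target MIME types shown are examples chosen for this snippet, not a statement of any provider's requirements.

package main

import (
    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    // Start from the package defaults and narrow targets to what the
    // downstream provider accepts.
    cfg := stage.DefaultMediaConvertConfig()
    cfg.TargetImageFormats = []string{"image/jpeg", "image/png"}
    cfg.TargetAudioFormats = []string{"audio/wav"}
    cfg.PassthroughOnError = true // pass unconverted media through rather than failing the pipeline

    convert := stage.NewMediaConvertStage(&cfg)
    _ = convert // chain into a pipeline ahead of the provider stage
}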

MediaExternalizerConfig configures media externalization behavior.

type MediaExternalizerConfig struct {
Enabled bool
StorageService storage.MediaStorageService
SizeThresholdKB int64
DefaultPolicy string
RunID string
SessionID string
ConversationID string
}

MediaExternalizerStage externalizes large media content to external storage.

When messages contain large inline media (images, audio, video), this stage moves the data to external storage and replaces it with a storage reference. This reduces memory usage and allows for media lifecycle management.

Behavior:

  • Skipped if Enabled=false or no StorageService configured
  • Only externalizes media exceeding SizeThresholdKB (base64 size)
  • Preserves media.StorageReference if already externalized
  • Clears media.Data after successful externalization

Configuration:

  • Enabled: master switch for externalization
  • SizeThresholdKB: minimum size to externalize (0 = externalize all)
  • StorageService: where to store media (S3, GCS, local filesystem, etc.)
  • DefaultPolicy: retention policy name for stored media

This is a Transform stage: 1 input element → 1 output element (with externalized media)

type MediaExternalizerStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewMediaExternalizerStage(config *MediaExternalizerConfig) *MediaExternalizerStage

NewMediaExternalizerStage creates a media externalizer stage.

func (s *MediaExternalizerStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process externalizes media from messages if they exceed size threshold.
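
A configuration sketch. The threshold, policy name, and session ID are illustrative; without a StorageService the stage is skipped, so this remains inert until a real backend is assigned.

package main

import (
    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    // Externalize inline media larger than 64 KB (illustrative threshold).
    cfg := &stage.MediaExternalizerConfig{
        Enabled:         true,
        SizeThresholdKB: 64,
        DefaultPolicy:   "default",
        SessionID:       "session-123",
        // StorageService: assign a storage.MediaStorageService (S3, GCS, local filesystem)
    }
    externalizer := stage.NewMediaExternalizerStage(cfg)
    _ = externalizer
}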

MediaExtractConfig configures the MediaExtractStage behavior.

type MediaExtractConfig struct {
// ExtractImages enables image extraction.
// Default: true.
ExtractImages bool
// ExtractVideos enables video extraction.
// Default: true.
ExtractVideos bool
// PreserveStorageRefs when true, keeps storage references without loading data.
// This enables lazy loading where data is only fetched when needed.
// Default: true.
PreserveStorageRefs bool
// StorageService for loading externalized media (optional).
// Only needed if PreserveStorageRefs is false and media has storage references.
StorageService storage.MediaStorageService
}

func DefaultMediaExtractConfig() MediaExtractConfig

DefaultMediaExtractConfig returns sensible defaults for media extraction.

MediaExtractStage extracts media from messages into individual StreamElements. This enables batch processing of images/videos through separate pipeline stages.

Input: StreamElement with Message containing Parts[]
Output: Multiple StreamElements with Image or Video, preserving correlation metadata

For messages without media, the element is passed through unchanged.

This is a Transform stage with fan-out behavior (1 message → N media elements).

type MediaExtractStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewMediaExtractStage(config MediaExtractConfig) *MediaExtractStage

NewMediaExtractStage creates a new media extraction stage.

func (s *MediaExtractStage) GetConfig() MediaExtractConfig

GetConfig returns the stage configuration.

func (s *MediaExtractStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface. Extracts media from messages and emits individual elements for each.
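
A sketch of the fan-out/fan-in pattern these stages are designed for, using package defaults throughout: extract media from messages, resize each image, then compose the processed media back into messages.

package main

import (
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    extract := stage.NewMediaExtractStage(stage.DefaultMediaExtractConfig())
    resize := stage.NewImageResizeStage(stage.DefaultImageResizeStageConfig())
    compose := stage.NewMediaComposeStage(stage.DefaultMediaComposeConfig())

    pipeline, err := stage.NewPipelineBuilder().
        Chain(extract, resize, compose).
        Build()
    if err != nil {
        fmt.Printf("build error: %v\n", err)
        return
    }
    _ = pipeline // Execute with a channel of message elements, as in the other examples
}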

MergeStage merges multiple input channels into a single output channel. This enables fan-in patterns where multiple stages feed into one.

This is an Accumulate stage type that handles multiple inputs (N:1 merge).

type MergeStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewMergeStage(name string, inputCount int) *MergeStage

NewMergeStage creates a new merge stage that merges N inputs into 1 output.

func (s *MergeStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface (single input). This is not typically used for the merge stage; use ProcessMultiple instead.

func (s *MergeStage) ProcessMultiple(ctx context.Context, inputs []<-chan StreamElement, output chan<- StreamElement) error

ProcessMultiple processes multiple input channels and merges them into one output. This is a special method for merge stages that differs from the standard Process signature.
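
A sketch of driving ProcessMultiple directly; in practice the pipeline builder wires the channels. Buffer sizes are illustrative, and the drain loop simply empties whatever was buffered.

package main

import (
    "context"
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    merge := stage.NewMergeStage("merge", 2)

    a := make(chan stage.StreamElement, 1)
    b := make(chan stage.StreamElement, 1)
    a <- stage.NewTextElement("from A")
    b <- stage.NewTextElement("from B")
    close(a)
    close(b)

    out := make(chan stage.StreamElement, 4)
    _ = merge.ProcessMultiple(context.Background(), []<-chan stage.StreamElement{a, b}, out)

    // Drain whatever was merged into the buffered output.
    for len(out) > 0 {
        elem := <-out
        if elem.Text != nil {
            fmt.Println(*elem.Text)
        }
    }
}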

MetricsStage wraps another stage and collects metrics about its performance. This is a transparent wrapper that doesn’t modify element flow.

type MetricsStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewMetricsStage(wrappedStage Stage) *MetricsStage

NewMetricsStage wraps a stage with metrics collection.

func (s *MetricsStage) GetMetrics() StageMetrics

GetMetrics returns the collected metrics.

func (s *MetricsStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface with metrics collection.
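
A sketch of wrapping a stage with metrics and reading the counters after a run; the wrapped passthrough stage stands in for real work.

package main

import (
    "context"
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    // Wrap a stage so its throughput and latency are recorded transparently.
    wrapped := stage.NewMetricsStage(stage.NewPassthroughStage("work"))

    pipeline, err := stage.NewPipelineBuilder().Chain(wrapped).Build()
    if err != nil {
        fmt.Println("build error:", err)
        return
    }

    input := make(chan stage.StreamElement, 1)
    input <- stage.NewTextElement("hello")
    close(input)

    output, err := pipeline.Execute(context.Background(), input)
    if err != nil {
        fmt.Println("execute error:", err)
        return
    }
    for range output {
        // drain
    }

    m := wrapped.GetMetrics()
    fmt.Printf("in=%d out=%d avg=%s\n", m.ElementsIn, m.ElementsOut, m.AvgLatency)
}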

PassthroughStage is a simple stage that passes all elements through unchanged. Useful for testing or as a placeholder.

type PassthroughStage struct {
BaseStage
}

func NewPassthroughStage(name string) *PassthroughStage

NewPassthroughStage creates a new passthrough stage.

func (ps *PassthroughStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process passes all elements through unchanged.

PipelineBuilder constructs a pipeline DAG. It provides methods for creating linear chains and branching topologies.

type PipelineBuilder struct {
// contains filtered or unexported fields
}
Example

ExamplePipelineBuilder demonstrates building a simple linear pipeline.

package main

import (
    "context"
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
    "github.com/AltairaLabs/PromptKit/runtime/types"
)

func main() {
    // Create some simple stages
    inputStage := stage.NewPassthroughStage("input")
    processStage := stage.NewPassthroughStage("process")
    outputStage := stage.NewPassthroughStage("output")

    // Build a linear pipeline
    pipeline, err := stage.NewPipelineBuilder().
        Chain(inputStage, processStage, outputStage).
        Build()
    if err != nil {
        fmt.Printf("Error building pipeline: %v\n", err)
        return
    }

    // Create input channel with a message
    input := make(chan stage.StreamElement, 1)
    input <- stage.NewMessageElement(&types.Message{
        Role:    "user",
        Content: "Hello, world!",
    })
    close(input)

    // Execute pipeline
    ctx := context.Background()
    output, err := pipeline.Execute(ctx, input)
    if err != nil {
        fmt.Printf("Error executing pipeline: %v\n", err)
        return
    }

    // Consume output
    for elem := range output {
        if elem.Message != nil {
            fmt.Printf("Received message: %s\n", elem.Message.Content)
        }
    }
}
Received message: Hello, world!

Example (With Config)

ExamplePipelineBuilder_withConfig demonstrates building a pipeline with custom configuration.

package main

import (
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    // Create custom config
    config := stage.DefaultPipelineConfig().
        WithChannelBufferSize(32).
        WithPriorityQueue(true).
        WithMetrics(true)

    // Build pipeline with config
    pipeline, err := stage.NewPipelineBuilderWithConfig(config).
        Chain(
            stage.NewPassthroughStage("stage1"),
            stage.NewPassthroughStage("stage2"),
        ).
        Build()
    if err != nil {
        fmt.Printf("Error: %v\n", err)
        return
    }

    fmt.Printf("Pipeline created with %d stages\n", 2)
    _ = pipeline
}
Pipeline created with 2 stages

func NewPipelineBuilder() *PipelineBuilder

NewPipelineBuilder creates a new PipelineBuilder with default configuration.

func NewPipelineBuilderWithConfig(config *PipelineConfig) *PipelineBuilder

NewPipelineBuilderWithConfig creates a new PipelineBuilder with custom configuration.

func (b *PipelineBuilder) AddStage(stage Stage) *PipelineBuilder

AddStage adds a stage to the builder without connecting it. This is useful when building complex topologies manually.

func (b *PipelineBuilder) Branch(fromStage string, toStages ...string) *PipelineBuilder

Branch creates multiple outgoing connections from a single stage. This allows one stage’s output to fan out to multiple downstream stages.

Example:

pipeline := NewPipelineBuilder().
    Chain(NewStageA(), NewStageB()).
    Branch("stageB", "stageC", "stageD"). // B's output goes to both C and D
    Build()

func (b *PipelineBuilder) Build() (*StreamPipeline, error)

Build constructs the pipeline from the builder’s configuration. It validates the pipeline structure and returns an error if invalid.

func (b *PipelineBuilder) Chain(stages ...Stage) *PipelineBuilder

Chain creates a linear chain of stages. This is the most common pattern: stage1 -> stage2 -> stage3. Each stage’s output is connected to the next stage’s input.

Example:

pipeline := NewPipelineBuilder().
    Chain(
        NewStageA(),
        NewStageB(),
        NewStageC(),
    ).
    Build()

func (b *PipelineBuilder) Clone() *PipelineBuilder

Clone creates a deep copy of the builder.

func (b *PipelineBuilder) Connect(fromStage, toStage string) *PipelineBuilder

Connect creates a directed edge from one stage to another. The output of fromStage will be connected to the input of toStage.

func (b *PipelineBuilder) WithConfig(config *PipelineConfig) *PipelineBuilder

WithConfig sets the pipeline configuration.

func (b *PipelineBuilder) WithEventEmitter(emitter *events.Emitter) *PipelineBuilder

WithEventEmitter sets the event emitter for the pipeline.

PipelineConfig defines configuration options for pipeline execution.

type PipelineConfig struct {
// ChannelBufferSize controls buffering between stages.
// Smaller values = lower latency but more backpressure.
// Larger values = higher throughput but more memory usage.
// Default: 16
ChannelBufferSize int
// PriorityQueueEnabled enables priority-based scheduling.
// When enabled, high-priority elements (audio) are processed before low-priority (logs).
// Default: false
PriorityQueueEnabled bool
// MaxConcurrentPipelines limits the number of concurrent pipeline executions.
// This is used by PipelinePool to control concurrency.
// Default: 100
MaxConcurrentPipelines int
// ExecutionTimeout sets the maximum duration for a single pipeline execution.
// Set to 0 to disable timeout.
// Default: 30 seconds
ExecutionTimeout time.Duration
// GracefulShutdownTimeout sets the maximum time to wait for in-flight executions during shutdown.
// Default: 10 seconds
GracefulShutdownTimeout time.Duration
// EnableMetrics enables collection of per-stage metrics (latency, throughput, etc.).
// Default: false
EnableMetrics bool
// EnableTracing enables detailed tracing of element flow through stages.
// Default: false (can be expensive for high-throughput pipelines)
EnableTracing bool
// PrometheusEnabled enables Prometheus metrics export via HTTP.
// Default: false
PrometheusEnabled bool
// PrometheusAddr is the address to serve Prometheus metrics on (e.g., ":9090").
// Only used when PrometheusEnabled is true.
// Default: ":9090"
PrometheusAddr string
}

func DefaultPipelineConfig() *PipelineConfig

DefaultPipelineConfig returns a PipelineConfig with sensible defaults.

func (c *PipelineConfig) Validate() error

Validate checks if the configuration is valid.

func (c *PipelineConfig) WithChannelBufferSize(size int) *PipelineConfig

WithChannelBufferSize sets the channel buffer size.

func (c *PipelineConfig) WithExecutionTimeout(timeout time.Duration) *PipelineConfig

WithExecutionTimeout sets the execution timeout.

func (c *PipelineConfig) WithGracefulShutdownTimeout(timeout time.Duration) *PipelineConfig

WithGracefulShutdownTimeout sets the graceful shutdown timeout.

func (c *PipelineConfig) WithMaxConcurrentPipelines(maxPipelines int) *PipelineConfig

WithMaxConcurrentPipelines sets the maximum number of concurrent pipeline executions.

func (c *PipelineConfig) WithMetrics(enabled bool) *PipelineConfig

WithMetrics enables or disables metrics collection.

func (c *PipelineConfig) WithPriorityQueue(enabled bool) *PipelineConfig

WithPriorityQueue enables or disables priority-based scheduling.

func (c *PipelineConfig) WithPrometheusExporter(addr string) *PipelineConfig

WithPrometheusExporter enables Prometheus metrics export at the given address. The address should be in the format ":port" or "host:port". Example: ":9090" or "localhost:9090".

func (c *PipelineConfig) WithTracing(enabled bool) *PipelineConfig

WithTracing enables or disables detailed tracing.

Priority defines the scheduling priority for stream elements. Higher priority elements are processed before lower priority ones.

type Priority int

const (
// PriorityLow is for non-critical data like logs or metrics
PriorityLow Priority = iota
// PriorityNormal is the default priority for most elements
PriorityNormal
// PriorityHigh is for real-time audio/video that requires low latency
PriorityHigh
// PriorityCritical is for control signals, errors, and system messages
PriorityCritical
)

PriorityChannel is a channel that supports priority-based element delivery. Higher priority elements are delivered before lower priority elements.

type PriorityChannel struct {
// contains filtered or unexported fields
}

func NewPriorityChannel(capacity int) *PriorityChannel

NewPriorityChannel creates a new priority channel with the given capacity.

func (pc *PriorityChannel) Close()

Close closes the priority channel.

func (pc *PriorityChannel) Len() int

Len returns the current number of elements in the channel.

func (pc *PriorityChannel) Receive(ctx context.Context) (StreamElement, bool, error)

Receive receives the highest priority element from the channel. Blocks if the channel is empty.

func (pc *PriorityChannel) Send(ctx context.Context, elem StreamElement) error

Send sends an element to the priority channel. Blocks if the channel is at capacity.
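
A sketch of direct Send/Receive use. Priorities and the capacity are illustrative; the non-blocking drain loop relies on Len to avoid waiting on an empty channel.

package main

import (
    "context"
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    ctx := context.Background()
    pc := stage.NewPriorityChannel(8)

    // Queue a low-priority element first, then a high-priority one.
    low := stage.NewTextElement("log line")
    low.WithPriority(stage.PriorityLow)
    high := stage.NewTextElement("audio chunk")
    high.WithPriority(stage.PriorityHigh)

    _ = pc.Send(ctx, low)
    _ = pc.Send(ctx, high)

    // Receive delivers higher-priority elements ahead of lower-priority ones.
    for pc.Len() > 0 {
        elem, ok, err := pc.Receive(ctx)
        if err != nil || !ok {
            break
        }
        if elem.Text != nil {
            fmt.Println(elem.Priority, *elem.Text)
        }
    }
    pc.Close()
}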

PromptAssemblyStage loads and assembles prompts from the prompt registry. It enriches elements with system prompt, allowed tools, and variables.

type PromptAssemblyStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewPromptAssemblyStage(promptRegistry *prompt.Registry, taskType string, baseVariables map[string]string) *PromptAssemblyStage

NewPromptAssemblyStage creates a new prompt assembly stage.

func (s *PromptAssemblyStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process loads and assembles the prompt, enriching elements with prompt data.

ProviderConfig contains configuration for the provider stage.

type ProviderConfig struct {
MaxTokens int
Temperature float32
Seed *int
ResponseFormat *providers.ResponseFormat // Optional response format (JSON mode)
}

ProviderStage executes LLM calls and handles tool execution. This is the request/response mode implementation.

type ProviderStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewProviderStage(provider providers.Provider, toolRegistry *tools.Registry, toolPolicy *pipeline.ToolPolicy, config *ProviderConfig) *ProviderStage

NewProviderStage creates a new provider stage for request/response mode.

func NewProviderStageWithEmitter(provider providers.Provider, toolRegistry *tools.Registry, toolPolicy *pipeline.ToolPolicy, config *ProviderConfig, emitter *events.Emitter) *ProviderStage

NewProviderStageWithEmitter creates a new provider stage with event emission support. The emitter is used to emit provider.call.started, provider.call.completed, and provider.call.failed events for observability and session recording.

func (s *ProviderStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process executes the LLM provider call and handles tool execution.

QuerySourceType defines how to construct the relevance query.

type QuerySourceType string

const (
// QuerySourceLastUser uses the last user message as the query
QuerySourceLastUser QuerySourceType = "last_user"
// QuerySourceLastN concatenates the last N messages as the query
QuerySourceLastN QuerySourceType = "last_n"
// QuerySourceCustom uses a custom query string
QuerySourceCustom QuerySourceType = "custom"
)

RandomRouter distributes elements randomly across outputs.

type RandomRouter struct {
BaseStage
// contains filtered or unexported fields
}

func NewRandomRouter(name string, outputNames []string) *RandomRouter

NewRandomRouter creates a router that distributes elements randomly.

func (r *RandomRouter) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process distributes elements randomly.

func (r *RandomRouter) RegisterOutput(name string, output chan<- StreamElement)

RegisterOutput registers an output channel with a name.

RecordingPosition indicates where in the pipeline the recording stage is placed.

type RecordingPosition string

const (
// RecordingPositionInput records elements entering the pipeline (user input).
RecordingPositionInput RecordingPosition = "input"
// RecordingPositionOutput records elements leaving the pipeline (agent output).
RecordingPositionOutput RecordingPosition = "output"
)

RecordingStage captures pipeline elements as events for session recording. It observes elements flowing through without modifying them.

type RecordingStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewRecordingStage(eventBus *events.EventBus, config RecordingStageConfig) *RecordingStage

NewRecordingStage creates a new recording stage.

func (rs *RecordingStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process observes elements and records them as events.

func (rs *RecordingStage) WithConversationID(conversationID string) *RecordingStage

WithConversationID sets the conversation ID for recorded events.

func (rs *RecordingStage) WithSessionID(sessionID string) *RecordingStage

WithSessionID sets the session ID for recorded events.

RecordingStageConfig configures the recording stage behavior.

type RecordingStageConfig struct {
// Position indicates where this stage is in the pipeline.
Position RecordingPosition
// SessionID is the session identifier for recorded events.
SessionID string
// ConversationID groups events within a session.
ConversationID string
// IncludeAudio records audio data (may be large).
IncludeAudio bool
// IncludeVideo records video data (may be large).
IncludeVideo bool
// IncludeImages records image data.
IncludeImages bool
}

func DefaultRecordingStageConfig() RecordingStageConfig

DefaultRecordingStageConfig returns sensible defaults.

RelevanceConfig configures embedding-based relevance truncation. Used when TruncationStrategy is TruncateLeastRelevant.

type RelevanceConfig struct {
// EmbeddingProvider generates embeddings for similarity scoring.
// Required for relevance-based truncation; if nil, falls back to oldest.
EmbeddingProvider providers.EmbeddingProvider
// MinRecentMessages always keeps the N most recent messages
// regardless of relevance score. Default: 3
MinRecentMessages int
// AlwaysKeepSystemRole keeps all system role messages regardless of score.
AlwaysKeepSystemRole bool
// SimilarityThreshold is the minimum score to consider a message relevant (0.0-1.0).
// Messages below this threshold may be dropped first.
SimilarityThreshold float64
// QuerySource determines what text to compare messages against.
// Default: QuerySourceLastUser
QuerySource QuerySourceType
// LastNCount is the number of messages to use when QuerySource is QuerySourceLastN.
// Default: 3
LastNCount int
// CustomQuery is the query text when QuerySource is QuerySourceCustom.
CustomQuery string
// CacheEmbeddings enables caching of embeddings across truncation calls.
// Useful when context changes incrementally.
CacheEmbeddings bool
}
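
A configuration sketch for relevance-based truncation. The threshold value is illustrative; an EmbeddingProvider must be set for scoring, and with nil the truncator falls back to dropping the oldest messages.

package main

import (
    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    // Compare each message against the last three messages, always keeping
    // system messages and the most recent turns.
    cfg := stage.RelevanceConfig{
        MinRecentMessages:    3,
        AlwaysKeepSystemRole: true,
        SimilarityThreshold:  0.35, // illustrative cutoff
        QuerySource:          stage.QuerySourceLastN,
        LastNCount:           3,
        CacheEmbeddings:      true,
    }
    _ = cfg // used together with the TruncateLeastRelevant strategy
}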

Response represents a response message (for compatibility with the existing pipeline).

type Response struct {
Role string
Content string
Parts []types.ContentPart
ToolCalls []types.MessageToolCall
FinalResponse string
}

ResponseVADConfig configures the ResponseVADStage.

type ResponseVADConfig struct {
// VAD is the voice activity detector.
// If nil, a SimpleVAD with default params is created.
VAD audio.VADAnalyzer
// SilenceDuration is how long silence must persist after EndOfStream
// to confirm turn completion.
// Default: 500ms
SilenceDuration time.Duration
// MaxWaitDuration is the maximum time to wait for silence after EndOfStream.
// If silence is not detected within this time, EndOfStream is emitted anyway.
// Default: 3s
MaxWaitDuration time.Duration
// SampleRate is the expected audio sample rate.
// Default: 24000 (Gemini output)
SampleRate int
}

func DefaultResponseVADConfig() ResponseVADConfig

DefaultResponseVADConfig returns sensible defaults for ResponseVADStage.

ResponseVADStage monitors response audio for silence and delays EndOfStream until actual silence is detected. This decouples turn completion from provider signaling (e.g., Gemini’s turnComplete) which may arrive before all audio chunks have been received.

This stage:

  1. Passes through all elements immediately (audio, text, messages)
  2. When EndOfStream is received from upstream, starts monitoring for silence
  3. Only emits EndOfStream downstream when VAD confirms sustained silence
  4. Has a max wait timeout to prevent indefinite blocking

This is a Transform stage with buffering: it may hold EndOfStream temporarily.

type ResponseVADStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewResponseVADStage(config ResponseVADConfig) (*ResponseVADStage, error)

NewResponseVADStage creates a new response VAD stage.

func (s *ResponseVADStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface. Monitors response audio for silence and delays EndOfStream until confirmed.

RoundRobinRouter distributes elements across outputs in sequence.

type RoundRobinRouter struct {
BaseStage
// contains filtered or unexported fields
}

func NewRoundRobinRouter(name string, outputNames []string) *RoundRobinRouter

NewRoundRobinRouter creates a router that cycles through outputs sequentially.

func (r *RoundRobinRouter) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process distributes elements in round-robin fashion.

func (r *RoundRobinRouter) RegisterOutput(name string, output chan<- StreamElement)

RegisterOutput registers an output channel with a name.

RouterFunc determines which output channel(s) to route an element to. Returns a slice of output names. Empty slice means drop the element.

type RouterFunc func(elem *StreamElement) []string

RouterStage routes elements to different output channels based on a routing function. This enables conditional branching and dynamic routing in the pipeline.

This is a special stage type that supports multiple outputs (1:N routing).

type RouterStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewRouterStage(name string, routerFunc RouterFunc) *RouterStage

NewRouterStage creates a new router stage with the given routing function.

func (s *RouterStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface. Routes each element to appropriate output channel(s) based on routing function.

func (s *RouterStage) RegisterOutput(name string, output chan<- StreamElement)

RegisterOutput registers an output channel with a name. This must be called before Process() to set up routing destinations.
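
A sketch of a content-based router. The output names and the routing predicate are illustrative; Process is normally driven by the pipeline once outputs are registered.

package main

import (
    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    // Route audio elements to "voice" and everything else to "text".
    routerFunc := func(elem *stage.StreamElement) []string {
        if elem.Audio != nil {
            return []string{"voice"}
        }
        return []string{"text"}
    }
    router := stage.NewRouterStage("by-content", routerFunc)

    voice := make(chan stage.StreamElement, 8)
    text := make(chan stage.StreamElement, 8)
    router.RegisterOutput("voice", voice)
    router.RegisterOutput("text", text)

    _ = router // run via a pipeline, or call Process directly as in the Stage example
}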

RoutingRule defines a predicate-based routing rule.

type RoutingRule struct {
// Name identifies this rule for logging/debugging.
Name string
// Predicate returns true if the element should be routed to this rule's output.
Predicate func(StreamElement) bool
// Output is the destination name for matching elements.
Output string
}

func RouteAudio(output string, format AudioFormat) RoutingRule

RouteAudio creates a routing rule for audio elements with specific format.

func RouteContentType(output string, ct ContentType) RoutingRule

RouteContentType creates a routing rule for elements of a specific content type.

func RouteWhen(output string, predicate func(StreamElement) bool) RoutingRule

RouteWhen creates a routing rule with the given predicate.

STTStage transcribes audio to text using a speech-to-text service.

This is a Transform stage: audio element → text element (1:1)

type STTStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewSTTStage(service stt.Service, config STTStageConfig) *STTStage

NewSTTStage creates a new STT stage.

func (s *STTStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface. Transcribes audio elements to text.

STTStageConfig configures the STTStage.

type STTStageConfig struct {
// Language hint for transcription (e.g., "en")
Language string
// SkipEmpty skips transcription for empty audio
SkipEmpty bool
// MinAudioBytes is the minimum audio size, in bytes, to transcribe
MinAudioBytes int
}

func DefaultSTTStageConfig() STTStageConfig

DefaultSTTStageConfig returns sensible defaults.

ScoredMessage pairs a message with its relevance score and metadata. Used during relevance-based truncation to track which messages to keep.

type ScoredMessage struct {
// Index is the original position in the message slice
Index int
// Message is the actual message content
Message types.Message
// Score is the cosine similarity to the query (0.0 to 1.0)
Score float64
// IsProtected indicates if this message should always be kept
// (e.g., recent messages or system messages)
IsProtected bool
// TokenCount is the estimated token count for this message
TokenCount int
}

ScoredMessages is a sortable slice of ScoredMessage.

type ScoredMessages []ScoredMessage

func (s ScoredMessages) Len() int

func (s ScoredMessages) Less(i, j int) bool

func (s ScoredMessages) Swap(i, j int)

Stage is a processing unit in the pipeline DAG. Unlike traditional middleware, stages explicitly declare their I/O characteristics and operate on channels of StreamElements, enabling true streaming execution.

Stages read from an input channel, process elements, and write to an output channel. The stage MUST close the output channel when done (or when input closes).

Example implementation:

type ExampleStage struct {
    name string
}

func (s *ExampleStage) Name() string {
    return s.name
}

func (s *ExampleStage) Type() StageType {
    return StageTypeTransform
}

func (s *ExampleStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error {
    defer close(output)
    for elem := range input {
        // Process element
        processedElem := s.transform(elem)
        // Write to output
        select {
        case output <- processedElem:
        case <-ctx.Done():
            return ctx.Err()
        }
    }
    return nil
}

type Stage interface {
// Name returns a unique identifier for this stage.
// This is used for logging, tracing, and debugging.
Name() string
// Type returns the stage's processing model.
// This helps the pipeline builder understand how the stage behaves.
Type() StageType
// Process is called once when the pipeline starts.
// The stage reads from input, processes elements, and writes to output.
// The stage MUST close output when done (or when input closes).
// Returns an error if processing fails.
Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error
}

StageError wraps an error with stage information.

type StageError struct {
StageName string
StageType StageType
Err error
}

func NewStageError(stageName string, stageType StageType, err error) *StageError

NewStageError creates a new StageError.

func (e *StageError) Error() string

Error returns the error message.

func (e *StageError) Unwrap() error

Unwrap returns the underlying error.

StageFunc is a functional adapter that allows using a function as a Stage. This is useful for simple transformations without defining a new type.

type StageFunc struct {
BaseStage
// contains filtered or unexported fields
}

func NewStageFunc(name string, stageType StageType, fn func(context.Context, <-chan StreamElement, chan<- StreamElement) error) *StageFunc

NewStageFunc creates a new functional stage.

func (sf *StageFunc) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process executes the stage function.
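
A sketch of a functional stage: a sink implemented as a plain function that counts elements and emits nothing. The name and stage type are illustrative choices for this snippet.

package main

import (
    "context"
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    counter := stage.NewStageFunc("count", stage.StageTypeSink,
        func(ctx context.Context, input <-chan stage.StreamElement, output chan<- stage.StreamElement) error {
            defer close(output) // stages must close their output when done
            n := 0
            for range input {
                n++
            }
            fmt.Println("elements seen:", n)
            return nil
        })

    input := make(chan stage.StreamElement, 2)
    input <- stage.NewTextElement("a")
    input <- stage.NewTextElement("b")
    close(input)

    output := make(chan stage.StreamElement)
    _ = counter.Process(context.Background(), input, output)
}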

StageMetrics contains performance metrics for a stage.

type StageMetrics struct {
StageName string
ElementsIn int64
ElementsOut int64
ElementsErrored int64
TotalLatency time.Duration
MinLatency time.Duration
MaxLatency time.Duration
AvgLatency time.Duration
LastUpdated time.Time
// contains filtered or unexported fields
}

func NewStageMetrics(stageName string) *StageMetrics

NewStageMetrics creates a new metrics collector for a stage.

func (m *StageMetrics) GetMetrics() StageMetrics

GetMetrics returns a copy of the current metrics (thread-safe).

func (m *StageMetrics) RecordElement(latency time.Duration, hasError bool)

RecordElement records metrics for a processed element.

func (m *StageMetrics) Reset()

Reset resets all metrics to zero.

StageType defines the processing model of a stage.

type StageType int

const (
// StageTypeTransform performs 1:1 or 1:N element transformation.
// Each input element produces one or more output elements.
// Examples: validation, prompt assembly, text formatting.
StageTypeTransform StageType = iota
// StageTypeAccumulate performs N:1 accumulation.
// Multiple input elements are collected and combined into one output element.
// Examples: VAD buffering, message accumulation.
StageTypeAccumulate
// StageTypeGenerate performs 0:N generation.
// Generates output elements without consuming input (or consumes once then generates many).
// Examples: LLM streaming response, TTS generation.
StageTypeGenerate
// StageTypeSink is a terminal stage (N:0).
// Consumes input elements but produces no output.
// Examples: state store save, metrics collection, logging.
StageTypeSink
// StageTypeBidirectional supports full duplex communication.
// Both reads from input and writes to output concurrently.
// Examples: WebSocket session, duplex provider.
StageTypeBidirectional
)

func (st StageType) String() string

String returns the string representation of the stage type.

StateStoreLoadStage loads conversation history from state store.

type StateStoreLoadStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewStateStoreLoadStage(config *pipeline.StateStoreConfig) *StateStoreLoadStage

NewStateStoreLoadStage creates a new state store load stage.

func (s *StateStoreLoadStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process loads conversation history and emits it before current input.

StateStoreSaveStage saves conversation state to state store.

type StateStoreSaveStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewStateStoreSaveStage(config *pipeline.StateStoreConfig) *StateStoreSaveStage

NewStateStoreSaveStage creates a new state store save stage.

func (s *StateStoreSaveStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process collects all messages and saves them to state store.

StreamElement is the unit of data flowing through the pipeline. It can carry different types of content and supports backpressure. Each element should contain at most one content type.

type StreamElement struct {
// Content types (at most one should be set per element)
Text *string // Text content
Audio *AudioData // Audio samples
Video *VideoData // Video frame
Image *ImageData // Image data
Message *types.Message // Complete message
ToolCall *types.MessageToolCall // Tool invocation
Part *types.ContentPart // Generic content part (text, image, audio, video)
MediaData *types.MediaContent // Media content with MIME type
// Metadata
Sequence int64 // Monotonic sequence number
Timestamp time.Time // When element was created
Source string // Stage that produced this element
Priority Priority // Scheduling priority (for QoS)
Metadata map[string]interface{} // Additional metadata for passing data between stages
// Control signals
EndOfStream bool // No more elements after this
Error error // Error propagation
}
Example

ExampleStreamElement demonstrates creating different types of stream elements.

package main

import (
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
    "github.com/AltairaLabs/PromptKit/runtime/types"
)

func main() {
    // Text element
    textElem := stage.NewTextElement("Hello")
    fmt.Printf("Text element: %v\n", *textElem.Text)

    // Message element
    msgElem := stage.NewMessageElement(&types.Message{
        Role:    "user",
        Content: "Hello",
    })
    fmt.Printf("Message element: %s\n", msgElem.Message.Content)

    // Error element
    errElem := stage.NewErrorElement(fmt.Errorf("test error"))
    fmt.Printf("Error element: %v\n", errElem.Error)
}
Text element: Hello
Message element: Hello
Error element: test error

func GetAudioElement(audio *AudioData) *StreamElement

GetAudioElement retrieves a StreamElement from the pool and initializes it with audio data. This is a pooled alternative to NewAudioElement.

func GetElement() *StreamElement

GetElement retrieves a StreamElement from the pool or creates a new one. The returned element is reset to its zero state with an initialized Metadata map. Callers should use PutElement when the element is no longer needed.

func GetEndOfStreamElement() *StreamElement

GetEndOfStreamElement retrieves a StreamElement from the pool and marks it as end-of-stream. This is a pooled alternative to NewEndOfStreamElement.

func GetErrorElement(err error) *StreamElement

GetErrorElement retrieves a StreamElement from the pool and initializes it with an error. This is a pooled alternative to NewErrorElement.

func GetImageElement(image *ImageData) *StreamElement

GetImageElement retrieves a StreamElement from the pool and initializes it with image data. This is a pooled alternative to NewImageElement.

func GetMessageElement(msg *types.Message) *StreamElement

GetMessageElement retrieves a StreamElement from the pool and initializes it with a message. This is a pooled alternative to NewMessageElement.

func GetTextElement(text string) *StreamElement

GetTextElement retrieves a StreamElement from the pool and initializes it with text content. This is a pooled alternative to NewTextElement.

func GetVideoElement(video *VideoData) *StreamElement

GetVideoElement retrieves a StreamElement from the pool and initializes it with video data. This is a pooled alternative to NewVideoElement.

func NewAudioElement(audio *AudioData) StreamElement

NewAudioElement creates a new StreamElement with audio data.

func NewEndOfStreamElement() StreamElement

NewEndOfStreamElement creates a new StreamElement marking end of stream.

func NewErrorElement(err error) StreamElement

NewErrorElement creates a new StreamElement with an error.

func NewImageElement(image *ImageData) StreamElement

NewImageElement creates a new StreamElement with image data.

func NewMessageElement(msg *types.Message) StreamElement

NewMessageElement creates a new StreamElement with a message.

func NewTextElement(text string) StreamElement

NewTextElement creates a new StreamElement with text content.

func NewVideoElement(video *VideoData) StreamElement

NewVideoElement creates a new StreamElement with video data.

func (e *StreamElement) GetMetadata(key string) interface{}

GetMetadata retrieves metadata by key, returning nil if not found.

func (e *StreamElement) HasContent() bool

HasContent returns true if the element contains any content (excluding control signals).

func (e *StreamElement) IsControl() bool

IsControl returns true if the element is a control signal (error or end-of-stream).

func (e *StreamElement) IsEmpty() bool

IsEmpty returns true if the element contains no content.

func (e *StreamElement) Reset()

Reset clears all fields of the StreamElement to their zero values. This is called automatically by PutElement before returning to the pool. The Metadata map is cleared but retained to avoid reallocation.

func (e *StreamElement) WithMetadata(key string, value interface{}) *StreamElement

WithMetadata adds metadata to this element.

func (e *StreamElement) WithPriority(priority Priority) *StreamElement

WithPriority sets the priority for this element.

func (e *StreamElement) WithSequence(seq int64) *StreamElement

WithSequence sets the sequence number for this element.

func (e *StreamElement) WithSource(source string) *StreamElement

WithSource sets the source stage name for this element.
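
A sketch of the builder-style setters; because each returns the element, calls can be chained. The source name and sequence value are illustrative.

package main

import (
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

func main() {
    elem := stage.NewTextElement("hello")

    // Chained setters annotate the element in place.
    elem.WithSource("example").
        WithPriority(stage.PriorityHigh).
        WithSequence(1)

    fmt.Println(*elem.Text, elem.Source, elem.Priority, elem.Sequence)
}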

StreamPipeline represents an executable pipeline of stages. It manages the DAG of stages, creates channels between them, and orchestrates execution.

type StreamPipeline struct {
// contains filtered or unexported fields
}

func (p *StreamPipeline) Execute(ctx context.Context, input <-chan StreamElement) (<-chan StreamElement, error)

Execute starts the pipeline execution with the given input channel. Returns an output channel that will receive all elements from terminal stages. The pipeline executes in background goroutines and closes the output channel when complete.

func (p *StreamPipeline) ExecuteSync(ctx context.Context, input ...StreamElement) (*ExecutionResult, error)

ExecuteSync runs the pipeline synchronously and returns the accumulated result. This is a convenience method for request/response mode where you want a single result. It converts the streaming execution into a blocking call.
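
A sketch of the synchronous path: a trivial passthrough pipeline run to completion with a single input element. Inspect the returned ExecutionResult as needed for your use case.

package main

import (
    "context"
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
    "github.com/AltairaLabs/PromptKit/runtime/types"
)

func main() {
    pipeline, err := stage.NewPipelineBuilder().
        Chain(stage.NewPassthroughStage("echo")).
        Build()
    if err != nil {
        fmt.Println("build error:", err)
        return
    }

    // ExecuteSync feeds the given elements in, blocks until the pipeline
    // finishes, and returns the accumulated result.
    result, err := pipeline.ExecuteSync(context.Background(), stage.NewMessageElement(&types.Message{
        Role:    "user",
        Content: "hello",
    }))
    if err != nil {
        fmt.Println("execute error:", err)
        return
    }
    _ = result // inspect the ExecutionResult fields as needed
}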

func (p *StreamPipeline) Shutdown(ctx context.Context) error

Shutdown gracefully shuts down the pipeline, waiting for in-flight executions to complete.

TTSConfig contains configuration for TTS stage.

type TTSConfig struct {
// SkipEmpty skips synthesis for empty or whitespace-only text
SkipEmpty bool
// MinTextLength is the minimum text length to synthesize (0 = no minimum)
MinTextLength int
}

func DefaultTTSConfig() TTSConfig

DefaultTTSConfig returns sensible defaults for TTS configuration.

TTSService converts text to audio.

type TTSService interface {
// Synthesize converts text to audio bytes.
Synthesize(ctx context.Context, text string) ([]byte, error)
// MIMEType returns the MIME type of the synthesized audio.
MIMEType() string
}

TTSStage synthesizes audio for streaming text elements. It reads text elements from input and adds audio data to them.

This is a Transform stage: text element → text+audio element (1:1)

type TTSStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewTTSStage(tts TTSService, config TTSConfig) *TTSStage

NewTTSStage creates a new TTS stage.

func (s *TTSStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface. Synthesizes audio for each text element and adds it to the element.
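
A sketch of satisfying TTSService with a stub and handing it to NewTTSStage; the stub's fixed payload and MIME type are placeholders for a real speech synthesis backend.

package main

import (
    "context"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

// silenceTTS is a stub TTSService that returns a fixed payload; a real
// implementation would call a speech synthesis backend.
type silenceTTS struct{}

func (silenceTTS) Synthesize(ctx context.Context, text string) ([]byte, error) {
    return make([]byte, 1024), nil // placeholder audio bytes
}

func (silenceTTS) MIMEType() string { return "audio/pcm" }

func main() {
    ttsStage := stage.NewTTSStage(silenceTTS{}, stage.DefaultTTSConfig())
    _ = ttsStage // chain into a pipeline after a text-producing stage
}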

TTSStageWithInterruption synthesizes text to audio with interruption support. When the user starts speaking (detected via shared InterruptionHandler), synthesis is stopped and pending output is discarded.

This is a Transform stage: text element → audio element (1:1)

type TTSStageWithInterruption struct {
BaseStage
// contains filtered or unexported fields
}

func NewTTSStageWithInterruption(service tts.Service, config TTSStageWithInterruptionConfig) *TTSStageWithInterruption

NewTTSStageWithInterruption creates a new TTS stage with interruption support.

func (s *TTSStageWithInterruption) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface. Synthesizes audio for text elements with interruption support.

TTSStageWithInterruptionConfig configures TTSStageWithInterruption.

type TTSStageWithInterruptionConfig struct {
// Voice is the voice ID to use
Voice string
// Speed is the speech rate (0.5-2.0)
Speed float64
// InterruptionHandler for detecting user interrupts during TTS output.
// Should be shared with AudioTurnStage.
InterruptionHandler *audio.InterruptionHandler
// SkipEmpty skips synthesis for empty text
SkipEmpty bool
// MinTextLength is the minimum text length to synthesize
MinTextLength int
}

func DefaultTTSStageWithInterruptionConfig() TTSStageWithInterruptionConfig

DefaultTTSStageWithInterruptionConfig returns sensible defaults.

TemplateStage substitutes {{variable}} placeholders in messages and metadata.

This stage reads variables from the element’s metadata["variables"] map and replaces all occurrences of {{variable_name}} in:

  • metadata["system_prompt"] - the system prompt for the LLM
  • message.Content - the message text content
  • message.Parts[].Text - individual content parts

Variables are typically set by:

  • PromptAssemblyStage (from base_variables in config)
  • VariableProviderStage (from dynamic variable providers)

Example:

Input: "Hello {{name}}, the topic is {{topic}}"
Variables: {"name": "Alice", "topic": "AI"}
Output: "Hello Alice, the topic is AI"

This is a Transform stage: 1 input element → 1 output element

type TemplateStage struct {
BaseStage
}

func NewTemplateStage() *TemplateStage

NewTemplateStage creates a template substitution stage.

func (s *TemplateStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process substitutes variables in messages and system prompt metadata.

TracingStage wraps another stage and adds element-level tracing. Each element gets a trace ID and timing information.

type TracingStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewTracingStage(wrappedStage Stage, traceIDGen func() string) *TracingStage

NewTracingStage wraps a stage with tracing support.

func (s *TracingStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface with tracing.

Transcriber converts audio bytes to text. It follows the Go naming convention for single-method interfaces.

type Transcriber interface {
Transcribe(ctx context.Context, audio []byte) (string, error)
}
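
A stub implementation of Transcriber; the canned text stands in for a real speech-to-text backend.

package main

import (
    "context"
    "fmt"

    "github.com/AltairaLabs/PromptKit/runtime/pipeline/stage"
)

// fixedTranscriber satisfies Transcriber with canned text; a real
// implementation would call an STT backend.
type fixedTranscriber struct{}

func (fixedTranscriber) Transcribe(ctx context.Context, audio []byte) (string, error) {
    return "hello world", nil
}

var _ stage.Transcriber = fixedTranscriber{}

func main() {
    text, _ := fixedTranscriber{}.Transcribe(context.Background(), nil)
    fmt.Println(text)
}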

TruncationStrategy defines how to handle messages when over token budget.

type TruncationStrategy string

const (
// TruncateOldest drops oldest messages first
TruncateOldest TruncationStrategy = "oldest"
// TruncateLeastRelevant drops least relevant messages (requires embeddings)
TruncateLeastRelevant TruncationStrategy = "relevance"
// TruncateSummarize compresses old messages into summaries
TruncateSummarize TruncationStrategy = "summarize"
// TruncateFail returns error if over budget
TruncateFail TruncationStrategy = "fail"
)

VADAccumulatorStage reads streaming audio chunks, detects turn boundaries via VAD, and emits a single Message element with the transcribed text.

This is an Accumulate stage: N audio chunks → 1 message element

type VADAccumulatorStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewVADAccumulatorStage(analyzer audio.VADAnalyzer, transcriber Transcriber, config VADConfig) *VADAccumulatorStage

NewVADAccumulatorStage creates a new VAD accumulator stage.

func (s *VADAccumulatorStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface. Accumulates audio chunks until turn complete, then transcribes and emits a message.

VADConfig contains configuration for VAD accumulator stage.

type VADConfig struct {
// Threshold for silence detection (0.0 = silence, 1.0 = speech)
Threshold float64
// MinSpeechDuration is the minimum duration of speech before turn can complete
MinSpeechDuration time.Duration
// MaxTurnDuration is the maximum duration before forcing turn completion
MaxTurnDuration time.Duration
// SilenceDuration is how long silence must persist to trigger turn complete
SilenceDuration time.Duration
}

func DefaultVADConfig() VADConfig

DefaultVADConfig returns sensible defaults for VAD configuration.

ValidationStage validates responses using configured validators.

type ValidationStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewValidationStage(registry *validators.Registry, suppressExceptions bool) *ValidationStage

NewValidationStage creates a new validation stage.

func (s *ValidationStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process validates response elements and attaches results to metadata.

VariableProviderStage resolves variables from dynamic providers and adds them to metadata.

This stage calls each registered variable provider to fetch dynamic variables (e.g., from environment, external services, databases) and merges them into the element’s metadata["variables"] map for use by TemplateStage.

Provider resolution order:

  1. Variables from earlier stages (e.g., PromptAssemblyStage base_variables)
  2. Each provider is called in sequence; later providers can override earlier values

Error handling:

  • If any provider fails, the stage returns an error and aborts the pipeline
  • This ensures variable resolution failures are surfaced early

Example providers:

  • Environment variable provider: reads from OS environment
  • Config provider: reads from configuration files
  • External API provider: fetches user context from external services

This is a Transform stage: 1 input element → 1 output element (with enriched metadata)

type VariableProviderStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewVariableProviderStage(providers ...variables.Provider) *VariableProviderStage

NewVariableProviderStage creates a variable provider stage.

func (s *VariableProviderStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process resolves variables from all providers and merges them into element metadata.

VideoData carries video frame data with metadata. Supports externalization to avoid holding large data in memory.

type VideoData struct {
Data []byte // Raw video frame data or encoded video segment
MIMEType string // MIME type (e.g., "video/mp4", "video/webm")
Width int // Frame width in pixels
Height int // Frame height in pixels
FrameRate float64 // Frames per second
Duration time.Duration // Duration of the video segment
Timestamp time.Time // Timestamp of this frame/chunk
Format string // Format identifier (e.g., "h264", "vp8")
IsKeyFrame bool // True if this is a key frame
FrameNum int64 // Frame/chunk sequence number (for ordering in streams)
// StorageRef holds a reference to externalized data (when Data is nil).
// Use IsExternalized() to check, Load() to retrieve data.
StorageRef storage.Reference
}

func (d *VideoData) EnsureLoaded(ctx context.Context, store storage.MediaStorageService) ([]byte, error)

EnsureLoaded ensures the video data is loaded into memory.

func (d *VideoData) Externalize(ctx context.Context, store storage.MediaStorageService, metadata *storage.MediaMetadata) error

Externalize stores the video data to external storage and clears in-memory data.

func (d *VideoData) IsExternalized() bool

IsExternalized returns true if the video data has been externalized to storage.

func (d *VideoData) Load(ctx context.Context, store storage.MediaStorageService) error

Load retrieves externalized video data from storage.

VideoToFramesConfig configures the VideoToFramesStage behavior.

type VideoToFramesConfig struct {
// Mode determines how frames are extracted.
// Default: FrameExtractionInterval.
Mode FrameExtractionMode
// Interval is the time between extracted frames (for FrameExtractionInterval mode).
// Default: 1 second.
Interval time.Duration
// TargetFPS is the target frame rate (for FrameExtractionFPS mode).
// Default: 1.0 (1 frame per second).
TargetFPS float64
// MaxFrames limits the maximum number of frames to extract.
// 0 means unlimited.
// Default: 30.
MaxFrames int
// OutputFormat specifies the output image format.
// Default: "jpeg".
OutputFormat string // "jpeg" or "png"
// OutputQuality specifies JPEG quality (1-100).
// Default: 85.
OutputQuality int
// OutputWidth resizes frames to this width (0 = original).
// Height is calculated to maintain aspect ratio.
// Default: 0 (original).
OutputWidth int
// FFmpegPath is the path to the ffmpeg binary.
// Default: "ffmpeg".
FFmpegPath string
// FFmpegTimeout is the maximum time for FFmpeg execution per video.
// Default: 5 minutes.
FFmpegTimeout time.Duration
// StorageService for loading externalized video data (optional).
StorageService storage.MediaStorageService
}

func DefaultVideoToFramesConfig() VideoToFramesConfig

DefaultVideoToFramesConfig returns sensible defaults for frame extraction.

VideoToFramesStage extracts frames from video StreamElements into individual image StreamElements. This is a Transform stage with fan-out behavior (1 video → N images).

Input: StreamElement with Video
Output: Multiple StreamElements with Image, preserving correlation metadata

Non-video elements are passed through unchanged.

type VideoToFramesStage struct {
BaseStage
// contains filtered or unexported fields
}

func NewVideoToFramesStage(config VideoToFramesConfig) *VideoToFramesStage

NewVideoToFramesStage creates a new video-to-frames extraction stage.

func (s *VideoToFramesStage) GetConfig() VideoToFramesConfig

GetConfig returns the stage configuration.

func (s *VideoToFramesStage) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process implements the Stage interface. Extracts frames from videos and emits individual image elements for each frame.

WeightedRouter distributes elements across outputs based on configured weights.

type WeightedRouter struct {
BaseStage
// contains filtered or unexported fields
}

func NewWeightedRouter(name string, weights map[string]float64) *WeightedRouter

NewWeightedRouter creates a router that distributes elements based on weights. Weights are normalized to sum to 1.0. Example: {"primary": 0.7, "secondary": 0.3} routes 70% to primary, 30% to secondary.

func (r *WeightedRouter) Process(ctx context.Context, input <-chan StreamElement, output chan<- StreamElement) error

Process distributes elements based on weights.

func (r *WeightedRouter) RegisterOutput(name string, output chan<- StreamElement)

RegisterOutput registers an output channel with a name.
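
A minimal sketch of a 70/30 split; ctx, input, output, primaryCh and secondaryCh are placeholder channels:

router := NewWeightedRouter("ab-split", map[string]float64{
	"primary":   0.7,
	"secondary": 0.3,
})
router.RegisterOutput("primary", primaryCh)
router.RegisterOutput("secondary", secondaryCh)

// Roughly 70% of elements are routed to primaryCh and 30% to secondaryCh.
go func() {
	_ = router.Process(ctx, input, output)
}()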

import "github.com/AltairaLabs/PromptKit/runtime/prompt/schema"

Package schema provides embedded PromptPack schema for offline validation.

DefaultSchemaURL is the canonical URL for the PromptPack schema.

const DefaultSchemaURL = "https://promptpack.org/schema/latest/promptpack.schema.json"

SchemaSourceEnvVar is the environment variable to override schema source. Values: “local” (embedded), “remote” (fetch from URL), or a file path.

const SchemaSourceEnvVar = "PROMPTKIT_SCHEMA_SOURCE"

func ExtractSchemaURL(packJSON []byte) string

ExtractSchemaURL extracts the $schema URL from pack JSON data. Returns empty string if not present or invalid.

func GetEmbeddedSchema() string

GetEmbeddedSchema returns the embedded schema as a string.

func GetEmbeddedSchemaVersion() (string, error)

GetEmbeddedSchemaVersion returns the version from the embedded schema.

func GetSchemaLoader(packSchemaURL string) (gojsonschema.JSONLoader, error)

GetSchemaLoader returns a gojsonschema loader for the PromptPack schema. Priority:

  1. If PROMPTKIT_SCHEMA_SOURCE is set to “local”, use embedded schema
  2. If PROMPTKIT_SCHEMA_SOURCE is a file path, load from that file
  3. If PROMPTKIT_SCHEMA_SOURCE is “remote” and packSchemaURL is provided, fetch from that URL
  4. Otherwise, use embedded schema (default for offline support)
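
A minimal sketch of offline-first validation setup; packJSON is a placeholder holding the raw pack document:

schemaURL := schema.ExtractSchemaURL(packJSON) // "" if $schema is absent

loader, err := schema.GetSchemaLoader(schemaURL)
if err != nil {
	return err
}
_ = loader // pass to gojsonschema alongside a document loader

// To force the embedded schema regardless of $schema:
//   os.Setenv(schema.SchemaSourceEnvVar, "local")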
import "github.com/AltairaLabs/PromptKit/runtime/providers/all"

Package all provides a convenient way to register all PromptKit providers with a single import. Instead of importing each provider individually:

import (
_ "github.com/AltairaLabs/PromptKit/runtime/providers/claude"
_ "github.com/AltairaLabs/PromptKit/runtime/providers/gemini"
_ "github.com/AltairaLabs/PromptKit/runtime/providers/ollama"
_ "github.com/AltairaLabs/PromptKit/runtime/providers/openai"
)

You can simply import this package:

import _ "github.com/AltairaLabs/PromptKit/runtime/providers/all"

This registers all available providers with the provider registry, making them available for use in your application.

import "github.com/AltairaLabs/PromptKit/runtime/providers/claude"

Package claude provides Anthropic Claude LLM provider integration.

Provider implements the Provider interface for Anthropic Claude

type Provider struct {
providers.BaseProvider
// contains filtered or unexported fields
}

func NewProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool) *Provider

NewProvider creates a new Claude provider

func NewProviderWithCredential(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool, cred providers.Credential) *Provider

NewProviderWithCredential creates a new Claude provider with explicit credential.

func (p *Provider) CalculateCost(tokensIn, tokensOut, cachedTokens int) types.CostInfo

CalculateCost calculates detailed cost breakdown including optional cached tokens

func (p *Provider) GetMultimodalCapabilities() providers.MultimodalCapabilities

GetMultimodalCapabilities returns Claude’s multimodal support capabilities

func (p *Provider) Model() string

Model returns the model name/identifier used by this provider.

func (p *Provider) Predict(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)

Predict sends a predict request to Claude

func (p *Provider) PredictMultimodal(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)

PredictMultimodal sends a multimodal predict request to Claude

func (p *Provider) PredictMultimodalStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictMultimodalStream sends a streaming multimodal predict request to Claude

func (p *Provider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictStream performs a streaming prediction request to Claude

ToolProvider extends ClaudeProvider with tool support

type ToolProvider struct {
*Provider
}

func NewToolProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool) *ToolProvider

NewToolProvider creates a new Claude provider with tool support

func NewToolProviderWithCredential(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool, cred providers.Credential) *ToolProvider

NewToolProviderWithCredential creates a Claude tool provider with explicit credential.

func (p *ToolProvider) BuildTooling(descriptors []*providers.ToolDescriptor) (providers.ProviderTools, error)

BuildTooling converts tool descriptors to Claude format

func (p *ToolProvider) PredictStreamWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, toolChoice string) (<-chan providers.StreamChunk, error)

PredictStreamWithTools performs a streaming predict request with tool support

func (p *ToolProvider) PredictWithTools(ctx context.Context, req providers.PredictionRequest, tools providers.ProviderTools, toolChoice string) (providers.PredictionResponse, []types.MessageToolCall, error)

PredictWithTools performs a predict request with tool support

import "github.com/AltairaLabs/PromptKit/runtime/providers/gemini"

Package gemini provides Gemini Live API streaming support.

IMPORTANT: Response Modality Limitation

The Gemini Live API does NOT support requesting both TEXT and AUDIO response modalities simultaneously. Attempting to set ResponseModalities to [“TEXT”, “AUDIO”] will result in a WebSocket error:

websocket: close 1007 (invalid payload data): Request contains an invalid argument.

Valid configurations:

  • [“TEXT”] - Text responses only (default)
  • [“AUDIO”] - Audio responses only

If you need both text and audio, you must choose one primary modality. For audio responses with transcription, the API may provide output transcription separately via the OutputTranscription field.

const (
// DefaultChunkDuration is 100ms of audio
DefaultChunkDuration = 100 // milliseconds
// DefaultChunkSize is the number of bytes for 100ms at 16kHz 16-bit mono
// 16000 Hz * 0.1 sec * 2 bytes/sample = 3200 bytes
DefaultChunkSize = (geminiSampleRate * DefaultChunkDuration / 1000) * bytesPerSample
)

Embedding model constants

const (
// DefaultGeminiEmbeddingModel is the default model for embeddings
DefaultGeminiEmbeddingModel = "text-embedding-004"
// EmbeddingModel004 is the current recommended model
EmbeddingModel004 = "text-embedding-004"
// EmbeddingModel001 is the legacy embedding model
EmbeddingModel001 = "embedding-001"
)

Common error messages

const (
ErrNotConnected = "not connected"
ErrManagerClosed = "manager is closed"
)

Common error messages

const (
ErrSessionClosed = "session is closed"
)

WebSocket limits

const (
// MaxMessageSize is the maximum allowed WebSocket message size (16MB).
// This protects against memory exhaustion from malformed or malicious responses.
// The limit is generous to accommodate base64-encoded audio/video content.
MaxMessageSize = 16 * 1024 * 1024
)

var (
// ErrInvalidSampleRate indicates an unsupported sample rate
ErrInvalidSampleRate = errors.New("invalid sample rate: must be 16000 Hz")
// ErrInvalidChannels indicates an unsupported channel count
ErrInvalidChannels = errors.New("invalid channels: must be mono (1 channel)")
// ErrInvalidBitDepth indicates an unsupported bit depth
ErrInvalidBitDepth = errors.New("invalid bit depth: must be 16 bits")
// ErrInvalidChunkSize indicates chunk size is not aligned
ErrInvalidChunkSize = errors.New("invalid chunk size: must be multiple of sample size")
// ErrEmptyAudioData indicates no audio data provided
ErrEmptyAudioData = errors.New("empty audio data")
)

Common errors for Gemini streaming

var (
// ErrInvalidAudioFormat indicates audio format doesn't meet Gemini requirements
ErrInvalidAudioFormat = errors.New("invalid audio format")
// ErrRateLimitExceeded indicates too many requests
ErrRateLimitExceeded = errors.New("rate limit exceeded")
// ErrAuthenticationFailed indicates invalid API key
ErrAuthenticationFailed = errors.New("authentication failed")
// ErrServiceUnavailable indicates temporary service issue
ErrServiceUnavailable = errors.New("service unavailable")
// ErrPolicyViolation indicates content policy violation
ErrPolicyViolation = errors.New("policy violation")
// ErrInvalidRequest indicates malformed request
ErrInvalidRequest = errors.New("invalid request")
)

func ClassifyError(apiErr *APIError) error

ClassifyError converts an API error code to a standard error

APIError represents an error from the Gemini API

type APIError struct {
Code int `json:"code"`
Message string `json:"message"`
Status string `json:"status"`
}

func (e *APIError) Error() string

Error implements the error interface

func (e *APIError) IsAuthError() bool

IsAuthError returns true if the error is authentication-related

func (e *APIError) IsPolicyViolation() bool

IsPolicyViolation returns true if the error is a content policy violation

func (e *APIError) IsRetryable() bool

IsRetryable returns true if the error can be retried
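
A minimal sketch of acting on a decoded error; apiErr is assumed to be the *APIError taken from an ErrorResponse:

switch ClassifyError(apiErr) {
case ErrRateLimitExceeded, ErrServiceUnavailable:
	// transient: back off and retry
case ErrAuthenticationFailed, ErrPolicyViolation:
	// permanent: surface to the caller immediately
default:
	// ErrInvalidRequest and anything unclassified
}

// The helpers answer the same questions directly:
if apiErr.IsRetryable() {
	// safe to retry
}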

AudioEncoder handles PCM Linear16 audio encoding for Gemini Live API

type AudioEncoder struct {
// contains filtered or unexported fields
}

func NewAudioEncoder() *AudioEncoder

NewAudioEncoder creates a new audio encoder with Gemini Live API specifications

func NewAudioEncoderWithChunkSize(chunkSize int) (*AudioEncoder, error)

NewAudioEncoderWithChunkSize creates an encoder with custom chunk size

func (e *AudioEncoder) AssembleChunks(chunks []*types.MediaChunk) ([]byte, error)

AssembleChunks reassembles MediaChunks back into continuous PCM data.

func (e *AudioEncoder) ConvertInt16ToPCM(samples []int16) []byte

ConvertInt16ToPCM converts []int16 samples to PCM bytes (little-endian)

func (e *AudioEncoder) ConvertPCMToInt16(pcmData []byte) ([]int16, error)

ConvertPCMToInt16 converts PCM bytes to []int16 samples (little-endian)

func (e *AudioEncoder) CreateChunks(ctx context.Context, pcmData []byte) ([]*types.MediaChunk, error)

CreateChunks splits PCM audio data into appropriately sized chunks

func (e *AudioEncoder) DecodePCM(base64Data string) ([]byte, error)

DecodePCM decodes base64-encoded audio data back to raw PCM

func (e *AudioEncoder) EncodePCM(pcmData []byte) (string, error)

EncodePCM encodes raw PCM audio data to base64 for WebSocket transmission

func (e *AudioEncoder) GenerateSineWave(frequency float64, durationMs int, amplitude float64) []byte

GenerateSineWave generates PCM audio for a sine wave (useful for testing)

func (e *AudioEncoder) GetChunkDurationMs(chunkSize int) float64

GetChunkDurationMs calculates the duration of a chunk in milliseconds

func (e *AudioEncoder) GetChunkSize() int

GetChunkSize returns the configured chunk size in bytes

func (e *AudioEncoder) GetSampleRate() int

GetSampleRate returns the configured sample rate

func (e *AudioEncoder) ReadChunks(ctx context.Context, reader io.Reader) (chunkStream <-chan *types.MediaChunk, errStream <-chan error)

ReadChunks reads audio from an io.Reader and creates chunks on-the-fly

func (e *AudioEncoder) ValidateConfig(config *types.StreamingMediaConfig) error

ValidateConfig validates audio configuration against Gemini requirements
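
A minimal sketch that generates test audio and chunks it for transmission; ctx is a placeholder context.Context:

enc := NewAudioEncoder()

// 440 Hz tone, 500 ms, half amplitude, in 16 kHz 16-bit mono PCM.
pcm := enc.GenerateSineWave(440, 500, 0.5)

chunks, err := enc.CreateChunks(ctx, pcm)
if err != nil {
	return err
}
// With the default chunk size each chunk covers about 100 ms of audio.
_ = chunks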

EmbeddingOption configures the EmbeddingProvider.

type EmbeddingOption func(*EmbeddingProvider)

func WithGeminiEmbeddingAPIKey(key string) EmbeddingOption

WithGeminiEmbeddingAPIKey sets the API key explicitly.

func WithGeminiEmbeddingBaseURL(url string) EmbeddingOption

WithGeminiEmbeddingBaseURL sets a custom base URL.

func WithGeminiEmbeddingHTTPClient(client *http.Client) EmbeddingOption

WithGeminiEmbeddingHTTPClient sets a custom HTTP client.

func WithGeminiEmbeddingModel(model string) EmbeddingOption

WithGeminiEmbeddingModel sets the embedding model.

EmbeddingProvider implements embedding generation via Gemini API.

type EmbeddingProvider struct {
*providers.BaseEmbeddingProvider
}

func NewEmbeddingProvider(opts ...EmbeddingOption) (*EmbeddingProvider, error)

NewEmbeddingProvider creates a Gemini embedding provider.

func (p *EmbeddingProvider) Embed(ctx context.Context, req providers.EmbeddingRequest) (providers.EmbeddingResponse, error)

Embed generates embeddings for the given texts.

func (p *EmbeddingProvider) EstimateCost(tokens int) float64

EstimateCost estimates the cost for embedding the given number of tokens. Note: Gemini embeddings are currently free tier.
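
A minimal sketch of constructing the provider; apiKey is a placeholder, and whether the key can instead come from the environment is not confirmed here:

embedder, err := NewEmbeddingProvider(
	WithGeminiEmbeddingModel(EmbeddingModel004),
	WithGeminiEmbeddingAPIKey(apiKey),
)
if err != nil {
	return err
}
// embedder.Embed(ctx, req) accepts a providers.EmbeddingRequest; its fields
// are defined in the providers package and not repeated in this section.
_ = embedder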

ErrorResponse wraps an APIError in a message format

type ErrorResponse struct {
Error *APIError `json:"error"`
}

FunctionCall represents a function call

type FunctionCall struct {
Name string `json:"name,omitempty"`
ID string `json:"id,omitempty"`
Args map[string]interface{} `json:"args,omitempty"`
}

InlineData represents inline media data

type InlineData struct {
MimeType string `json:"mimeType,omitempty"` // camelCase!
Data string `json:"data,omitempty"` // Base64 encoded
}

ModelTurn represents a model response turn

type ModelTurn struct {
Parts []Part `json:"parts,omitempty"`
}

Part represents a content part (text or inline data)

type Part struct {
Text string `json:"text,omitempty"`
InlineData *InlineData `json:"inlineData,omitempty"` // camelCase!
}

PromptFeedback contains safety ratings and block reason

type PromptFeedback struct {
SafetyRatings []SafetyRating `json:"safetyRatings,omitempty"`
BlockReason string `json:"blockReason,omitempty"`
}

func (f *PromptFeedback) GetBlockReason() string

GetBlockReason returns a human-readable block reason

func (f *PromptFeedback) IsBlocked() bool

IsBlocked returns true if content was blocked by safety filters

Provider implements the Provider interface for Google Gemini

type Provider struct {
providers.BaseProvider
BaseURL string
ApiKey string
Defaults providers.ProviderDefaults
// contains filtered or unexported fields
}

func NewProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool) *Provider

NewProvider creates a new Gemini provider

func NewProviderWithCredential(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool, cred providers.Credential) *Provider

NewProviderWithCredential creates a new Gemini provider with explicit credential.

func (p *Provider) CalculateCost(tokensIn, tokensOut, cachedTokens int) types.CostInfo

CalculateCost calculates detailed cost breakdown including optional cached tokens

func (p *Provider) CreateStreamSession(ctx context.Context, req *providers.StreamingInputConfig) (providers.StreamInputSession, error)

CreateStreamSession creates a new bidirectional streaming session with Gemini Live API

Response Modalities: By default, the session is configured to return TEXT responses only. To request audio responses, pass “response_modalities” in the request metadata:

req := providers.StreamInputRequest{
Config: config,
Metadata: map[string]interface{}{
"response_modalities": []string{"AUDIO"}, // Audio only (TEXT+AUDIO not supported)
},
}

Audio responses will be delivered in the StreamChunk.Metadata[“audio_data”] field as base64-encoded PCM.

func (p *Provider) GetMultimodalCapabilities() providers.MultimodalCapabilities

GetMultimodalCapabilities returns Gemini’s multimodal support capabilities

func (p *Provider) GetStreamingCapabilities() providers.StreamingCapabilities

GetStreamingCapabilities returns detailed information about Gemini’s streaming support

func (p *Provider) Model() string

Model returns the model name/identifier used by this provider.

func (p *Provider) Predict(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)

Predict sends a predict request to Gemini

func (p *Provider) PredictMultimodal(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)

PredictMultimodal performs a predict request with multimodal content. This validates multimodal content against Gemini’s capabilities before making the request. For callers that don’t need validation, use Predict directly.

func (p *Provider) PredictMultimodalStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictMultimodalStream performs a streaming predict request with multimodal content. This validates multimodal content against Gemini’s capabilities before making the request. For callers that don’t need validation, use PredictStream directly.

func (p *Provider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictStream performs a streaming prediction request to Gemini

func (p *Provider) SupportsStreamInput() []string

SupportsStreamInput returns the media types supported for streaming input

RecoveryStrategy defines how to handle different error types

type RecoveryStrategy int

const (
// RecoveryRetry indicates the operation should be retried
RecoveryRetry RecoveryStrategy = iota
// RecoveryFailFast indicates the operation should fail immediately
RecoveryFailFast
// RecoveryGracefulDegradation indicates fallback to a simpler mode
RecoveryGracefulDegradation
// RecoveryWaitAndRetry indicates retry after a delay
RecoveryWaitAndRetry
)

func DetermineRecoveryStrategy(err error) RecoveryStrategy

DetermineRecoveryStrategy determines how to handle an error
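
A minimal sketch of dispatching on the strategy for a streaming error err:

switch DetermineRecoveryStrategy(err) {
case RecoveryRetry:
	// retry immediately
case RecoveryWaitAndRetry:
	// back off, then retry
case RecoveryGracefulDegradation:
	// e.g. fall back to a simpler response mode
case RecoveryFailFast:
	return err
}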

SafetyRating represents content safety assessment

type SafetyRating struct {
Category string `json:"category"`
Probability string `json:"probability"`
}

ServerContent represents the server content (BidiGenerateContentServerContent)

type ServerContent struct {
ModelTurn *ModelTurn `json:"modelTurn,omitempty"`
TurnComplete bool `json:"turnComplete,omitempty"`
GenerationComplete bool `json:"generationComplete,omitempty"`
Interrupted bool `json:"interrupted,omitempty"`
InputTranscription *Transcription `json:"inputTranscription,omitempty"` // User speech transcription
OutputTranscription *Transcription `json:"outputTranscription,omitempty"` // Model speech transcription
}

ServerMessage represents a message from the Gemini server (BidiGenerateContentServerMessage)

type ServerMessage struct {
SetupComplete *SetupComplete `json:"setupComplete,omitempty"`
ServerContent *ServerContent `json:"serverContent,omitempty"`
ToolCall *ToolCallMsg `json:"toolCall,omitempty"`
UsageMetadata *UsageMetadata `json:"usageMetadata,omitempty"`
}

func (s *ServerMessage) UnmarshalJSON(data []byte) error

UnmarshalJSON unmarshals ServerMessage from JSON with custom handling.

SetupComplete indicates setup is complete (empty object per docs)

type SetupComplete struct{}

StreamSession implements StreamInputSession for Gemini Live API with automatic reconnection on unexpected connection drops.

type StreamSession struct {
// contains filtered or unexported fields
}

func NewStreamSession(ctx context.Context, wsURL, apiKey string, config *StreamSessionConfig) (*StreamSession, error)

NewStreamSession creates a new streaming session

func (s *StreamSession) Close() error

Close closes the session

func (s *StreamSession) CompleteTurn(ctx context.Context) error

CompleteTurn signals that the current turn is complete

func (s *StreamSession) Done() <-chan struct{}

Done returns a channel that’s closed when the session ends

func (s *StreamSession) EndInput()

EndInput implements the EndInputter interface expected by DuplexProviderStage. It signals that the user’s input turn is complete and the model should respond.

Behavior depends on VAD configuration:

  • If VAD is disabled: sends activityEnd signal for explicit turn control
  • If VAD is enabled: sends silence frames to trigger VAD end-of-speech detection

func (s *StreamSession) Error() error

Error returns the error that caused the session to close

func (s *StreamSession) Response() <-chan providers.StreamChunk

Response returns the channel for receiving responses

func (s *StreamSession) SendChunk(ctx context.Context, chunk *types.MediaChunk) error

SendChunk sends a media chunk to the server. When VAD is disabled (manual turn control), automatically sends activityStart before the first audio chunk of a turn.

func (s *StreamSession) SendSystemContext(ctx context.Context, text string) error

SendSystemContext sends a text message as context without completing the turn. Use this for system prompts that should provide context but not trigger a response. The audio/text that follows will be processed with this context in mind.

func (s *StreamSession) SendText(ctx context.Context, text string) error

SendText sends a text message to the server and marks the turn as complete

func (s *StreamSession) SendToolResponse(ctx context.Context, toolCallID, result string) error

SendToolResponse sends a single tool execution result back to Gemini. The toolCallID must match the ID from the FunctionCall. The result should be a JSON-serializable string (typically JSON).

func (s *StreamSession) SendToolResponses(ctx context.Context, responses []providers.ToolResponse) error

SendToolResponses sends multiple tool execution results back to Gemini. This is used when the model makes parallel tool calls. After receiving the tool responses, Gemini will continue generating.
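
A minimal sketch of one text turn; session is assumed to come from Provider.CreateStreamSession (or NewStreamSession):

defer session.Close()

// SendText marks the turn complete, so no explicit CompleteTurn is needed.
if err := session.SendText(ctx, "Summarize the conversation so far."); err != nil {
	return err
}

for {
	select {
	case chunk, ok := <-session.Response():
		if !ok {
			return session.Error() // nil on a clean shutdown
		}
		_ = chunk // text deltas; audio, if requested, arrives via chunk metadata
	case <-session.Done():
		return session.Error()
	}
}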

StreamSessionConfig configures a streaming session

type StreamSessionConfig struct {
Model string // Model name (will be prefixed with "models/" automatically)
ResponseModalities []string // "TEXT" or "AUDIO" - NOT both! See package doc for details.
SystemInstruction string // System prompt/instruction for the model
InputCostPer1K float64 // Cost per 1K input tokens (for USD calculation)
OutputCostPer1K float64 // Cost per 1K output tokens (for USD calculation)
// VAD configures Voice Activity Detection settings.
// If nil, Gemini uses its default VAD settings.
VAD *VADConfig
// Tools defines the function declarations available to the model.
// When tools are configured, the model will return structured tool calls
// instead of speaking them as text. Tool definitions should match the
// OpenAPI schema subset supported by Gemini.
Tools []ToolDefinition
// AutoReconnect enables automatic reconnection on unexpected connection drops.
// When enabled, the session will attempt to reconnect and continue receiving
// responses. Note: conversation context may be lost on reconnection.
AutoReconnect bool
MaxReconnectTries int // Maximum reconnection attempts (default: 3)
}

ToolCallMsg represents a tool call from the model

type ToolCallMsg struct {
FunctionCalls []FunctionCall `json:"functionCalls,omitempty"`
}

ToolDefinition represents a function/tool that the model can call. This follows the Gemini function calling schema.

type ToolDefinition struct {
Name string `json:"name"`
Description string `json:"description,omitempty"`
Parameters map[string]interface{} `json:"parameters,omitempty"` // JSON Schema for parameters
}

ToolProvider extends GeminiProvider with tool support

type ToolProvider struct {
*Provider
// contains filtered or unexported fields
}

func NewToolProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool) *ToolProvider

NewToolProvider creates a new Gemini provider with tool support

func NewToolProviderWithCredential(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool, cred providers.Credential) *ToolProvider

NewToolProviderWithCredential creates a Gemini tool provider with explicit credential.

func (p *ToolProvider) BuildTooling(descriptors []*providers.ToolDescriptor) (providers.ProviderTools, error)

BuildTooling converts tool descriptors to Gemini format

func (p *ToolProvider) CreateStreamSession(ctx context.Context, req *providers.StreamingInputConfig) (providers.StreamInputSession, error)

CreateStreamSession forwards to the embedded Provider’s CreateStreamSession. This enables duplex streaming with tool support.

func (p *ToolProvider) GetStreamingCapabilities() providers.StreamingCapabilities

GetStreamingCapabilities forwards to the embedded Provider’s GetStreamingCapabilities.

func (p *ToolProvider) PredictStreamWithTools(ctx context.Context, req providers.PredictionRequest, tools any, toolChoice string) (<-chan providers.StreamChunk, error)

PredictStreamWithTools performs a streaming predict request with tool support

func (p *ToolProvider) PredictWithTools(ctx context.Context, req providers.PredictionRequest, tools providers.ProviderTools, toolChoice string) (providers.PredictionResponse, []types.MessageToolCall, error)

PredictWithTools performs a predict request with tool support

func (p *ToolProvider) SupportsStreamInput() []string

SupportsStreamInput forwards to the embedded Provider’s SupportsStreamInput.

Transcription represents audio transcription (BidiGenerateContentTranscription)

type Transcription struct {
Text string `json:"text,omitempty"`
}

UsageMetadata contains token usage information

type UsageMetadata struct {
PromptTokenCount int `json:"promptTokenCount,omitempty"`
ResponseTokenCount int `json:"responseTokenCount,omitempty"`
TotalTokenCount int `json:"totalTokenCount,omitempty"`
}

VADConfig configures Voice Activity Detection settings for Gemini Live API. These settings control when Gemini detects the end of speech and starts responding.

type VADConfig struct {
// Disabled turns off automatic VAD (manual turn control only)
Disabled bool
// StartOfSpeechSensitivity controls how sensitive the VAD is to detecting speech start.
// Valid values: "UNSPECIFIED", "LOW", "MEDIUM", "HIGH"
StartOfSpeechSensitivity string
// EndOfSpeechSensitivity controls how sensitive the VAD is to detecting silence.
// Valid values: "UNSPECIFIED", "LOW", "MEDIUM", "HIGH"
// Lower sensitivity = longer silence needed to trigger end of speech
EndOfSpeechSensitivity string
// PrefixPaddingMs is extra padding in milliseconds before speech detection
PrefixPaddingMs int
// SilenceThresholdMs is the duration of silence (in ms) to trigger end of speech.
// This maps to Gemini's "suffixPaddingMs" parameter.
// Default is typically ~500ms. Increase for TTS audio with natural pauses.
SilenceThresholdMs int
}
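
A minimal sketch of a session configuration tuned for pre-recorded speech with natural pauses; the model name is a placeholder:

cfg := &StreamSessionConfig{
	Model:              "gemini-2.0-flash-exp",
	ResponseModalities: []string{"AUDIO"}, // TEXT or AUDIO, never both
	SystemInstruction:  "You are a concise voice assistant.",
	AutoReconnect:      true,
	MaxReconnectTries:  3,
	VAD: &VADConfig{
		EndOfSpeechSensitivity: "LOW", // tolerate longer pauses
		SilenceThresholdMs:     1200,
	},
}
_ = cfg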

WebSocketManager manages a WebSocket connection with reconnection logic.

type WebSocketManager struct {
// contains filtered or unexported fields
}

func NewWebSocketManager(url, apiKey string) *WebSocketManager

NewWebSocketManager creates a new WebSocket manager

func (wm *WebSocketManager) Close() error

Close gracefully closes the WebSocket connection

func (wm *WebSocketManager) Connect(ctx context.Context) error

Connect establishes a WebSocket connection to the Gemini Live API

func (wm *WebSocketManager) ConnectWithRetry(ctx context.Context) error

ConnectWithRetry connects with exponential backoff retry logic

func (wm *WebSocketManager) IsConnected() bool

IsConnected returns true if the WebSocket is connected

func (wm *WebSocketManager) Receive(ctx context.Context, v interface{}) error

Receive reads a message from the WebSocket

func (wm *WebSocketManager) Send(msg interface{}) error

Send sends a message through the WebSocket

func (wm *WebSocketManager) SendPing() error

SendPing sends a WebSocket ping to keep the connection alive

func (wm *WebSocketManager) StartHeartbeat(ctx context.Context, interval time.Duration)

StartHeartbeat starts a goroutine that sends periodic pings
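
A minimal sketch of driving the manager directly (most callers use StreamSession instead); wsURL and apiKey are placeholders:

wm := NewWebSocketManager(wsURL, apiKey)
if err := wm.ConnectWithRetry(ctx); err != nil {
	return err
}
defer wm.Close()

// Keep the connection alive while the session is in use.
wm.StartHeartbeat(ctx, 30*time.Second)

var msg ServerMessage
if err := wm.Receive(ctx, &msg); err != nil {
	return err
}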

import "github.com/AltairaLabs/PromptKit/runtime/providers/imagen"

Package imagen provides Google Imagen image generation provider integration.

Config holds configuration for creating an Imagen provider

type Config struct {
ID string
Model string
BaseURL string
ApiKey string
ProjectID string
Location string
IncludeRawOutput bool
Defaults providers.ProviderDefaults
}

Provider implements the Provider interface for Google’s Imagen image generation

type Provider struct {
providers.BaseProvider
BaseURL string
ApiKey string
ProjectID string
Location string
Defaults providers.ProviderDefaults
HTTPClient *http.Client
// contains filtered or unexported fields
}

func NewProvider(config Config) *Provider

NewProvider creates a new Imagen provider

func (p *Provider) CalculateCost(inputTokens, outputTokens, cachedTokens int) types.CostInfo

CalculateCost calculates cost breakdown (simplified for Imagen)

func (p *Provider) Close() error

Close cleans up resources

func (p *Provider) Model() string

Model returns the model name/identifier used by this provider.

func (p *Provider) Predict(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)

Predict generates images based on the last user message

func (p *Provider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictStream is not supported for image generation

func (p *Provider) SupportsStreaming() bool

SupportsStreaming returns false for Imagen

import "github.com/AltairaLabs/PromptKit/runtime/providers/mock"

Package mock provides mock provider implementation for testing and development.

const (
// DefaultMockStreamingResponse is the default response text for auto-respond mode.
DefaultMockStreamingResponse = "Mock streaming response"
)

AudioURL represents audio content in a mock response.

type AudioURL struct {
URL string `yaml:"url"` // URL to the audio file (can be mock://, http://, https://, data:, or file path)
}

Config represents the structure of a mock configuration file. This allows scenario-specific and turn-specific responses to be defined.

type Config struct {
// Default response if no specific match is found
DefaultResponse string `yaml:"defaultResponse"`
// Scenario-specific responses keyed by scenario ID
Scenarios map[string]ScenarioConfig `yaml:"scenarios,omitempty"`
// Selfplay persona responses keyed by persona ID
// Used when generating user messages in selfplay mode
Selfplay map[string]ScenarioConfig `yaml:"selfplay,omitempty"`
}

ContentPart represents a single content part in a multimodal mock response. This mirrors the structure of types.ContentPart but with YAML-friendly field names.

type ContentPart struct {
Type string `yaml:"type"` // "text", "image", "audio", "video", or "document"
Text string `yaml:"text,omitempty"` // Text content (for type="text")
ImageURL *ImageURL `yaml:"image_url,omitempty"` // Image URL (for type="image")
AudioURL *AudioURL `yaml:"audio_url,omitempty"` // Audio URL (for type="audio")
VideoURL *VideoURL `yaml:"video_url,omitempty"` // Video URL (for type="video")
DocumentURL *DocumentURL `yaml:"document_url,omitempty"` // Document URL (for type="document")
Metadata map[string]interface{} `yaml:"metadata,omitempty"` // Additional metadata
}

func (m *ContentPart) ToContentPart() *types.ContentPart

ToContentPart converts a ContentPart to types.ContentPart.

DocumentURL represents document content in a mock response.

type DocumentURL struct {
URL string `yaml:"url"` // URL to the document file (can be mock://, http://, https://, data:, or file path)
}

FileMockRepository loads mock responses from a YAML configuration file. This is the default implementation for file-based mock configurations.

type FileMockRepository struct {
// contains filtered or unexported fields
}

func NewFileMockRepository(configPath string) (*FileMockRepository, error)

NewFileMockRepository creates a repository that loads mock responses from a YAML file. The file should follow the Config structure with scenarios and turn-specific responses.

func (r *FileMockRepository) GetResponse(ctx context.Context, params ResponseParams) (string, error)

GetResponse retrieves a mock response based on the provided parameters. It follows this priority order:

  1. Scenario + Turn specific response
  2. Scenario default response
  3. Global default response
  4. Generic fallback message

func (r *FileMockRepository) GetTurn(ctx context.Context, params ResponseParams) (*Turn, error)

GetTurn retrieves a structured mock turn response that may include tool calls. This method supports both backward-compatible string responses and new structured Turn responses.

For selfplay user turns (when ArenaRole == “self_play_user”), it looks up responses from the selfplay section using PersonaID. For regular turns, it uses scenario responses.
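
A minimal sketch of a file-backed mock; the YAML path is a placeholder and must follow the Config structure above:

repo, err := NewFileMockRepository("testdata/mock_responses.yaml")
if err != nil {
	return err
}

// Wire the repository into a mock provider, or query it directly.
provider := NewProviderWithRepository("mock", "mock-model", false, repo)
_ = provider

text, err := repo.GetResponse(ctx, ResponseParams{
	ScenarioID: "checkout-flow", // placeholder scenario ID
	TurnNumber: 2,
})
if err != nil {
	return err
}
_ = text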

ImageURL represents image content in a mock response.

type ImageURL struct {
URL string `yaml:"url"` // URL to the image (can be mock://, http://, https://, data:, or file path)
Detail *string `yaml:"detail,omitempty"` // Detail level: "low", "high", "auto"
}

InMemoryMockRepository stores mock responses in memory. This is useful for testing and programmatic configuration without files.

type InMemoryMockRepository struct {
// contains filtered or unexported fields
}

func NewInMemoryMockRepository(defaultResponse string) *InMemoryMockRepository

NewInMemoryMockRepository creates an in-memory repository with a default response.

func (r *InMemoryMockRepository) GetResponse(ctx context.Context, params ResponseParams) (string, error)

GetResponse retrieves a mock response based on the provided parameters.

func (r *InMemoryMockRepository) GetTurn(ctx context.Context, params ResponseParams) (*Turn, error)

GetTurn retrieves a structured mock turn response. InMemoryMockRepository currently only supports simple text responses.

func (r *InMemoryMockRepository) SetResponse(scenarioID string, turnNumber int, response string)

SetResponse sets a mock response for a specific scenario and turn. Use turnNumber = 0 for scenario default, or -1 for global default.
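
A minimal sketch of programmatic configuration for a unit test:

repo := NewInMemoryMockRepository("global default reply")
repo.SetResponse("refund-flow", 0, "Scenario-level default reply.")
repo.SetResponse("refund-flow", 1, "Sure, I can help with that refund.")

provider := NewProviderWithRepository("mock", "mock-model", false, repo)
_ = provider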

MockStreamSession implements providers.StreamInputSession for testing duplex scenarios.

type MockStreamSession struct {
// contains filtered or unexported fields
}

func NewMockStreamSession() *MockStreamSession

NewMockStreamSession creates a new mock stream session.

func (m *MockStreamSession) Close() error

Close implements StreamInputSession.Close.

func (m *MockStreamSession) Done() <-chan struct{}

Done implements StreamInputSession.Done.

func (m *MockStreamSession) EmitChunk(chunk *providers.StreamChunk)

EmitChunk sends a response chunk (for testing).

func (m *MockStreamSession) EndInput()

EndInput signals the end of input for the current turn. For mock sessions with auto-respond enabled, this triggers the response.

func (m *MockStreamSession) Error() error

Error implements StreamInputSession.Error.

func (m *MockStreamSession) GetChunks() []*types.MediaChunk

GetChunks returns all received media chunks (for testing).

func (m *MockStreamSession) GetTexts() []string

GetTexts returns all received text messages (for testing).

func (m *MockStreamSession) Response() <-chan providers.StreamChunk

Response implements StreamInputSession.Response.

func (m *MockStreamSession) SendChunk(ctx context.Context, chunk *types.MediaChunk) error

SendChunk implements StreamInputSession.SendChunk.

func (m *MockStreamSession) SendSystemContext(ctx context.Context, text string) error

SendSystemContext implements StreamInputSession.SendSystemContext. Unlike SendText, this does NOT trigger a response from the model.

func (m *MockStreamSession) SendText(ctx context.Context, text string) error

SendText implements StreamInputSession.SendText.

func (m *MockStreamSession) WithAutoRespond(text string) *MockStreamSession

WithAutoRespond configures the session to automatically respond to inputs. The session stays open to handle multiple turns - call Close() when done.

func (m *MockStreamSession) WithCloseAfterResponse(closeAfter bool) *MockStreamSession

WithCloseAfterResponse configures whether to close the response channel after auto-responding.

func (m *MockStreamSession) WithCloseAfterTurns(turns int, noResponse ...bool) *MockStreamSession

WithCloseAfterTurns configures the session to close unexpectedly after N turns. This simulates Gemini dropping the connection mid-conversation. If noResponse is true, the session closes WITHOUT sending the final response (mimics Gemini closing after interrupted turnComplete).

func (m *MockStreamSession) WithError(err error) *MockStreamSession

WithError sets the error returned by Error().

func (m *MockStreamSession) WithInterruptOnTurn(turnNumber int) *MockStreamSession

WithInterruptOnTurn configures the session to simulate an interruption on a specific turn. This mimics Gemini detecting user speech while the model is responding. The turn is 1-indexed (first turn = 1).

func (m *MockStreamSession) WithResponseChunks(chunks []providers.StreamChunk) *MockStreamSession

WithResponseChunks configures custom response chunks to emit.

func (m *MockStreamSession) WithSendChunkError(err error) *MockStreamSession

WithSendChunkError configures SendChunk to return an error.

func (m *MockStreamSession) WithSendTextError(err error) *MockStreamSession

WithSendTextError configures SendText to return an error.

Provider is a provider implementation for testing and development. It returns mock responses without making any API calls, using a repository pattern to source responses from various backends (files, memory, databases).

Provider is designed to be reusable across different contexts:

  • Arena testing: scenario and turn-specific responses
  • SDK examples: simple deterministic responses
  • Unit tests: programmatic response configuration

type Provider struct {
// contains filtered or unexported fields
}

func NewProvider(id, model string, includeRawOutput bool) *Provider

NewProvider creates a new mock provider with default in-memory responses. This constructor maintains backward compatibility with existing code.

func NewProviderWithRepository(id, model string, includeRawOutput bool, repo ResponseRepository) *Provider

NewProviderWithRepository creates a mock provider with a custom response repository. This allows for advanced scenarios like file-based or database-backed mock responses.

func (m *Provider) CalculateCost(inputTokens, outputTokens, cachedTokens int) types.CostInfo

CalculateCost calculates cost breakdown for given token counts.

func (m *Provider) Close() error

Close is a no-op for the mock provider; it has no resources to clean up.

func (m *Provider) ID() string

ID returns the provider ID.

func (m *Provider) Model() string

Model returns the model name.

func (m *Provider) Predict(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)

Predict returns a mock response using the configured repository.

func (m *Provider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictStream returns a mock streaming response using the configured repository.

func (m *Provider) ShouldIncludeRawOutput() bool

ShouldIncludeRawOutput returns whether raw API responses should be included.

func (m *Provider) SupportsStreaming() bool

SupportsStreaming indicates whether the provider supports streaming.

ResponseParams contains parameters for looking up mock responses. Different implementations may use different subsets of these fields.

type ResponseParams struct {
ScenarioID string // Optional: ID of the scenario being executed
TurnNumber int // Optional: Turn number in a multi-turn conversation
ProviderID string // Optional: ID of the provider being mocked
ModelName string // Optional: Model name being mocked
PersonaID string // Optional: ID of the persona for selfplay user responses
ArenaRole string // Optional: Role in arena (e.g., "self_play_user")
}

ResponseRepository provides an interface for retrieving mock responses. This abstraction allows mock data to come from various sources (files, databases, etc.) and makes MockProvider reusable across different contexts (Arena, SDK examples, unit tests).

type ResponseRepository interface {
// GetResponse retrieves a mock response for the given context.
// Parameters can include scenario ID, turn number, provider ID, etc.
// Returns the response text and any error encountered.
GetResponse(ctx context.Context, params ResponseParams) (string, error)
// GetTurn retrieves a mock turn response that may include tool calls.
// This extends GetResponse to support structured turn data with tool call simulation.
GetTurn(ctx context.Context, params ResponseParams) (*Turn, error)
}

ScenarioConfig defines mock responses for a specific scenario.

type ScenarioConfig struct {
// Default response for this scenario (overrides global default)
DefaultResponse string `yaml:"defaultResponse,omitempty"`
// Turn-specific responses keyed by turn number (1-indexed)
// Supports both simple string responses (backward compatibility) and structured Turn responses
Turns map[int]interface{} `yaml:"turns,omitempty"`
}

StreamingProvider extends Provider with StreamInputSupport for duplex testing.

type StreamingProvider struct {
*Provider
// contains filtered or unexported fields
}

func NewStreamingProvider(id, model string, includeRawOutput bool) *StreamingProvider

NewStreamingProvider creates a mock provider with duplex streaming support.

func NewStreamingProviderWithRepository(id, model string, includeRawOutput bool, repo ResponseRepository) *StreamingProvider

NewStreamingProviderWithRepository creates a mock streaming provider with a custom repository.

func (p *StreamingProvider) CreateStreamSession(ctx context.Context, req *providers.StreamingInputConfig) (providers.StreamInputSession, error)

CreateStreamSession implements StreamInputSupport.CreateStreamSession.

func (p *StreamingProvider) GetSession() *MockStreamSession

GetSession returns the first/most recent mock session for testing access to sent chunks/texts. For multiple sessions, use GetSessions() instead.

func (p *StreamingProvider) GetSessions() []*MockStreamSession

GetSessions returns all created sessions for testing.

func (p *StreamingProvider) GetStreamingCapabilities() providers.StreamingCapabilities

GetStreamingCapabilities implements StreamInputSupport.GetStreamingCapabilities.

func (p *StreamingProvider) SupportsStreamInput() []string

SupportsStreamInput implements StreamInputSupport.SupportsStreamInput.

func (p *StreamingProvider) WithAutoRespond(responseText string) *StreamingProvider

WithAutoRespond configures the provider to create sessions that auto-respond to inputs.

func (p *StreamingProvider) WithCloseAfterTurns(turns int, noResponse ...bool) *StreamingProvider

WithCloseAfterTurns configures the provider to create sessions that close unexpectedly after N turns. This simulates Gemini dropping the connection. If noResponse is true, the session closes WITHOUT sending the final response.

func (p *StreamingProvider) WithCreateSessionError(err error) *StreamingProvider

WithCreateSessionError configures CreateStreamSession to return an error.

func (p *StreamingProvider) WithInterruptOnTurn(turnNumber int) *StreamingProvider

WithInterruptOnTurn configures the provider to create sessions that simulate an interruption on a specific turn. This mimics Gemini detecting user speech while the model is responding.
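
A minimal sketch of a duplex test double that answers each turn and then drops the connection; t is a *testing.T and the empty StreamingInputConfig is a placeholder:

p := NewStreamingProvider("mock", "mock-model", false).
	WithAutoRespond("acknowledged").
	WithCloseAfterTurns(2)

session, err := p.CreateStreamSession(ctx, &providers.StreamingInputConfig{})
if err != nil {
	t.Fatal(err)
}
defer session.Close()

// After exercising the code under test, inspect what it sent.
sentTexts := p.GetSession().GetTexts()
_ = sentTexts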

ToolCall represents a simulated tool call from the LLM.

type ToolCall struct {
Name string `yaml:"name"` // Name of the tool to call
Arguments map[string]interface{} `yaml:"arguments"` // Arguments to pass to the tool
}

ToolProvider extends MockProvider to support tool/function calling and duplex streaming. It implements the ToolSupport interface to enable tool call simulation while maintaining compatibility with the existing MockProvider API. By embedding StreamingProvider, it also supports StreamInputSupport for duplex scenarios.

type ToolProvider struct {
*StreamingProvider
}

func NewToolProvider(id, model string, includeRawOutput bool, additionalConfig map[string]any) *ToolProvider

NewToolProvider creates a new mock provider with tool support and duplex streaming. This uses default in-memory responses for backward compatibility.

func NewToolProviderWithRepository(id, model string, includeRawOutput bool, repo ResponseRepository) *ToolProvider

NewToolProviderWithRepository creates a mock provider with tool support and duplex streaming using a custom response repository for advanced scenarios.

func (m *ToolProvider) BuildTooling(descriptors []*providers.ToolDescriptor) (providers.ProviderTools, error)

BuildTooling implements the ToolSupport interface. For mock providers, we just return the tools as-is since we don’t need to transform them into a provider-specific format.

func (m *ToolProvider) PredictStreamWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, toolChoice string) (<-chan providers.StreamChunk, error)

PredictStreamWithTools performs a streaming predict request with tool support. For mock providers, this delegates to PredictWithTools and wraps the response in chunks.

func (m *ToolProvider) PredictWithTools(ctx context.Context, req providers.PredictionRequest, tools providers.ProviderTools, toolChoice string) (providers.PredictionResponse, []types.MessageToolCall, error)

PredictWithTools implements the ToolSupport interface. This method handles the initial predict request with tools available, potentially returning tool calls based on the mock configuration.

Turn represents a structured mock response that may include tool calls and multimodal content. This extends simple text responses to support tool call simulation and multimodal content parts.

type Turn struct {
Type string `yaml:"type"` // "text", "tool_calls", or "multimodal"
Content string `yaml:"content,omitempty"` // Text content for the response
Parts []ContentPart `yaml:"parts,omitempty"` // Multimodal content parts (text, image, audio, video)
ToolCalls []ToolCall `yaml:"tool_calls,omitempty"` // Tool calls to simulate
}

func (t *Turn) ToContentParts() []types.ContentPart

ToContentParts converts Turn to a slice of types.ContentPart. This handles both legacy text-only responses and new multimodal responses.

VideoURL represents video content in a mock response.

type VideoURL struct {
URL string `yaml:"url"` // URL to the video file (can be mock://, http://, https://, data:, or file path)
}
import "github.com/AltairaLabs/PromptKit/runtime/providers/ollama"

Package ollama provides Ollama LLM provider integration for local development.

Provider implements the Provider interface for Ollama

type Provider struct {
providers.BaseProvider
// contains filtered or unexported fields
}

func NewProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool, additionalConfig map[string]any) *Provider

NewProvider creates a new Ollama provider

func (p *Provider) CalculateCost(tokensIn, tokensOut, cachedTokens int) types.CostInfo

CalculateCost calculates cost breakdown - Ollama is free (local inference)

func (p *Provider) GetMultimodalCapabilities() providers.MultimodalCapabilities

GetMultimodalCapabilities returns Ollama’s multimodal capabilities

func (p *Provider) Model() string

Model returns the model name/identifier used by this provider.

func (p *Provider) Predict(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)

Predict sends a predict request to Ollama

func (p *Provider) PredictMultimodal(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)

PredictMultimodal performs a predict request with multimodal content

func (p *Provider) PredictMultimodalStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictMultimodalStream performs a streaming predict request with multimodal content

func (p *Provider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictStream streams a predict response from Ollama

ToolProvider extends Provider with tool support

type ToolProvider struct {
*Provider
}

func NewToolProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool, additionalConfig map[string]any) *ToolProvider

NewToolProvider creates a new Ollama provider with tool support

func (p *ToolProvider) BuildTooling(descriptors []*providers.ToolDescriptor) (any, error)

BuildTooling converts tool descriptors to Ollama format

func (p *ToolProvider) PredictMultimodalWithTools(ctx context.Context, req providers.PredictionRequest, tools any, toolChoice string) (providers.PredictionResponse, []types.MessageToolCall, error)

PredictMultimodalWithTools implements providers.MultimodalToolSupport interface for ToolProvider This allows combining multimodal content (images) with tool calls in a single request

func (p *ToolProvider) PredictStreamWithTools(ctx context.Context, req providers.PredictionRequest, tools any, toolChoice string) (<-chan providers.StreamChunk, error)

PredictStreamWithTools performs a streaming predict request with tool support

func (p *ToolProvider) PredictWithTools(ctx context.Context, req providers.PredictionRequest, tools any, toolChoice string) (providers.PredictionResponse, []types.MessageToolCall, error)

PredictWithTools performs a prediction request with tool support

import "github.com/AltairaLabs/PromptKit/runtime/providers/openai"

Package openai provides OpenAI LLM provider integration.

Package openai provides OpenAI Realtime API streaming support.

Embedding model constants

const (
// DefaultEmbeddingModel is the default model for embeddings
DefaultEmbeddingModel = "text-embedding-3-small"
// EmbeddingModelAda002 is the legacy ada-002 model
EmbeddingModelAda002 = "text-embedding-ada-002"
// EmbeddingModel3Small is the newer small model with better performance
EmbeddingModel3Small = "text-embedding-3-small"
// EmbeddingModel3Large is the large model with highest quality
EmbeddingModel3Large = "text-embedding-3-large"
)

API mode constants

const (
APIModeResponses = "responses" // New Responses API (v1/responses)
APIModeCompletions = "completions" // Legacy Chat Completions API (v1/chat/completions)
)

Realtime API constants

const (
// RealtimeAPIEndpoint is the base WebSocket endpoint for OpenAI Realtime API.
RealtimeAPIEndpoint = "wss://api.openai.com/v1/realtime"
// RealtimeBetaHeader is required for the Realtime API.
RealtimeBetaHeader = "realtime=v1"
// Default audio configuration for OpenAI Realtime API.
// OpenAI Realtime uses 24kHz 16-bit PCM mono audio.
DefaultRealtimeSampleRate = 24000
DefaultRealtimeChannels = 1
DefaultRealtimeBitDepth = 16
)

func ParseServerEvent(data []byte) (interface{}, error)

ParseServerEvent parses a raw JSON message into the appropriate event type.

func RealtimeStreamingCapabilities() providers.StreamingCapabilities

RealtimeStreamingCapabilities returns the streaming capabilities for OpenAI Realtime API.

APIMode represents the OpenAI API mode to use

type APIMode string

ClientEvent is the base structure for all client events.

type ClientEvent struct {
EventID string `json:"event_id,omitempty"`
Type string `json:"type"`
}

ConversationContent represents content within a conversation item.

type ConversationContent struct {
Type string `json:"type"` // "input_text", "input_audio", "text", "audio"
Text string `json:"text,omitempty"`
Audio string `json:"audio,omitempty"` // Base64-encoded
Transcript string `json:"transcript,omitempty"` // For audio content
}

ConversationItem represents an item in the conversation.

type ConversationItem struct {
ID string `json:"id,omitempty"`
Type string `json:"type"` // "message", "function_call", "function_call_output"
Status string `json:"status,omitempty"`
Role string `json:"role,omitempty"` // "user", "assistant", "system"
Content []ConversationContent `json:"content,omitempty"`
CallID string `json:"call_id,omitempty"` // For function_call_output
Output string `json:"output,omitempty"` // For function_call_output
Name string `json:"name,omitempty"` // For function_call
Arguments string `json:"arguments,omitempty"` // For function_call
}

ConversationItemCreateEvent adds an item to the conversation.

type ConversationItemCreateEvent struct {
ClientEvent
PreviousItemID string `json:"previous_item_id,omitempty"`
Item ConversationItem `json:"item"`
}

ConversationItemCreatedEvent confirms an item was added.

type ConversationItemCreatedEvent struct {
ServerEvent
PreviousItemID string `json:"previous_item_id"`
Item ConversationItem `json:"item"`
}

ConversationItemInputAudioTranscriptionCompletedEvent provides transcription.

type ConversationItemInputAudioTranscriptionCompletedEvent struct {
ServerEvent
ItemID string `json:"item_id"`
ContentIndex int `json:"content_index"`
Transcript string `json:"transcript"`
}

ConversationItemInputAudioTranscriptionFailedEvent indicates transcription failed.

type ConversationItemInputAudioTranscriptionFailedEvent struct {
ServerEvent
ItemID string `json:"item_id"`
ContentIndex int `json:"content_index"`
Error ErrorDetail `json:"error"`
}

EmbeddingOption configures the EmbeddingProvider.

type EmbeddingOption func(*EmbeddingProvider)

func WithEmbeddingAPIKey(key string) EmbeddingOption

WithEmbeddingAPIKey sets the API key explicitly.

func WithEmbeddingBaseURL(url string) EmbeddingOption

WithEmbeddingBaseURL sets a custom base URL (for Azure or proxies).

func WithEmbeddingHTTPClient(client *http.Client) EmbeddingOption

WithEmbeddingHTTPClient sets a custom HTTP client.

func WithEmbeddingModel(model string) EmbeddingOption

WithEmbeddingModel sets the embedding model.

EmbeddingProvider implements embedding generation via OpenAI API.

type EmbeddingProvider struct {
*providers.BaseEmbeddingProvider
}

func NewEmbeddingProvider(opts ...EmbeddingOption) (*EmbeddingProvider, error)

NewEmbeddingProvider creates an OpenAI embedding provider.

func (p *EmbeddingProvider) Embed(ctx context.Context, req providers.EmbeddingRequest) (providers.EmbeddingResponse, error)

Embed generates embeddings for the given texts.

func (p *EmbeddingProvider) EstimateCost(tokens int) float64

EstimateCost estimates the cost for embedding the given number of tokens.
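
A minimal usage sketch, assuming this package is imported as openai alongside the standard library; the fields of providers.EmbeddingRequest are intentionally left out here rather than guessed:

// Build an embedding provider with functional options and estimate cost.
// The API key is read from the environment here; adjust as needed.
embedder, err := openai.NewEmbeddingProvider(
    openai.WithEmbeddingModel(openai.EmbeddingModel3Large),
    openai.WithEmbeddingAPIKey(os.Getenv("OPENAI_API_KEY")),
)
if err != nil {
    log.Fatal(err)
}

// EstimateCost takes a token count and returns the estimated cost.
fmt.Printf("estimated cost for 10k tokens: %f\n", embedder.EstimateCost(10000))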

ErrorDetail contains error information.

type ErrorDetail struct {
Type string `json:"type"`
Code string `json:"code"`
Message string `json:"message"`
Param string `json:"param,omitempty"`
EventID string `json:"event_id,omitempty"`
}

ErrorEvent indicates an error occurred.

type ErrorEvent struct {
ServerEvent
Error ErrorDetail `json:"error"`
}

InputAudioBufferAppendEvent appends audio to the input buffer.

type InputAudioBufferAppendEvent struct {
ClientEvent
Audio string `json:"audio"` // Base64-encoded audio data
}

InputAudioBufferClearEvent clears the audio buffer.

type InputAudioBufferClearEvent struct {
ClientEvent
}

InputAudioBufferClearedEvent confirms audio buffer was cleared.

type InputAudioBufferClearedEvent struct {
ServerEvent
}

InputAudioBufferCommitEvent commits the audio buffer for processing.

type InputAudioBufferCommitEvent struct {
ClientEvent
}

InputAudioBufferCommittedEvent confirms audio buffer was committed.

type InputAudioBufferCommittedEvent struct {
ServerEvent
PreviousItemID string `json:"previous_item_id"`
ItemID string `json:"item_id"`
}

InputAudioBufferSpeechStartedEvent indicates speech was detected.

type InputAudioBufferSpeechStartedEvent struct {
ServerEvent
AudioStartMs int `json:"audio_start_ms"`
ItemID string `json:"item_id"`
}

InputAudioBufferSpeechStoppedEvent indicates speech ended.

type InputAudioBufferSpeechStoppedEvent struct {
ServerEvent
AudioEndMs int `json:"audio_end_ms"`
ItemID string `json:"item_id"`
}

Provider implements the Provider interface for OpenAI

type Provider struct {
providers.BaseProvider
// contains filtered or unexported fields
}

func NewProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool) *Provider

NewProvider creates a new OpenAI provider

func NewProviderWithConfig(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool, additionalConfig map[string]any) *Provider

NewProviderWithConfig creates a new OpenAI provider with additional configuration

func NewProviderWithCredential(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool, cred providers.Credential) *Provider

NewProviderWithCredential creates a new OpenAI provider with explicit credential.

func NewProviderWithCredentialAndConfig(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool, cred providers.Credential, additionalConfig map[string]any) *Provider

NewProviderWithCredentialAndConfig creates a new OpenAI provider with explicit credential and config.
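
A construction sketch; the id, model, and base URL values are illustrative placeholders, and providers.ProviderDefaults is shown at its zero value:

// Create a provider and inspect it. All literal values are placeholders.
p := openai.NewProvider(
    "openai-main",               // provider id
    "gpt-4o",                    // model
    "https://api.openai.com/v1", // base URL
    providers.ProviderDefaults{},
    false, // includeRawOutput
)

fmt.Println(p.Model()) // prints the configured model name

// Detailed cost breakdown for 1,000 input, 500 output, and 0 cached tokens.
cost := p.CalculateCost(1000, 500, 0)
_ = cost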

func (p *Provider) CalculateCost(tokensIn, tokensOut, cachedTokens int) types.CostInfo

CalculateCost calculates detailed cost breakdown including optional cached tokens

func (p *Provider) CreateStreamSession(ctx context.Context, req *providers.StreamingInputConfig) (providers.StreamInputSession, error)

CreateStreamSession creates a new bidirectional streaming session with OpenAI Realtime API.

The session supports real-time audio input/output with the following features:

  • Bidirectional audio streaming (send and receive audio simultaneously)
  • Server-side voice activity detection (VAD) for automatic turn detection
  • Function/tool calling during the streaming session
  • Input and output audio transcription

Audio Format: OpenAI Realtime API uses 24kHz 16-bit PCM mono audio by default. The session automatically handles base64 encoding/decoding of audio data.

Example usage:

session, err := provider.CreateStreamSession(ctx, &providers.StreamingInputConfig{
    Config: types.StreamingMediaConfig{
        Type:       types.ContentTypeAudio,
        SampleRate: 24000,
        Encoding:   "pcm16",
        Channels:   1,
    },
    SystemInstruction: "You are a helpful assistant.",
})
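
A hedged continuation of the example above: SendText, Response, Done, and Close are documented on RealtimeSession, and this sketch assumes they are reachable through the returned StreamInputSession:

if err != nil {
    log.Fatal(err)
}
defer session.Close()

// Send a text turn and drain the response channel until the session ends.
if err := session.SendText(ctx, "Hello!"); err != nil {
    log.Fatal(err)
}

for {
    select {
    case chunk, ok := <-session.Response():
        if !ok {
            return
        }
        _ = chunk // handle streamed text/audio/tool-call chunks
    case <-session.Done():
        return
    }
}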

func (p *Provider) GetMultimodalCapabilities() providers.MultimodalCapabilities

GetMultimodalCapabilities returns OpenAI’s multimodal capabilities

func (p *Provider) GetStreamingCapabilities() providers.StreamingCapabilities

GetStreamingCapabilities returns detailed information about OpenAI’s streaming support.

func (p *Provider) Model() string

Model returns the model name/identifier used by this provider.

func (p *Provider) Predict(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)

Predict sends a predict request to OpenAI

func (p *Provider) PredictMultimodal(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)

PredictMultimodal performs a predict request with multimodal content

func (p *Provider) PredictMultimodalStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictMultimodalStream performs a streaming predict request with multimodal content

func (p *Provider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictStream streams a predict response from OpenAI

func (p *Provider) SupportsStreamInput() []string

SupportsStreamInput returns the media types supported for streaming input.

RateLimit contains rate limit details.

type RateLimit struct {
Name string `json:"name"`
Limit int `json:"limit"`
Remaining int `json:"remaining"`
ResetSeconds float64 `json:"reset_seconds"`
}

RateLimitsUpdatedEvent provides rate limit information.

type RateLimitsUpdatedEvent struct {
ServerEvent
RateLimits []RateLimit `json:"rate_limits"`
}

RealtimeSession implements StreamInputSession for OpenAI Realtime API.

type RealtimeSession struct {
// contains filtered or unexported fields
}

func NewRealtimeSession(ctx context.Context, apiKey string, config *RealtimeSessionConfig) (*RealtimeSession, error)

NewRealtimeSession creates a new OpenAI Realtime streaming session.

func (s *RealtimeSession) CancelResponse() error

CancelResponse cancels an in-progress response.

func (s *RealtimeSession) ClearAudioBuffer() error

ClearAudioBuffer clears the current audio buffer.

func (s *RealtimeSession) Close() error

Close closes the session.

func (s *RealtimeSession) CommitAudioBuffer() error

CommitAudioBuffer commits the current audio buffer for processing.

func (s *RealtimeSession) Done() <-chan struct{}

Done returns a channel that’s closed when the session ends.

func (s *RealtimeSession) EndInput()

EndInput signals the end of user input. For OpenAI Realtime with server VAD, this commits the audio buffer. For manual turn control, this commits and triggers a response.

func (s *RealtimeSession) Error() error

Error returns any error that occurred during the session.

func (s *RealtimeSession) Response() <-chan providers.StreamChunk

Response returns the channel for receiving responses.

func (s *RealtimeSession) SendChunk(ctx context.Context, chunk *types.MediaChunk) error

SendChunk sends an audio chunk to the server.

func (s *RealtimeSession) SendSystemContext(ctx context.Context, text string) error

SendSystemContext sends a text message as context without completing the turn.

func (s *RealtimeSession) SendText(ctx context.Context, text string) error

SendText sends a text message and triggers a response.

func (s *RealtimeSession) SendToolResponse(ctx context.Context, toolCallID, result string) error

SendToolResponse sends the result of a tool execution back to the model.

func (s *RealtimeSession) SendToolResponses(ctx context.Context, responses []providers.ToolResponse) error

SendToolResponses sends multiple tool results at once (for parallel tool calls).

func (s *RealtimeSession) TriggerResponse(config *ResponseConfig) error

TriggerResponse manually triggers a response from the model.

RealtimeSessionConfig configures a new OpenAI Realtime streaming session.

type RealtimeSessionConfig struct {
// Model specifies the model to use (e.g., "gpt-4o-realtime-preview").
Model string
// Modalities specifies the input/output modalities.
// Valid values: "text", "audio"
// Default: ["text", "audio"]
Modalities []string
// Instructions is the system prompt for the session.
Instructions string
// Voice selects the voice for audio output.
// Options: "alloy", "echo", "fable", "onyx", "nova", "shimmer"
// Default: "alloy"
Voice string
// InputAudioFormat specifies the format for input audio.
// Options: "pcm16", "g711_ulaw", "g711_alaw"
// Default: "pcm16"
InputAudioFormat string
// OutputAudioFormat specifies the format for output audio.
// Options: "pcm16", "g711_ulaw", "g711_alaw"
// Default: "pcm16"
OutputAudioFormat string
// InputAudioTranscription configures transcription of input audio.
// If nil, input transcription is disabled.
InputAudioTranscription *TranscriptionConfig
// TurnDetection configures server-side voice activity detection.
// If nil, VAD is disabled and turn management is manual.
TurnDetection *TurnDetectionConfig
// Tools defines available functions for the session.
Tools []RealtimeToolDefinition
// Temperature controls randomness (0.6-1.2, default 0.8).
Temperature float64
// MaxResponseOutputTokens limits response length.
// Use "inf" for unlimited, or a specific number.
MaxResponseOutputTokens interface{}
}

func DefaultRealtimeSessionConfig() RealtimeSessionConfig

DefaultRealtimeSessionConfig returns sensible defaults for a Realtime session.
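
A configuration sketch starting from the defaults: enable server-side VAD, register one function tool (the tool shown is hypothetical), and open a session directly with NewRealtimeSession; the API key comes from the environment here:

cfg := openai.DefaultRealtimeSessionConfig()
cfg.Instructions = "You are a concise voice assistant."
cfg.Voice = "nova"
cfg.TurnDetection = &openai.TurnDetectionConfig{
    Type:              "server_vad",
    Threshold:         0.5,
    SilenceDurationMs: 500,
    CreateResponse:    true,
}
cfg.Tools = []openai.RealtimeToolDefinition{{
    Type:        "function",
    Name:        "get_weather", // hypothetical tool
    Description: "Look up the current weather for a city.",
    Parameters: map[string]interface{}{
        "type": "object",
        "properties": map[string]interface{}{
            "city": map[string]interface{}{"type": "string"},
        },
    },
}}

session, err := openai.NewRealtimeSession(ctx, os.Getenv("OPENAI_API_KEY"), &cfg)
if err != nil {
    log.Fatal(err)
}
defer session.Close()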

RealtimeToolDef is the tool definition format for session config.

type RealtimeToolDef struct {
Type string `json:"type"`
Name string `json:"name"`
Description string `json:"description,omitempty"`
Parameters map[string]interface{} `json:"parameters,omitempty"`
}

RealtimeToolDefinition defines a function available in the session.

type RealtimeToolDefinition struct {
// Type is always "function" for function tools.
Type string `json:"type"`
// Name is the function name.
Name string `json:"name"`
// Description explains what the function does.
Description string `json:"description,omitempty"`
// Parameters is the JSON Schema for function parameters.
Parameters map[string]interface{} `json:"parameters,omitempty"`
}

RealtimeWebSocket manages WebSocket connections for OpenAI Realtime API.

type RealtimeWebSocket struct {
// contains filtered or unexported fields
}

func NewRealtimeWebSocket(model, apiKey string) *RealtimeWebSocket

NewRealtimeWebSocket creates a new WebSocket manager for OpenAI Realtime API.

func (ws *RealtimeWebSocket) Close() error

Close closes the WebSocket connection gracefully.

func (ws *RealtimeWebSocket) Connect(ctx context.Context) error

Connect establishes a WebSocket connection to the OpenAI Realtime API.

func (ws *RealtimeWebSocket) ConnectWithRetry(ctx context.Context) error

ConnectWithRetry attempts to connect with exponential backoff.

func (ws *RealtimeWebSocket) IsClosed() bool

IsClosed returns whether the WebSocket is closed.

func (ws *RealtimeWebSocket) Receive(ctx context.Context) ([]byte, error)

Receive reads a message from the WebSocket with context support.

func (ws *RealtimeWebSocket) ReceiveLoop(ctx context.Context, msgCh chan<- []byte) error

ReceiveLoop continuously reads messages and sends them to the provided channel. It returns when the connection is closed or an error occurs.

func (ws *RealtimeWebSocket) Send(msg interface{}) error

Send sends a message to the WebSocket.

func (ws *RealtimeWebSocket) StartHeartbeat(ctx context.Context, interval time.Duration)

StartHeartbeat starts a goroutine that sends ping messages periodically.
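
A low-level receive-path sketch combining the WebSocket manager with ParseServerEvent; whether parsed events are returned as pointers to the structs documented here is an assumption:

ws := openai.NewRealtimeWebSocket("gpt-4o-realtime-preview", os.Getenv("OPENAI_API_KEY"))
if err := ws.ConnectWithRetry(ctx); err != nil {
    log.Fatal(err)
}
defer ws.Close()

// Keep the connection alive with periodic pings.
ws.StartHeartbeat(ctx, 30*time.Second)

// Read raw messages on a channel and parse them into typed events.
msgCh := make(chan []byte, 16)
go func() {
    defer close(msgCh)
    _ = ws.ReceiveLoop(ctx, msgCh)
}()

for raw := range msgCh {
    evt, err := openai.ParseServerEvent(raw)
    if err != nil {
        log.Printf("parse error: %v", err)
        continue
    }
    switch e := evt.(type) {
    case *openai.ResponseTextDeltaEvent:
        fmt.Print(e.Delta)
    case *openai.ErrorEvent:
        log.Printf("server error: %s", e.Error.Message)
    }
}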

ResponseAudioDeltaEvent provides streaming audio.

type ResponseAudioDeltaEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
ContentIndex int `json:"content_index"`
Delta string `json:"delta"` // Base64-encoded audio
}

ResponseAudioDoneEvent indicates audio streaming completed.

type ResponseAudioDoneEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
ContentIndex int `json:"content_index"`
}

ResponseAudioTranscriptDeltaEvent provides streaming transcript.

type ResponseAudioTranscriptDeltaEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
ContentIndex int `json:"content_index"`
Delta string `json:"delta"`
}

ResponseAudioTranscriptDoneEvent indicates transcript completed.

type ResponseAudioTranscriptDoneEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
ContentIndex int `json:"content_index"`
Transcript string `json:"transcript"`
}

ResponseCancelEvent cancels an in-progress response.

type ResponseCancelEvent struct {
ClientEvent
}

ResponseConfig configures a response.

type ResponseConfig struct {
Modalities []string `json:"modalities,omitempty"`
Instructions string `json:"instructions,omitempty"`
Voice string `json:"voice,omitempty"`
OutputAudioFormat string `json:"output_audio_format,omitempty"`
Tools []RealtimeToolDef `json:"tools,omitempty"`
ToolChoice interface{} `json:"tool_choice,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
MaxOutputTokens interface{} `json:"max_output_tokens,omitempty"`
}

ResponseContentPartAddedEvent indicates content was added.

type ResponseContentPartAddedEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
ContentIndex int `json:"content_index"`
Part ConversationContent `json:"part"`
}

ResponseContentPartDoneEvent indicates content part completed.

type ResponseContentPartDoneEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
ContentIndex int `json:"content_index"`
Part ConversationContent `json:"part"`
}

ResponseCreateEvent triggers a response from the model.

type ResponseCreateEvent struct {
ClientEvent
Response *ResponseConfig `json:"response,omitempty"`
}

ResponseCreatedEvent indicates a response is starting.

type ResponseCreatedEvent struct {
ServerEvent
Response ResponseInfo `json:"response"`
}

ResponseDoneEvent indicates a response completed.

type ResponseDoneEvent struct {
ServerEvent
Response ResponseInfo `json:"response"`
}

ResponseFunctionCallArgumentsDeltaEvent provides streaming function args.

type ResponseFunctionCallArgumentsDeltaEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
CallID string `json:"call_id"`
Delta string `json:"delta"`
}

ResponseFunctionCallArgumentsDoneEvent indicates function args completed.

type ResponseFunctionCallArgumentsDoneEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
CallID string `json:"call_id"`
Name string `json:"name"`
Arguments string `json:"arguments"`
}

ResponseInfo contains response details.

type ResponseInfo struct {
ID string `json:"id"`
Object string `json:"object"`
Status string `json:"status"`
StatusDetails interface{} `json:"status_details"`
Output []ConversationItem `json:"output"`
Usage *UsageInfo `json:"usage"`
}

ResponseOutputItemAddedEvent indicates an output item was added.

type ResponseOutputItemAddedEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
OutputIndex int `json:"output_index"`
Item ConversationItem `json:"item"`
}

ResponseOutputItemDoneEvent indicates an output item completed.

type ResponseOutputItemDoneEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
OutputIndex int `json:"output_index"`
Item ConversationItem `json:"item"`
}

ResponseTextDeltaEvent provides streaming text.

type ResponseTextDeltaEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
ContentIndex int `json:"content_index"`
Delta string `json:"delta"`
}

ResponseTextDoneEvent indicates text streaming completed.

type ResponseTextDoneEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
ContentIndex int `json:"content_index"`
Text string `json:"text"`
}

ServerEvent is the base structure for all server events.

type ServerEvent struct {
EventID string `json:"event_id"`
Type string `json:"type"`
}

SessionConfig is the session configuration sent in session.update. Note: TurnDetection uses a pointer without omitempty so we can explicitly send null to disable VAD. Omitting it causes OpenAI to use the default (server_vad).

type SessionConfig struct {
Modalities []string `json:"modalities,omitempty"`
Instructions string `json:"instructions,omitempty"`
Voice string `json:"voice,omitempty"`
InputAudioFormat string `json:"input_audio_format,omitempty"`
OutputAudioFormat string `json:"output_audio_format,omitempty"`
InputAudioTranscription *TranscriptionConfig `json:"input_audio_transcription,omitempty"`
TurnDetection *TurnDetectionConfig `json:"turn_detection"` // No omitempty - null disables VAD
Tools []RealtimeToolDef `json:"tools,omitempty"`
ToolChoice interface{} `json:"tool_choice,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
MaxResponseOutputTokens interface{} `json:"max_response_output_tokens,omitempty"`
}
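
The note above is the reason TurnDetection has no omitempty tag: a nil pointer marshals to an explicit null, which disables VAD, while an omitted field would fall back to server_vad. A small demonstration (encoding/json assumed imported):

cfg := openai.SessionConfig{
    Modalities:    []string{"text"},
    TurnDetection: nil, // marshals as "turn_detection": null, disabling VAD
}
b, _ := json.Marshal(cfg)
fmt.Println(string(b)) // {"modalities":["text"],"turn_detection":null}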

SessionCreatedEvent is sent when the session is established.

type SessionCreatedEvent struct {
ServerEvent
Session SessionInfo `json:"session"`
}

SessionInfo contains session details.

type SessionInfo struct {
ID string `json:"id"`
Object string `json:"object"`
Model string `json:"model"`
Modalities []string `json:"modalities"`
Instructions string `json:"instructions"`
Voice string `json:"voice"`
InputAudioFormat string `json:"input_audio_format"`
OutputAudioFormat string `json:"output_audio_format"`
InputAudioTranscription *TranscriptionConfig `json:"input_audio_transcription"`
TurnDetection *TurnDetectionConfig `json:"turn_detection"`
Tools []RealtimeToolDef `json:"tools"`
Temperature float64 `json:"temperature"`
MaxResponseOutputTokens interface{} `json:"max_response_output_tokens"`
}

SessionUpdateEvent updates session configuration.

type SessionUpdateEvent struct {
ClientEvent
Session SessionConfig `json:"session"`
}

SessionUpdatedEvent confirms a session update.

type SessionUpdatedEvent struct {
ServerEvent
Session SessionInfo `json:"session"`
}

ToolProvider extends the OpenAI Provider with tool support

type ToolProvider struct {
*Provider
}

func NewToolProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool, additionalConfig map[string]any) *ToolProvider

NewToolProvider creates a new OpenAI provider with tool support

func NewToolProviderWithCredential(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool, additionalConfig map[string]any, cred providers.Credential) *ToolProvider

NewToolProviderWithCredential creates an OpenAI tool provider with explicit credential.

func (p *ToolProvider) BuildTooling(descriptors []*providers.ToolDescriptor) (providers.ProviderTools, error)

BuildTooling converts tool descriptors to OpenAI format

func (p *ToolProvider) PredictMultimodalWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, toolChoice string) (providers.PredictionResponse, []types.MessageToolCall, error)

PredictMultimodalWithTools implements the providers.MultimodalToolSupport interface for ToolProvider. This allows combining multimodal content (images) with tool calls in a single request.

func (p *ToolProvider) PredictStreamWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, toolChoice string) (<-chan providers.StreamChunk, error)

PredictStreamWithTools performs a streaming predict request with tool support

func (p *ToolProvider) PredictWithTools(ctx context.Context, req providers.PredictionRequest, tools providers.ProviderTools, toolChoice string) (providers.PredictionResponse, []types.MessageToolCall, error)

PredictWithTools performs a prediction request with tool support

TranscriptionConfig configures audio transcription.

type TranscriptionConfig struct {
// Model specifies the transcription model.
// Default: "whisper-1"
Model string `json:"model,omitempty"`
}

TurnDetectionConfig configures server-side VAD.

type TurnDetectionConfig struct {
// Type specifies the VAD type.
// Options: "server_vad", "semantic_vad"
Type string `json:"type"`
// Threshold is the activation threshold (0.0-1.0).
// Default: 0.5
Threshold float64 `json:"threshold,omitempty"`
// PrefixPaddingMs is audio padding before speech in milliseconds.
// Default: 300
PrefixPaddingMs int `json:"prefix_padding_ms,omitempty"`
// SilenceDurationMs is silence duration to detect end of speech.
// Default: 500
SilenceDurationMs int `json:"silence_duration_ms,omitempty"`
// CreateResponse determines if a response is automatically created
// when speech ends. Default: true
CreateResponse bool `json:"create_response,omitempty"`
}

UsageInfo contains token usage information.

type UsageInfo struct {
TotalTokens int `json:"total_tokens"`
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
InputTokenDetails struct {
CachedTokens int `json:"cached_tokens"`
TextTokens int `json:"text_tokens"`
AudioTokens int `json:"audio_tokens"`
} `json:"input_token_details"`
OutputTokenDetails struct {
TextTokens int `json:"text_tokens"`
AudioTokens int `json:"audio_tokens"`
} `json:"output_token_details"`
}
import "github.com/AltairaLabs/PromptKit/runtime/providers/replay"

Package replay provides a provider that replays recorded sessions deterministically.

ErrSessionClosed is returned when attempting to use a closed session.

var ErrSessionClosed = errors.New("session is closed")

Config configures the replay provider.

type Config struct {
// Timing controls response delivery timing.
// Default: TimingInstant
Timing TimingMode
// Speed is the multiplier for TimingAccelerated mode.
// Default: 2.0 (2x speed)
Speed float64
// MatchMode controls how requests are matched to recorded responses.
// Default: MatchByTurn (sequential order)
MatchMode MatchMode
// Metadata contains additional information about the recording.
// This can include judge targets, tags, and provider information
// that should flow through to evaluation contexts.
Metadata map[string]interface{}
}

func DefaultConfig() Config

DefaultConfig returns sensible defaults for replay.
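
A replay setup sketch; the recording path is a placeholder:

cfg := replay.DefaultConfig()
cfg.Timing = replay.TimingAccelerated
cfg.Speed = 4.0 // 4x the original timing
cfg.MatchMode = replay.MatchByContent

p, err := replay.NewProviderFromFile("recordings/session-001.json", &cfg)
if err != nil {
    log.Fatal(err)
}

fmt.Printf("recorded turns: %d\n", p.TurnCount())

// Reset rewinds replay to the first recorded turn.
p.Reset()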

MatchMode controls how incoming requests are matched to recorded responses.

type MatchMode int

const (
// MatchByTurn matches responses in sequential order (turn 1, 2, 3, ...).
MatchByTurn MatchMode = iota
// MatchByContent matches by comparing the last user message content.
MatchByContent
)

Provider replays recorded session responses without making LLM calls.

type Provider struct {
// contains filtered or unexported fields
}

func NewProvider(rec *recording.SessionRecording, cfg *Config) (*Provider, error)

NewProvider creates a replay provider from a session recording.

func NewProviderFromFile(path string, cfg *Config) (*Provider, error)

NewProviderFromFile loads a recording file and creates a replay provider.

func (p *Provider) CalculateCost(inputTokens, outputTokens, cachedTokens int) types.CostInfo

CalculateCost returns zero cost as replays don’t incur real costs.

func (p *Provider) Close() error

Close is a no-op for replay provider.

func (p *Provider) CurrentTurn() int

CurrentTurn returns the current turn index (0-based).

func (p *Provider) GetMetadata() map[string]interface{}

GetMetadata returns metadata about the recording. This includes judge targets, tags, and provider information that can be used by evaluation frameworks and assertions.

func (p *Provider) ID() string

ID returns the provider identifier.

func (p *Provider) Model() string

Model returns the model name. For the replay provider, this returns “replay”.

func (p *Provider) Predict(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)

Predict returns the next recorded response.

func (p *Provider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictStream returns the recorded response as a single stream chunk.

func (p *Provider) Reset()

Reset resets the provider to replay from the beginning.

func (p *Provider) ShouldIncludeRawOutput() bool

ShouldIncludeRawOutput returns false as replays don’t have raw output.

func (p *Provider) SupportsStreaming() bool

SupportsStreaming returns true as replay supports streaming.

func (p *Provider) TurnCount() int

TurnCount returns the number of recorded turns available.

StreamSession implements StreamInputSession for replaying recorded sessions.

type StreamSession struct {
// contains filtered or unexported fields
}

func (s *StreamSession) Close() error

Close ends the streaming session.

func (s *StreamSession) Done() <-chan struct{}

Done returns a channel that closes when the session ends.

func (s *StreamSession) EndInput()

EndInput signals the end of input and triggers the next response. This implements the EndInputter interface expected by DuplexProviderStage.

func (s *StreamSession) Error() error

Error returns any session error.

func (s *StreamSession) RemainingTurns() int

RemainingTurns returns the number of responses left to replay.

func (s *StreamSession) Response() <-chan providers.StreamChunk

Response returns the response channel.

func (s *StreamSession) SendChunk(ctx context.Context, chunk *types.MediaChunk) error

SendChunk receives input chunks and triggers replay of the next response.

func (s *StreamSession) SendSystemContext(ctx context.Context, text string) error

SendSystemContext sends system context (ignored for replay).

func (s *StreamSession) SendText(ctx context.Context, text string) error

SendText receives text input and triggers replay of the next response.

func (s *StreamSession) TriggerNextResponse(ctx context.Context) error

TriggerNextResponse manually triggers the next response (for testing).

StreamingProvider extends Provider with streaming support for duplex replay.

type StreamingProvider struct {
*Provider
// contains filtered or unexported fields
}

func NewStreamingProviderFromArenaOutput(path string, cfg *Config) (*StreamingProvider, error)

NewStreamingProviderFromArenaOutput creates a streaming replay provider from an arena output file.

func (p *StreamingProvider) CreateStreamSession(ctx context.Context, req *providers.StreamingInputConfig) (providers.StreamInputSession, error)

CreateStreamSession creates a new bidirectional streaming session for replay.

func (p *StreamingProvider) GetStreamingCapabilities() providers.StreamingCapabilities

GetStreamingCapabilities returns detailed information about streaming support.

func (p *StreamingProvider) SupportsStreamInput() []string

SupportsStreamInput returns the media types supported for streaming input.

TimingMode controls how response timing is handled during replay.

type TimingMode int

const (
// TimingInstant delivers responses immediately without delay.
TimingInstant TimingMode = iota
// TimingRealTime delivers responses with original timing preserved.
TimingRealTime
// TimingAccelerated delivers responses with accelerated timing.
TimingAccelerated
)
import "github.com/AltairaLabs/PromptKit/runtime/providers/vllm"

Package vllm provides vLLM LLM provider integration for high-performance inference.

Package vllm also provides multimodal support for the vLLM provider.

Provider implements the Provider interface for vLLM

type Provider struct {
providers.BaseProvider
// contains filtered or unexported fields
}

func NewProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool, additionalConfig map[string]any) *Provider

NewProvider creates a new vLLM provider

func (p *Provider) BuildTooling(descriptors []*providers.ToolDescriptor) (any, error)

BuildTooling converts tool descriptors to vLLM format

func (p *Provider) CalculateCost(tokensIn, tokensOut, cachedTokens int) types.CostInfo

CalculateCost calculates the cost breakdown. vLLM is typically self-hosted, so the default cost is $0 unless custom pricing is configured.

func (p *Provider) GetMultimodalCapabilities() providers.MultimodalCapabilities

GetMultimodalCapabilities returns the multimodal capabilities of the vLLM provider

func (p *Provider) Model() string

Model returns the model name/identifier used by this provider.

func (p *Provider) Predict(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)

Predict sends a prediction request to vLLM

func (p *Provider) PredictMultimodal(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)

PredictMultimodal sends a multimodal prediction request to vLLM. vLLM supports vision models via its OpenAI-compatible API using the image_url format.

func (p *Provider) PredictMultimodalStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictMultimodalStream sends a streaming multimodal prediction request to vLLM

func (p *Provider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictStream streams a prediction response from vLLM

func (p *Provider) PredictStreamWithTools(ctx context.Context, req providers.PredictionRequest, tools any, toolChoice string) (<-chan providers.StreamChunk, error)

PredictStreamWithTools performs a streaming prediction request with tool support

func (p *Provider) PredictWithTools(ctx context.Context, req providers.PredictionRequest, tools any, toolChoice string) (providers.PredictionResponse, []types.MessageToolCall, error)

PredictWithTools performs a prediction request with tool support

import "github.com/AltairaLabs/PromptKit/runtime/providers/voyageai"

Package voyageai provides embedding generation via the Voyage AI API. Voyage AI is recommended by Anthropic for embeddings with Claude-based systems.

Model constants for Voyage AI embeddings.

const (
// DefaultModel is the recommended general-purpose model.
DefaultModel = "voyage-3.5"
// ModelVoyage35 is the latest general-purpose model for best performance.
ModelVoyage35 = "voyage-3.5"
// ModelVoyage35Lite is an efficient model with lower latency.
ModelVoyage35Lite = "voyage-3.5-lite"
// ModelVoyage3Large is a high-capacity model for complex tasks.
ModelVoyage3Large = "voyage-3-large"
// ModelVoyageCode3 is optimized for code embeddings.
ModelVoyageCode3 = "voyage-code-3"
// ModelVoyageFinance2 is optimized for finance domain.
ModelVoyageFinance2 = "voyage-finance-2"
// ModelVoyageLaw2 is optimized for legal domain.
ModelVoyageLaw2 = "voyage-law-2"
)

Dimension constants for Voyage AI embeddings.

const (
Dimensions2048 = 2048
Dimensions1024 = 1024 // Default
Dimensions512 = 512
Dimensions256 = 256
)

InputType constants for retrieval optimization.

const (
// InputTypeQuery indicates the input is a search query.
InputTypeQuery = "query"
// InputTypeDocument indicates the input is a document to be indexed.
InputTypeDocument = "document"
)

EmbeddingOption configures the EmbeddingProvider.

type EmbeddingOption func(*EmbeddingProvider)

func WithAPIKey(key string) EmbeddingOption

WithAPIKey sets the API key explicitly.

func WithBaseURL(url string) EmbeddingOption

WithBaseURL sets a custom base URL.

func WithDimensions(dims int) EmbeddingOption

WithDimensions sets the output embedding dimensions.

func WithHTTPClient(client *http.Client) EmbeddingOption

WithHTTPClient sets a custom HTTP client.

func WithInputType(inputType string) EmbeddingOption

WithInputType sets the input type for retrieval optimization. Use “query” for search queries and “document” for documents to be indexed.

func WithModel(model string) EmbeddingOption

WithModel sets the embedding model.

EmbeddingProvider implements embedding generation via Voyage AI API.

type EmbeddingProvider struct {
*providers.BaseEmbeddingProvider
// contains filtered or unexported fields
}

func NewEmbeddingProvider(opts ...EmbeddingOption) (*EmbeddingProvider, error)

NewEmbeddingProvider creates a Voyage AI embedding provider.

func (p *EmbeddingProvider) Embed(ctx context.Context, req providers.EmbeddingRequest) (providers.EmbeddingResponse, error)

Embed generates embeddings for the given texts.
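
A retrieval-oriented sketch: a query-side embedder with a lighter model and reduced dimensions; the environment variable name is an assumption, and the fields of providers.EmbeddingRequest are not shown here:

queryEmbedder, err := voyageai.NewEmbeddingProvider(
    voyageai.WithModel(voyageai.ModelVoyage35Lite),
    voyageai.WithInputType(voyageai.InputTypeQuery),
    voyageai.WithDimensions(voyageai.Dimensions512),
    voyageai.WithAPIKey(os.Getenv("VOYAGE_API_KEY")), // env var name assumed
)
if err != nil {
    log.Fatal(err)
}
_ = queryEmbedder // call Embed with a providers.EmbeddingRequest when ready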

import "github.com/AltairaLabs/PromptKit/runtime/storage/local"

Package local provides local filesystem-based storage implementation.

FileStore implements MediaStorageService using local filesystem storage.

type FileStore struct {
// contains filtered or unexported fields
}

func NewFileStore(config FileStoreConfig) (*FileStore, error)

NewFileStore creates a new local filesystem storage backend.

func (fs *FileStore) DeleteMedia(ctx context.Context, reference storage.Reference) error

DeleteMedia implements MediaStorageService.DeleteMedia

func (fs *FileStore) GetURL(ctx context.Context, reference storage.Reference, expiry time.Duration) (string, error)

GetURL implements MediaStorageService.GetURL

func (fs *FileStore) RetrieveMedia(ctx context.Context, reference storage.Reference) (*types.MediaContent, error)

RetrieveMedia implements MediaStorageService.RetrieveMedia

func (fs *FileStore) StoreMedia(ctx context.Context, content *types.MediaContent, metadata *storage.MediaMetadata) (storage.Reference, error)

StoreMedia implements MediaStorageService.StoreMedia

FileStoreConfig configures the local filesystem storage backend.

type FileStoreConfig struct {
// BaseDir is the root directory for media storage
BaseDir string
// Organization determines how files are organized in directories
Organization storage.OrganizationMode
// EnableDeduplication enables content-based deduplication using SHA-256 hashing
EnableDeduplication bool
// DefaultPolicy is the default retention policy to apply to new media
DefaultPolicy string
}
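
A storage setup sketch; the base directory and policy name are placeholders, and Organization is left at its zero value because the storage.OrganizationMode constants are not listed in this section:

store, err := local.NewFileStore(local.FileStoreConfig{
    BaseDir:             "/var/lib/promptkit/media", // placeholder path
    EnableDeduplication: true,
    DefaultPolicy:       "retain-30days",
})
if err != nil {
    log.Fatal(err)
}
_ = store // use StoreMedia / RetrieveMedia / GetURL / DeleteMedia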
import "github.com/AltairaLabs/PromptKit/runtime/storage/policy"

Package policy provides storage retention and cleanup policy management.

func ParsePolicyName(name string) (string, time.Duration, error)

ParsePolicyName extracts policy type and parameters from a policy name. Supported formats:

  • “delete-after-Xmin” - Delete after X minutes
  • “retain-Xdays” - Retain for X days
  • “retain-Xhours” - Retain for X hours

Returns (policyType, duration, error)
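
A parsing sketch over the supported formats:

for _, name := range []string{"delete-after-5min", "retain-30days", "retain-12hours"} {
    policyType, d, err := policy.ParsePolicyName(name)
    if err != nil {
        log.Printf("%s: %v", name, err)
        continue
    }
    fmt.Printf("%s -> type=%s duration=%s\n", name, policyType, d)
}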

Config defines a retention policy for media storage. Policies control how long media should be retained and when it should be deleted.

type Config struct {
// Name is a unique identifier for this policy (e.g., "delete-after-5min", "retain-30days")
Name string `json:"name" yaml:"name"`
// Description provides human-readable documentation for this policy
Description string `json:"description,omitempty" yaml:"description,omitempty"`
// Rules contains policy-specific configuration (e.g., retention duration)
Rules map[string]interface{} `json:"rules,omitempty" yaml:"rules,omitempty"`
}

func (p *Config) Validate() error

Validate checks if a policy configuration is valid.

Metadata stores policy information in .meta files alongside media. This metadata is used by the enforcement system to determine when to delete files.

type Metadata struct {
// PolicyName identifies the policy applied to this media
PolicyName string `json:"policy_name"`
// ExpiresAt is when this media should be deleted (nil = never expires)
ExpiresAt *time.Time `json:"expires_at,omitempty"`
// CreatedAt is when the policy was applied
CreatedAt time.Time `json:"created_at"`
}

TimeBasedPolicyHandler implements PolicyHandler for time-based retention policies. It applies expiration times to media based on policy names and enforces deletion of expired media through background scanning.

type TimeBasedPolicyHandler struct {
// contains filtered or unexported fields
}

func NewTimeBasedPolicyHandler(enforcementInterval time.Duration) *TimeBasedPolicyHandler

NewTimeBasedPolicyHandler creates a new time-based policy handler.

func (h *TimeBasedPolicyHandler) ApplyPolicy(ctx context.Context, metadata *storage.MediaMetadata) error

ApplyPolicy implements storage.PolicyHandler.ApplyPolicy

func (h *TimeBasedPolicyHandler) EnforcePolicy(ctx context.Context, baseDir string) error

EnforcePolicy implements storage.PolicyHandler.EnforcePolicy

func (h *TimeBasedPolicyHandler) RegisterPolicy(policy Config) error

RegisterPolicy adds a policy configuration to the handler.

func (h *TimeBasedPolicyHandler) StartEnforcement(ctx context.Context, baseDir string)

StartEnforcement starts a background goroutine that periodically enforces policies.

func (h *TimeBasedPolicyHandler) Stop()

Stop signals the enforcement goroutine to stop and waits for it to finish.
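
A lifecycle sketch; the enforcement interval, policy, and base directory are placeholders:

h := policy.NewTimeBasedPolicyHandler(10 * time.Minute)

if err := h.RegisterPolicy(policy.Config{
    Name:        "retain-30days",
    Description: "Keep media for 30 days, then delete.",
}); err != nil {
    log.Fatal(err)
}

// Scan the base directory periodically and delete expired media until
// ctx is cancelled or Stop is called.
h.StartEnforcement(ctx, "/var/lib/promptkit/media")
defer h.Stop()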

Generated by gomarkdoc