TTS API Reference

Complete reference for text-to-speech services.

Service Interface

// Service is a text-to-speech provider.
type Service interface {
    // Name returns the provider identifier (e.g., "openai", "elevenlabs").
    Name() string
    // Synthesize converts text to audio and returns a reader for streaming
    // the audio data. The caller is responsible for closing the reader.
    Synthesize(ctx context.Context, text string, config SynthesisConfig) (io.ReadCloser, error)
    // SupportedVoices returns the voices available for this provider.
    SupportedVoices() []Voice
    // SupportedFormats returns the audio output formats this provider supports.
    SupportedFormats() []AudioFormat
}

Methods

Name

func (s Service) Name() string

Returns the provider identifier (e.g., “openai”, “elevenlabs”).

Synthesize

func (s Service) Synthesize(ctx context.Context, text string, config SynthesisConfig) (io.ReadCloser, error)

Converts text to audio. Returns a reader for streaming audio data. The caller is responsible for closing the reader.

SupportedVoices

func (s Service) SupportedVoices() []Voice

Returns available voices for this provider.

SupportedFormats

func (s Service) SupportedFormats() []AudioFormat

Returns supported audio output formats.

StreamingService Interface

// StreamingService extends Service with streaming synthesis capabilities
// for lower latency.
type StreamingService interface {
    Service
    // SynthesizeStream synthesizes text and returns a receive-only channel
    // of AudioChunk values, or an error.
    SynthesizeStream(ctx context.Context, text string, config SynthesisConfig) (<-chan AudioChunk, error)
}

Extends Service with streaming synthesis capabilities for lower latency.

SynthesisConfig

// SynthesisConfig holds the options passed to Service.Synthesize and
// StreamingService.SynthesizeStream. DefaultSynthesisConfig returns a value
// populated with defaults.
type SynthesisConfig struct {
    Voice    string      // Voice ID
    Format   AudioFormat // Output format
    Speed    float64     // Speech rate (0.25-4.0)
    Pitch    float64     // Pitch adjustment (-20 to 20)
    Language string      // Language code
    Model    string      // TTS model (provider-specific)
}

Fields

| Field | Type | Default | Description |
|-------|------|---------|-------------|
| Voice | string | "alloy" | Voice ID (provider-specific) |
| Format | AudioFormat | MP3 | Output audio format |
| Speed | float64 | 1.0 | Speech rate multiplier |
| Pitch | float64 | 0 | Pitch adjustment in semitones |
| Language | string | "" | Language code (e.g., "en-US") |
| Model | string | "" | TTS model (e.g., "tts-1-hd") |

Constructor

func DefaultSynthesisConfig() SynthesisConfig

Returns sensible defaults for synthesis.

Voice Type

// Voice describes a single voice offered by a provider, as returned by
// Service.SupportedVoices.
type Voice struct {
    ID          string // Provider-specific identifier
    Name        string // Human-readable name
    Language    string // Primary language code
    Gender      string // "male", "female", "neutral"
    Description string // Voice characteristics
    Preview     string // URL to voice sample
}

AudioFormat Type

// AudioFormat describes an audio output format. It is the type of
// SynthesisConfig.Format and of the elements returned by
// Service.SupportedFormats.
type AudioFormat struct {
    Name       string // Format identifier
    MIMEType   string // Content type
    SampleRate int    // Sample rate in Hz
    BitDepth   int    // Bits per sample
    Channels   int    // Number of channels
}

Predefined Formats

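| Constant | Name | MIME Type | Use Case |
|----------|------|-----------|----------|
| FormatMP3 | mp3 | audio/mpeg | Most compatible |
| FormatOpus | opus | audio/opus | Best for streaming |
| FormatAAC | aac | audio/aac | Apple devices |
| FormatFLAC | flac | audio/flac | Lossless quality |
| FormatPCM16 | pcm | audio/pcm | Raw processing |
| FormatWAV | wav | audio/wav | PCM with header |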

AudioChunk Type

// AudioChunk is one element delivered on the channel returned by
// StreamingService.SynthesizeStream.
type AudioChunk struct {
    Data  []byte // Raw audio bytes
    Index int    // Chunk sequence number
    Final bool   // Last chunk indicator
    Error error  // Error during synthesis
}

Providers

OpenAI TTS

func NewOpenAI(apiKey string) Service

Creates an OpenAI TTS service.

Voices:

| ID | Character |
|----|-----------|
| alloy | Neutral, versatile |
| echo | Warm, smooth |
| fable | Expressive, British |
| onyx | Deep, authoritative |
| nova | Friendly, youthful |
| shimmer | Clear, professional |

Models:

Example:

service := tts.NewOpenAI(os.Getenv("OPENAI_API_KEY"))

config := tts.SynthesisConfig{
    Voice:  "nova",
    Format: tts.FormatMP3,
    Model:  "tts-1-hd",
}

// Synthesize can fail; check the error before touching the reader.
reader, err := service.Synthesize(ctx, "Hello world", config)
if err != nil {
    log.Fatal(err)
}
// The caller is responsible for closing the returned reader.
defer reader.Close()

ElevenLabs TTS

func NewElevenLabs(apiKey string) Service

Creates an ElevenLabs TTS service.

Features:

Example:

service := tts.NewElevenLabs(os.Getenv("ELEVENLABS_API_KEY"))

// Print the ID and name of every voice the provider reports.
for _, voice := range service.SupportedVoices() {
    fmt.Printf("%s: %s\n", voice.ID, voice.Name)
}

Cartesia TTS

func NewCartesia(apiKey string) Service

Creates a Cartesia TTS service.

Features:

Example:

// Build the service with the API key read from the CARTESIA_API_KEY
// environment variable.
service := tts.NewCartesia(os.Getenv("CARTESIA_API_KEY"))

Error Types

// Sentinel errors for the TTS package. Compare against them with errors.Is.
var (
    // ErrInvalidVoice reports an invalid voice.
    ErrInvalidVoice = errors.New("invalid voice")
    // ErrInvalidFormat reports an unsupported audio format.
    ErrInvalidFormat = errors.New("unsupported format")
    // ErrTextTooLong reports input text that exceeds the maximum length.
    ErrTextTooLong = errors.New("text exceeds maximum length")
    // ErrRateLimited reports that the request was rate limited.
    ErrRateLimited = errors.New("rate limited")
    // ErrServiceDown reports that the service is unavailable.
    ErrServiceDown = errors.New("service unavailable")
)

Usage Examples

Basic Synthesis

service := tts.NewOpenAI(apiKey)

reader, err := service.Synthesize(ctx, "Hello!", tts.DefaultSynthesisConfig())
if err != nil {
    log.Fatal(err)
}
// The caller is responsible for closing the returned reader.
defer reader.Close()

// Reading the stream can fail part-way through, so check this error too
// instead of using possibly truncated audio.
data, err := io.ReadAll(reader)
if err != nil {
    log.Fatal(err)
}
// Use audio data...

Streaming Synthesis

service := tts.NewCartesia(apiKey)

// NewCartesia returns a plain Service, so assert for StreamingService
// before calling SynthesizeStream.
streamer, ok := service.(tts.StreamingService)
if !ok {
    log.Fatal("Provider doesn't support streaming")
}

stream, err := streamer.SynthesizeStream(ctx, "Hello world!", config)
if err != nil {
    log.Fatal(err)
}

// Play chunks as they arrive; stop at the first chunk that carries an error.
for c := range stream {
    if c.Error != nil {
        log.Printf("Error: %v", c.Error)
        break
    }
    playAudio(c.Data)
}

Custom Configuration

config := tts.SynthesisConfig{
    Voice:    "onyx",
    Format:   tts.FormatOpus,
    Speed:    0.9,           // Slightly slower
    Pitch:    -2,            // Slightly lower
    Language: "en-US",
    Model:    "tts-1-hd",
}

// Synthesize can fail; check the error before touching the reader.
reader, err := service.Synthesize(ctx, text, config)
if err != nil {
    log.Fatal(err)
}
// The caller is responsible for closing the returned reader.
defer reader.Close()

See Also