Skip to content

Tutorial 2: Streaming Responses

Learn how to implement real-time streaming for better user experience.

  • Stream LLM responses in real-time
  • Process chunks as they arrive
  • Handle streaming errors
  • Track progress and completion

Streaming provides immediate feedback:

Without Streaming:

[3 second wait...]
Here's a complete response about streaming...

With Streaming:

Here's→ a→ complete→ response→ about→ streaming...

Users see results immediately and can stop generation if needed.

Prerequisite: complete Tutorial 1 (Your First Conversation) or be familiar with basic SDK usage.

Use conv.Stream() instead of conv.Send():

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/AltairaLabs/PromptKit/sdk"
)

// main opens a conversation from a pack file and streams a single
// response to stdout, printing each text delta as it arrives.
func main() {
	conversation, err := sdk.Open("./hello.pack.json", "chat")
	if err != nil {
		log.Fatal(err)
	}
	defer conversation.Close()

	ctx := context.Background()

	fmt.Print("Assistant: ")
	for chunk := range conversation.Stream(ctx, "Tell me a short story") {
		// A non-nil Error means the stream failed; stop reading chunks.
		if chunk.Error != nil {
			log.Printf("Error: %v", chunk.Error)
			break
		}
		// The done marker signals the end of the stream.
		if chunk.Type == sdk.ChunkDone {
			fmt.Println("\n[Done]")
			break
		}
		// Otherwise show the text content immediately.
		fmt.Print(chunk.Text)
	}
}

Each chunk contains:

// StreamChunk is one increment of a streaming response. Inspect Type to
// decide how to handle the chunk: Text is only meaningful for ChunkText,
// and a non-nil Error means the stream has failed and will end.
type StreamChunk struct {
Type ChunkType // ChunkText, ChunkToolCall, ChunkDone
Text string // Text content (for ChunkText)
Error error // Non-nil if error occurred
}
  • sdk.ChunkText - Text content arrived
  • sdk.ChunkToolCall - Tool is being called
  • sdk.ChunkDone - Stream completed

Track the complete response while streaming:

// Accumulate the complete response while also echoing each chunk live.
var fullText string
for chunk := range conv.Stream(ctx, "Write a poem") {
if chunk.Error != nil {
log.Printf("Error: %v", chunk.Error)
break
}
if chunk.Type == sdk.ChunkDone {
break
}
fmt.Print(chunk.Text) // Real-time display
fullText += chunk.Text // Collect for later
// NOTE(review): for very long responses, a strings.Builder avoids the
// quadratic cost of repeated string concatenation in this loop.
}
// len() counts bytes, not runes; multi-byte UTF-8 text will report more
// "characters" than a human would count.
fmt.Printf("\n\nTotal length: %d characters\n", len(fullText))

Show progress indicators:

// Track a running character count as a simple progress indicator.
charCount := 0
for chunk := range conv.Stream(ctx, "Tell me about AI") {
if chunk.Error != nil {
break
}
if chunk.Type == sdk.ChunkDone {
// Report the final count once the stream completes.
fmt.Printf("\n\n[Complete - %d characters]\n", charCount)
break
}
fmt.Print(chunk.Text)
charCount += len(chunk.Text) // byte count; use utf8.RuneCountInString for runes
}

Build a streaming chatbot:

package main

import (
	"bufio"
	"context"
	"fmt"
	"log"
	"os"
	"strings"

	"github.com/AltairaLabs/PromptKit/sdk"
)

// main runs an interactive chat loop that streams each assistant reply to
// the terminal as it is generated. Type 'quit' (or close stdin) to exit.
func main() {
	conv, err := sdk.Open("./hello.pack.json", "chat")
	if err != nil {
		log.Fatal(err)
	}
	defer conv.Close()

	ctx := context.Background()
	scanner := bufio.NewScanner(os.Stdin)
	fmt.Println("Streaming chat ready! Type 'quit' to exit.")

	for {
		fmt.Print("\nYou: ")
		if !scanner.Scan() {
			// Scan returns false on EOF or on a read error; the original
			// silently ignored the latter. Report it before exiting.
			if err := scanner.Err(); err != nil {
				log.Printf("input error: %v", err)
			}
			break
		}
		msg := strings.TrimSpace(scanner.Text())
		if msg == "quit" {
			break
		}

		fmt.Print("Assistant: ")
		for chunk := range conv.Stream(ctx, msg) {
			// A non-nil Error ends this reply but not the chat session.
			if chunk.Error != nil {
				log.Printf("\nError: %v", chunk.Error)
				break
			}
			if chunk.Type == sdk.ChunkDone {
				fmt.Println()
				break
			}
			fmt.Print(chunk.Text)
		}
	}
}

Handle errors gracefully:

// Distinguish a deadline timeout from other streaming failures.
// Requires "errors" in the file's import list.
for chunk := range conv.Stream(ctx, "Generate content") {
if chunk.Error != nil {
// Check error type
if errors.Is(chunk.Error, context.DeadlineExceeded) {
fmt.Println("\n[Timeout - response truncated]")
} else {
fmt.Printf("\n[Error: %v]", chunk.Error)
}
break
}
if chunk.Type == sdk.ChunkDone {
break
}
fmt.Print(chunk.Text)
}

Set a streaming timeout:

// Bound the whole stream with a 30-second deadline. When it fires, the
// stream presumably surfaces context.DeadlineExceeded via chunk.Error —
// see the error-handling example above. Requires "time" in the imports.
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
for chunk := range conv.Stream(ctx, "Tell me a long story") {
if chunk.Error != nil {
break
}
if chunk.Type == sdk.ChunkDone {
break
}
fmt.Print(chunk.Text)
}

✅ Stream responses with conv.Stream()
✅ Process chunks as they arrive
✅ Track progress and completion
✅ Handle streaming errors
✅ Build interactive streaming apps

See the full example at sdk/examples/streaming/.