Stream Processing Guide
Handle real-time AI streaming responses efficiently.

Overview
Streaming allows you to display AI responses as they’re generated, improving perceived performance and user experience.

Basic Streaming
TypeScript/JavaScript
Copy
// Basic streaming: read the response body chunk by chunk and display each
// piece as it arrives instead of waiting for the full completion.
const response = await fetch('https://regpilot.dev/api/ai/chat', {
  method: 'POST',
  headers: {
    'X-API-Key': process.env.REGPILOT_API_KEY!,
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    messages: [{ role: 'user', content: 'Write a story' }],
    quality: 'balanced'
  })
});

// Fail fast on HTTP errors instead of trying to stream an error body.
if (!response.ok) throw new Error(`HTTP ${response.status}`);
if (!response.body) throw new Error('Response has no body');

const reader = response.body.getReader();
const decoder = new TextDecoder();
while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  // { stream: true } buffers multi-byte UTF-8 sequences that span chunk
  // boundaries instead of emitting replacement characters.
  const text = decoder.decode(value, { stream: true });
  process.stdout.write(text); // Display incrementally
}
// Flush any bytes the decoder buffered from the final chunk.
process.stdout.write(decoder.decode());
Python
Copy
# Basic streaming with requests: print each chunk as it arrives.
import requests
import os

response = requests.post(
    'https://regpilot.dev/api/ai/chat',
    headers={
        'X-API-Key': os.getenv('REGPILOT_API_KEY'),
        'Content-Type': 'application/json'
    },
    json={
        'messages': [{'role': 'user', 'content': 'Write a story'}],
        'quality': 'balanced'
    },
    stream=True
)
# Fail fast on HTTP errors instead of streaming an error body.
response.raise_for_status()

# decode_unicode=True uses an incremental decoder, so a multi-byte UTF-8
# character split across two chunks is reassembled correctly (a plain
# chunk.decode('utf-8') would raise or garble on a split character).
response.encoding = 'utf-8'
for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
    if chunk:
        print(chunk, end='', flush=True)
React Streaming
With useEffect
Copy
'use client';
import { useState } from 'react';

/**
 * Minimal streaming chat UI: appends each decoded chunk to component
 * state so the response renders incrementally as it arrives.
 */
export default function StreamingChat() {
  const [response, setResponse] = useState('');
  const [loading, setLoading] = useState(false);

  async function streamChat(message: string) {
    setLoading(true);
    setResponse('');
    try {
      const res = await fetch('/api/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ message })
      });
      if (!res.ok || !res.body) throw new Error(`HTTP ${res.status}`);
      const reader = res.body.getReader();
      const decoder = new TextDecoder();
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        // { stream: true } handles multi-byte characters split across chunks.
        const text = decoder.decode(value, { stream: true });
        setResponse(prev => prev + text);
      }
    } finally {
      // Reset even if the fetch or stream throws, so the button re-enables.
      setLoading(false);
    }
  }

  return (
    <div>
      <div className="response">{response}</div>
      <button onClick={() => streamChat('Hello')} disabled={loading}>
        {loading ? 'Streaming...' : 'Send'}
      </button>
    </div>
  );
}
Custom Hook
Copy
/**
 * Reusable streaming-chat hook. Exposes the accumulated response text,
 * a loading flag, the last error (if any), and a `stream` function that
 * POSTs a message and appends decoded chunks as they arrive.
 */
function useStreamingChat() {
  const [response, setResponse] = useState('');
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<Error | null>(null);

  const stream = async (message: string) => {
    setLoading(true);
    setResponse('');
    setError(null);
    try {
      const res = await fetch('/api/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ message })
      });
      if (!res.ok) throw new Error(`HTTP ${res.status}`);
      if (!res.body) throw new Error('Response has no body');
      const reader = res.body.getReader();
      const decoder = new TextDecoder();
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        // { stream: true } handles multi-byte characters split across chunks.
        setResponse(prev => prev + decoder.decode(value, { stream: true }));
      }
    } catch (err) {
      setError(err as Error);
    } finally {
      setLoading(false);
    }
  };

  return { response, loading, error, stream };
}
// Usage
function Chat() {
const { response, loading, stream } = useStreamingChat();
return (
<div>
<div>{response}</div>
<button onClick={() => stream('Hello')} disabled={loading}>
Send
</button>
</div>
);
}
Next.js Streaming
App Router
Copy
// app/api/chat/route.ts
import { NextRequest, NextResponse } from 'next/server';

/**
 * Proxies a chat request to the upstream AI endpoint and streams the
 * response body straight through to the client without buffering.
 */
export async function POST(request: NextRequest) {
  const { message } = await request.json();
  const response = await fetch('https://regpilot.dev/api/ai/chat', {
    method: 'POST',
    headers: {
      'X-API-Key': process.env.REGPILOT_API_KEY!,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      messages: [{ role: 'user', content: message }]
    })
  });

  // Propagate upstream failures instead of streaming an error body to the
  // client as if it were a successful completion.
  if (!response.ok || !response.body) {
    return NextResponse.json(
      { error: `Upstream error: ${response.status}` },
      { status: response.status || 502 }
    );
  }

  // Stream directly to client
  return new NextResponse(response.body, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive'
    }
  });
}
Error Handling
Retry on Interruption
Copy
/**
 * Streams a chat completion, retrying the whole request with exponential
 * backoff (1s, 2s, 4s, ...) if the connection drops mid-stream.
 *
 * @param messages - chat messages payload forwarded to the API
 * @param maxRetries - total attempts before the last error is rethrown
 * @returns the full accumulated response text
 */
async function streamWithRetry(messages: unknown[], maxRetries = 3): Promise<string> {
  for (let i = 0; i < maxRetries; i++) {
    try {
      const response = await fetch('https://regpilot.dev/api/ai/chat', {
        method: 'POST',
        headers: {
          'X-API-Key': process.env.REGPILOT_API_KEY!,
          'Content-Type': 'application/json'
        },
        body: JSON.stringify({ messages })
      });
      if (!response.ok) throw new Error(`HTTP ${response.status}`);
      if (!response.body) throw new Error('Response has no body');

      const reader = response.body.getReader();
      const decoder = new TextDecoder();
      let fullResponse = '';
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        // { stream: true } buffers partial multi-byte sequences across chunks.
        const text = decoder.decode(value, { stream: true });
        fullResponse += text;
        // Display incrementally
      }
      // Flush any bytes buffered by the decoder from the final chunk.
      fullResponse += decoder.decode();
      return fullResponse;
    } catch (error) {
      console.error(`Stream attempt ${i + 1} failed:`, error);
      if (i === maxRetries - 1) throw error;
      // Exponential backoff before the next attempt.
      await new Promise(r => setTimeout(r, 1000 * Math.pow(2, i)));
    }
  }
  // Unreachable: the final failed attempt rethrows above. Satisfies
  // noImplicitReturns without returning undefined.
  throw new Error('streamWithRetry: exhausted retries');
}
Timeout Handling
Copy
/**
 * Starts a streaming chat request that is aborted if it exceeds timeoutMs.
 *
 * @param messages - chat messages payload forwarded to the API
 * @param timeoutMs - abort deadline in milliseconds (default 30s)
 * @throws the abort error (name 'AbortError') on timeout; other fetch
 *         errors are rethrown unchanged
 */
async function streamWithTimeout(messages: unknown[], timeoutMs = 30000) {
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const response = await fetch('https://regpilot.dev/api/ai/chat', {
      method: 'POST',
      signal: controller.signal,
      headers: {
        'X-API-Key': process.env.REGPILOT_API_KEY!,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({ messages })
    });
    // Process stream...
  } catch (error) {
    // catch variables are `unknown` under strict mode: narrow before use.
    if (error instanceof Error && error.name === 'AbortError') {
      console.error('Stream timeout after', timeoutMs, 'ms');
    }
    throw error;
  } finally {
    // Always clear the timer so it cannot fire after completion.
    clearTimeout(timeout);
  }
}
Best Practices
1. Buffer for UI Updates
Copy
// Throttle UI updates: accumulate decoded text in a buffer and flush it to
// the UI at most once per updateInterval, instead of on every chunk.
if (!response.body) throw new Error('Response has no body');
const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
let lastUpdate = Date.now();
const updateInterval = 50; // Update UI every 50ms
while (true) {
  const { done, value } = await reader.read();
  if (done) {
    // Flush bytes the decoder buffered from the last chunk, then push
    // whatever text is still pending.
    buffer += decoder.decode();
    if (buffer) updateUI(buffer); // Final update
    break;
  }
  buffer += decoder.decode(value, { stream: true });
  // Update UI periodically, not on every chunk
  if (Date.now() - lastUpdate > updateInterval) {
    updateUI(buffer);
    buffer = '';
    lastUpdate = Date.now();
  }
}
2. Handle Partial Tokens
Copy
const decoder = new TextDecoder();
// Use streaming option: { stream: true } tells the decoder a multi-byte
// UTF-8 sequence may continue in the next chunk, so it buffers the partial
// bytes instead of emitting replacement characters.
const text = decoder.decode(value, { stream: true });
3. Cancel Streams
Copy
// Let the user abort an in-flight stream via an AbortController.
const controller = new AbortController();
// User can cancel: aborting rejects the fetch/stream with an 'AbortError'.
button.onclick = () => controller.abort();
const response = await fetch('...', {
signal: controller.signal
});
Performance Tips
- Update UI efficiently - Don’t update DOM on every chunk
- Use requestAnimationFrame for smooth rendering
- Buffer updates - Batch UI updates every 50-100ms
- Cancel appropriately - Clean up aborted streams
- Handle errors gracefully - Show error states clearly
Related: Multi-turn Conversations | Best Practices