Apr 17, 2026

One of the interesting effects that chat normalized is streaming text back as a response. This is done to improve the perceived performance of the otherwise slow LLM inference process. Here is a Netlify function in JavaScript that invokes the OpenAI API and streams the results.

import OpenAI from 'openai';
import { Pinecone } from '@pinecone-database/pinecone';

/**
 * Netlify function: answers a question using retrieval-augmented generation
 * (Pinecone similarity search + OpenAI Responses API) and streams the
 * generated text back to the client as it is produced.
 *
 * @param {Request} request - Incoming HTTP request (Fetch API Request).
 * @param {Object} context - Netlify function context (unused here).
 * @returns {Promise<Response>} Streaming text response on success, or a JSON
 *   error response (400/405/500) otherwise.
 */
const handler = async ( request, context ) => {
  /* CORS: only requests from known origins are allowed */
  const allowed = [
    'https://kevinhoyt.com',
    'https://ketnerlake.com'
  ];
  const origin = request.headers.get( 'Origin' );

  const headers = {
    'Access-Control-Allow-Methods': 'OPTIONS, POST',
    'Access-Control-Allow-Headers': 'Content-Type, Accept, Origin',
    'Content-Type': 'application/json'
  };

  if( allowed.includes( origin ) ) {
    headers['Access-Control-Allow-Origin'] = origin;
  } else {
    // Unknown origin: reject early. Include headers so the JSON error body
    // carries a Content-Type (the original response had no headers at all).
    return new Response( JSON.stringify( {error: 'Bot Detected'} ), {
      status: 400,
      statusText: 'Bot Detected',
      headers
    } );
  }

  // CORS preflight
  if( request.method === 'OPTIONS' ) {
    return new Response( 'OK', {
      headers
    } );
  }

  /* Only POST for this function */
  if( request.method !== 'POST' ) {
    return new Response( JSON.stringify( {error: 'Method Not Allowed'} ), {
      status: 405,
      statusText: 'Method Not Allowed',
      headers
    } );
  }

  /* Main */
  try {
    const body = await request.json();

    // Honeypot check (bots): any non-empty value means a bot filled in the
    // hidden field. Truthiness alone covers the old `&& !== null` check.
    if( body.honey ) {
      return new Response( JSON.stringify( {error: 'Bot Detected'} ), {
        status: 400,
        statusText: 'Bot Detected',
        headers
      } );
    }

    // Validate incoming fields
    if( !body.question ) {
      return new Response( JSON.stringify( {error: 'Question is required'} ), {
        status: 400,
        statusText: 'Bad Request',
        headers
      } );
    }

    const openai = new OpenAI( {
      apiKey: process.env.OPENAI_API_KEY
    } );

    const pc = new Pinecone( {
      apiKey: process.env.PINECONE_API_KEY
    } );

    const index = pc.index( 'your-vector-store' );
    const topK = 3;

    // Create an embedding for the question; dimensions must match those used
    // when the vectors were indexed.
    const embeddingResponse = await openai.embeddings.create( {
      model: 'text-embedding-3-small',
      input: body.question,
      dimensions: 512
    } );
    const embedding = embeddingResponse.data[0].embedding;

    // Query Pinecone for similar content
    const queryResponse = await index.query( {
      vector: embedding,
      topK: topK,
      includeMetadata: true
    } );

    // Build the retrieval context from the matches.
    // FIX: this variable was named `context`, which redeclared the function's
    // `context` parameter — a SyntaxError ("Identifier 'context' has already
    // been declared").
    let retrieved = '';
    for( const match of queryResponse.matches ) {
      const page = match.metadata?.page ?? '?';
      const text = match.metadata?.text ?? '';
      retrieved += `[Page ${page}] ${text}\n---\n`;
    }

    const instructions = 'Your instructions here (system prompt)';

    let input = `Context:\n${retrieved}\n\nQuestion: ${body.question}`;
    if( body.summary ) {
      input = `Conversation summary:\n${body.summary}\n\n${input}`;
    }

    // Ask for a streamed response so tokens can be forwarded as they arrive
    const stream = await openai.responses.create( {
      model: 'gpt-5.1',
      instructions: instructions,
      input: input,
      stream: true
    } );

    const streamHeaders = {
      ...headers,
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive'
    };

    // One encoder instance is enough for every chunk (was allocated per chunk)
    const encoder = new TextEncoder();

    const readable = new ReadableStream( {
      async start( controller ) {
        try {
          for await ( const event of stream ) {
            // Forward only the incremental text deltas; ignore bookkeeping events
            if( event.type === 'response.output_text.delta' ) {
              controller.enqueue( encoder.encode( event.delta ) );
            }
          }
          controller.close();
        } catch ( err ) {
          controller.error( err );
        }
      }
    } );

    return new Response( readable, {
      headers: streamHeaders
    } );
  } catch ( error ) {
    return new Response( JSON.stringify( {error: error.toString()} ), {
      status: 500,
      headers
    } );
  }
};

export default handler;

// Netlify function configuration: expose this handler at the /api/chat path
// instead of the default /.netlify/functions/<name> URL.
export const config = {
  path: '/api/chat'
};
Back to Notes