API

// app/api/tts/route.ts
export const runtime = "edge";

const elevenLabsAPIKey = process.env.ELEVEN_LABS_API_KEY!;

export async function GET(request: Request) {
  // Read the TTS parameters from the query string.
  const { searchParams } = new URL(request.url);

  const voice_id = searchParams.get("voice_id");
  const model_id = searchParams.get("model_id") ?? "eleven_turbo_v2";
  const text = searchParams.get("text");

  if (!voice_id || !text) {
    return new Response("Missing voice_id or text", { status: 400 });
  }

  const headers = new Headers();
  headers.append("xi-api-key", elevenLabsAPIKey);
  headers.append("Content-Type", "application/json");

  const body = JSON.stringify({
    text,
    model_id,
  });

  const requestOptions: RequestInit = {
    method: "POST",
    headers,
    body,
    redirect: "follow",
  };

  // Proxy the ElevenLabs response (the audio bytes) straight back to the client.
  const response = await fetch(
    `https://api.elevenlabs.io/v1/text-to-speech/${voice_id}`,
    requestOptions
  );

  return response;
}
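
To sanity-check the route on its own, you can call it straight from the browser and play the returned audio with an HTMLAudioElement, before wiring up the avatar. A minimal sketch, where the voice_id and text values are placeholders:

// Quick browser-side check of the /api/tts route (placeholder values).
const params = new URLSearchParams({
  voice_id: "YOUR_VOICE_ID", // placeholder, not a real voice ID
  text: "Hello from the edge route!",
});

const response = await fetch(`/api/tts?${params}`);
const blob = await response.blob();

// Play the returned audio directly, without a Web Audio graph.
const audio = new Audio(URL.createObjectURL(blob));
await audio.play();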

Frontend

// components/TextToSpeech.tsx
import { useEffect, useRef, useState } from "react";

// Inside the TextToSpeech component (message, isLoading and voice_id
// come from the surrounding component state/props):
const audioContextRef = useRef<AudioContext>(new AudioContext());
const [avatarInitialized, setAvatarInitialized] = useState(false);

const { ... } = useAvatar({
  onAvatarLoaded: ({ avatarId }) => {
    setAvatarInitialized(true);
  },
});

// Once the avatar is ready, hand it the AudioContext.
useEffect(() => {
  if (!avatarInitialized) return;
  connectAudioContext(audioContextRef.current);
}, [avatarInitialized]);

// Whenever a new message arrives, fetch the speech audio, decode it,
// and play it through the avatar's audio graph.
useEffect(() => {
  (async () => {
    if (!audioContextRef.current) return;
    if (isLoading) return;

    // URL-encode the query so spaces and special characters survive.
    const params = new URLSearchParams({ voice_id, text: message });
    const response = await fetch(`/api/tts?${params}`);

    await audioContextRef.current.resume();
    const arrayBuffer = await response.arrayBuffer();
    const buffer = await audioContextRef.current.decodeAudioData(arrayBuffer);

    const audioSourceNode = audioContextRef.current.createBufferSource();
    audioSourceNode.buffer = buffer;

    connectAudioNode(audioSourceNode);
    audioSourceNode.start();
  })();
}, [message, isLoading]);
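
If the component can unmount while audio is playing, a small cleanup effect along these lines (not part of the snippet above) releases the AudioContext:

useEffect(() => {
  const audioContext = audioContextRef.current;
  // Close the context on unmount so the browser can free its audio resources.
  return () => {
    void audioContext.close();
  };
}, []);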