{
  "generated_at": "2026-04-24T15:01:35.625676+00:00",
  "slug": "api-venice-ai-api-v1-audio-speech",
  "title": "Venice AI \u00b7 Text-to-Speech",
  "url": "https://api.venice.ai/api/v1/audio/speech",
  "category": "media",
  "summary": "Synthesize speech audio from text input using Venice AI's text-to-speech model with binary audio output.",
  "seo": {
    "title": "Venice AI TTS API | Uncensored Speech Synthesis x402",
    "description": "Generate speech audio from text via Venice AI's private, uncensored API. Binary audio output. 10 USDC per call. x402 on Base."
  },
  "use_cases": [
    "Convert text content to spoken audio for accessibility",
    "Generate voice content for media production",
    "Create audio versions of written materials"
  ],
  "ideal_buyer": "Content creators, accessibility tools, and media production pipelines needing TTS capabilities.",
  "example_prompt": "Generate speech audio for this text: 'Welcome to the future of AI payments.'",
  "risk_notes": [],
  "pricing_sanity": {
    "flag": "expensive_outlier",
    "ratio": 125,
    "median_category_atomic": 80000
  },
  "pricing_review_required": false,
  "pricing_decimal_suspect": false,
  "trust_tier": "indexed_external",
  "accepts": [
    {
      "scheme": "exact",
      "network": "base",
      "pay_to": "0x2670b922ef37c7df47158725c0cc407b5382293f",
      "asset": "0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913",
      "max_amount_required_atomic": "10000000",
      "max_timeout_seconds": 300,
      "verified": false,
      "hints": {
        "input": {
          "type": "http",
          "method": "POST",
          "bodyFields": {
            "input": {
              "type": "string",
              "required": true,
              "description": "The text to generate audio for. The maximum length is 4096 characters."
            },
            "model": {
              "enum": [
                "tts-kokoro",
                "tts-qwen3-0-6b",
                "tts-qwen3-1-7b"
              ],
              "type": "string",
              "description": "The model ID of a Venice TTS model."
            },
            "speed": {
              "type": "number",
              "description": "The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default."
            },
            "top_p": {
              "type": "number",
              "description": "Nucleus sampling parameter. Only supported by Qwen 3 TTS models. Default is 1.0."
            },
            "voice": {
              "enum": [
                "af_alloy",
                "af_aoede",
                "af_bella",
                "af_heart",
                "af_jadzia",
                "af_jessica",
                "af_kore",
                "af_nicole",
                "af_nova",
                "af_river",
                "af_sarah",
                "af_sky",
                "am_adam",
                "am_echo",
                "am_eric",
                "am_fenrir",
                "am_liam",
                "am_michael",
                "am_onyx",
                "am_puck",
                "am_santa",
                "bf_alice",
                "bf_emma",
                "bf_lily",
                "bm_daniel",
                "bm_fable",
                "bm_george",
                "bm_lewis",
                "zf_xiaobei",
                "zf_xiaoni",
                "zf_xiaoxiao",
                "zf_xiaoyi",
                "zm_yunjian",
                "zm_yunxi",
                "zm_yunxia",
                "zm_yunyang",
                "ff_siwis",
                "hf_alpha",
                "hf_beta",
                "hm_omega",
                "hm_psi",
                "if_sara",
                "im_nicola",
                "jf_alpha",
                "jf_gongitsune",
                "jf_nezumi",
                "jf_tebukuro",
                "jm_kumo",
                "pf_dora",
                "pm_alex",
                "pm_santa",
                "ef_dora",
                "em_alex",
                "em_santa",
                "Vivian",
                "Serena",
                "Ono_Anna",
                "Sohee",
                "Uncle_Fu",
                "Dylan",
                "Eric",
                "Ryan",
                "Aiden"
              ],
              "type": "string",
              "description": "The voice to use when generating the audio. Voices are model-specific: Kokoro voices (e.g. af_sky, af_bella, am_adam) work with tts-kokoro; Qwen 3 voices (e.g. Vivian, Serena, Dylan, Eric, Ryan, Aiden) work with tts-qwen3-0-6b and tts-qwen3-1-7b. Using an incompatible voice returns a 400 error."
            },
            "prompt": {
              "type": "string",
              "description": "A style prompt to control the emotion and delivery of the speech. Only supported by Qwen 3 TTS models. Examples: \"Very happy.\", \"Sad and slow.\", \"Excited and energetic.\""
            },
            "language": {
              "enum": [
                "Auto",
                "English",
                "Chinese",
                "Spanish",
                "French",
                "German",
                "Italian",
                "Japanese",
                "Korean",
                "Portuguese",
                "Russian"
              ],
              "type": "string",
              "description": "The language of the input text. Only supported by Qwen 3 TTS models. If not specified, the language is auto-detected."
            },
            "streaming": {
              "type": "boolean",
              "description": "Should the content stream back sentence by sentence or be processed and returned as a complete audio file."
            },
            "temperature": {
              "type": "number",
              "description": "Sampling temperature for speech generation. Higher values produce more varied output. Only supported by Qwen 3 TTS models. Default is 0.9."
            },
            "response_format": {
              "enum": [
                "mp3",
                "opus",
                "aac",
                "flac",
                "wav",
                "pcm"
              ],
              "type": "string",
              "description": "The format to audio in."
            }
          }
        },
        "output": {
          "type": "string",
          "format": "binary"
        }
      }
    }
  ],
  "origin": {
    "slug": "api-venice-ai",
    "host": "api.venice.ai",
    "title": "Venice API Docs",
    "description": "Harness the full capabilities of Venice AI with the Venice API, a private and uncensored AI API enabling the development of advanced applications that generate text and images.",
    "url": "https://api.venice.ai",
    "og_image": "https://venice.ai/images/venice_social_preview.png",
    "favicon": "https://docs.venice.ai/mintlify-assets/_mintlify/favicons/veniceai/HJGBlV4jYrSOrFXh/_generated/favicon/favicon-16x16.png"
  },
  "json_ld": {
    "@id": "https://x402all.com/resource/api-venice-ai-api-v1-audio-speech",
    "url": "https://x402all.com/resource/api-venice-ai-api-v1-audio-speech",
    "name": "Venice AI \u00b7 Text-to-Speech",
    "@type": "WebAPI",
    "offers": {
      "url": "https://x402all.com/resource/api-venice-ai-api-v1-audio-speech",
      "@type": "Offer",
      "price": "10",
      "availability": "https://schema.org/InStock",
      "priceCurrency": "USDC",
      "priceSpecification": {
        "@type": "UnitPriceSpecification",
        "price": "10.000000",
        "unitText": "call",
        "priceCurrency": "USDC"
      },
      "eligibleCustomerType": "Agent",
      "additionalProperty": [
        {
          "@type": "PropertyValue",
          "name": "paymentNetwork",
          "value": "base"
        },
        {
          "@type": "PropertyValue",
          "name": "paymentAsset",
          "value": "0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913"
        }
      ]
    },
    "sameAs": "https://api.venice.ai/api/v1/audio/speech",
    "@context": "https://schema.org",
    "provider": {
      "@id": "https://x402all.com/server/api-venice-ai",
      "url": "https://api.venice.ai",
      "name": "Venice API Docs",
      "@type": "Organization"
    },
    "identifier": "api-venice-ai-api-v1-audio-speech",
    "description": "Generate speech audio from text via Venice AI's private, uncensored API. Binary audio output. 10 USDC per call. x402 on Base.",
    "potentialAction": {
      "@type": "BuyAction",
      "target": "https://axon402.com/test-buy?resource=api-venice-ai-api-v1-audio-speech",
      "description": "Test-buy this endpoint on AXON"
    },
    "applicationCategory": "media"
  },
  "axon_deep_link": "https://axon402.com/test-buy?resource=api-venice-ai-api-v1-audio-speech"
}
