{
  "generated_at": "2026-04-24T15:01:35.625676+00:00",
  "slug": "api-venice-ai-api-v1-audio-transcriptions",
  "title": "Venice AI \u00b7 Audio Transcription",
  "url": "https://api.venice.ai/api/v1/audio/transcriptions",
  "category": "media",
  "summary": "Transcribe audio files to text with optional timestamps and duration metadata.",
  "seo": {
    "title": "Venice AI Audio Transcription API | $10 STT Service",
    "description": "Convert audio to text with Venice AI's private speech-to-transcription API. Pay $10 USDC per call via x402. Uncensored, private AI transcription."
  },
  "use_cases": [
    "Transcribe podcasts and interviews",
    "Generate subtitles from audio content",
    "Create searchable text archives from voice recordings"
  ],
  "ideal_buyer": "Content creators and media producers needing private, uncensored audio transcription.",
  "example_prompt": "Transcribe this podcast episode with timestamps",
  "example_request_body": {
    "audio": "base64_encoded_audio_data",
    "language": "en",
    "timestamps": true
  },
  "risk_notes": [],
  "pricing_sanity": {
    "flag": "expensive_outlier",
    "ratio": 125,
    "median_category_atomic": 80000
  },
  "pricing_review_required": false,
  "pricing_decimal_suspect": false,
  "trust_tier": "indexed_external",
  "accepts": [
    {
      "scheme": "exact",
      "network": "base",
      "pay_to": "0x2670b922ef37c7df47158725c0cc407b5382293f",
      "asset": "0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913",
      "max_amount_required_atomic": "10000000",
      "max_timeout_seconds": 300,
      "verified": false,
      "hints": {
        "input": {
          "type": "http",
          "method": "POST",
          "bodyFields": {
            "file": {
              "type": "string",
              "description": "The audio file object (not a base64 string). Supported formats: WAV, WAVE, FLAC, M4A, AAC, MP4, MP3, OGG, WEBM."
            },
            "model": {
              "enum": [
                "nvidia/parakeet-tdt-0.6b-v3",
                "openai/whisper-large-v3"
              ],
              "type": "string",
              "description": "The model to use for transcription. See https://docs.venice.ai/models/overview for more information."
            },
            "language": {
              "type": "string",
              "description": "ISO 639-1 language code (e.g., \"en\", \"es\", \"fr\"). Optional - if not provided, the model will auto-detect the language. Note: Only supported by certain models (e.g., Whisper). Ignored by models that do not support language hints."
            },
            "timestamps": {
              "type": "boolean",
              "description": "Whether to include timestamps in the response."
            },
            "response_format": {
              "enum": [
                "json",
                "text"
              ],
              "type": "string",
              "description": "The format of the transcript output, in one of these options: json, text."
            }
          }
        },
        "output": {
          "type": "object",
          "required": [
            "text"
          ],
          "properties": {
            "text": {
              "type": "string",
              "description": "The transcribed text"
            },
            "duration": {
              "type": "number",
              "description": "Duration of the audio in seconds"
            },
            "timestamps": {
              "type": "object",
              "properties": {
                "char": {
                  "type": "array",
                  "items": {
                    "type": "object",
                    "required": [
                      "char",
                      "start",
                      "end"
                    ],
                    "properties": {
                      "end": {
                        "type": "number"
                      },
                      "char": {
                        "type": "string"
                      },
                      "start": {
                        "type": "number"
                      }
                    }
                  }
                },
                "word": {
                  "type": "array",
                  "items": {
                    "type": "object",
                    "required": [
                      "word",
                      "start",
                      "end"
                    ],
                    "properties": {
                      "end": {
                        "type": "number"
                      },
                      "word": {
                        "type": "string"
                      },
                      "start": {
                        "type": "number"
                      }
                    }
                  }
                },
                "segment": {
                  "type": "array",
                  "items": {
                    "type": "object",
                    "required": [
                      "text",
                      "start",
                      "end"
                    ],
                    "properties": {
                      "end": {
                        "type": "number"
                      },
                      "text": {
                        "type": "string"
                      },
                      "start": {
                        "type": "number"
                      }
                    }
                  }
                }
              },
              "description": "Timestamps for the transcription (only if timestamps=true)"
            }
          },
          "description": "Transcription response"
        }
      }
    }
  ],
  "origin": {
    "slug": "api-venice-ai",
    "host": "api.venice.ai",
    "title": "Venice API Docs",
    "description": "Harness the full capabilities of Venice AI with the Venice API, a private and uncensored AI API enabling the development of advanced applications that generate text and images.",
    "url": "https://api.venice.ai",
    "og_image": "https://venice.ai/images/venice_social_preview.png",
    "favicon": "https://docs.venice.ai/mintlify-assets/_mintlify/favicons/veniceai/HJGBlV4jYrSOrFXh/_generated/favicon/favicon-16x16.png"
  },
  "json_ld": {
    "@id": "https://x402all.com/resource/api-venice-ai-api-v1-audio-transcriptions",
    "url": "https://x402all.com/resource/api-venice-ai-api-v1-audio-transcriptions",
    "name": "Venice AI \u00b7 Audio Transcription",
    "@type": "WebAPI",
    "offers": {
      "url": "https://x402all.com/resource/api-venice-ai-api-v1-audio-transcriptions",
      "@type": "Offer",
      "price": "10",
      "availability": "https://schema.org/InStock",
      "priceCurrency": "USDC",
      "priceSpecification": {
        "@type": "UnitPriceSpecification",
        "price": "10.000000",
        "unitText": "call",
        "priceCurrency": "USDC"
      },
      "eligibleCustomerType": "Agent",
      "additionalProperty": [
        {
          "@type": "PropertyValue",
          "name": "paymentNetwork",
          "value": "base"
        },
        {
          "@type": "PropertyValue",
          "name": "paymentAsset",
          "value": "0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913"
        }
      ]
    },
    "sameAs": "https://api.venice.ai/api/v1/audio/transcriptions",
    "@context": "https://schema.org",
    "provider": {
      "@id": "https://x402all.com/server/api-venice-ai",
      "url": "https://api.venice.ai",
      "name": "Venice API Docs",
      "@type": "Organization"
    },
    "identifier": "api-venice-ai-api-v1-audio-transcriptions",
    "description": "Convert audio to text with Venice AI's private speech-to-transcription API. Pay $10 USDC per call via x402. Uncensored, private AI transcription.",
    "potentialAction": {
      "@type": "BuyAction",
      "target": "https://axon402.com/test-buy?resource=api-venice-ai-api-v1-audio-transcriptions",
      "description": "Test-buy this endpoint on AXON"
    },
    "applicationCategory": "media"
  },
  "axon_deep_link": "https://axon402.com/test-buy?resource=api-venice-ai-api-v1-audio-transcriptions"
}