{
  "generated_at": "2026-04-24T15:01:35.625676+00:00",
  "slug": "cnvrt-ing-api-transcribe",
  "title": "CNVRT \u00b7 Transcribe Audio/Video",
  "url": "https://cnvrt.ing/api/transcribe",
  "category": "media",
  "summary": "Convert speech from audio or video URLs into text transcripts with optional timestamp markers and accuracy enhancement.",
  "seo": {
    "title": "Audio Video Transcription API | Whisper | x402",
    "description": "Transcribe audio and video from URLs using OpenAI Whisper. Optional timestamps and enhanced accuracy. 0.025 USDC per call on Base."
  },
  "use_cases": [
    "Transcribe podcast episodes from hosting URLs",
    "Generate captions for video content",
    "Create searchable text from audio recordings"
  ],
  "ideal_buyer": "Media workflows and content platforms that need URL-based transcription without managing Whisper infrastructure.",
  "example_prompt": "Transcribe this podcast episode with timestamps: https://example.com/episode.mp3",
  "example_request_body": {
    "url": "https://example.com/episode.mp3",
    "language": "en",
    "includeTimestamps": true
  },
  "risk_notes": [],
  "pricing_sanity": {
    "flag": "cheap",
    "ratio": 0.312,
    "median_category_atomic": 80000
  },
  "pricing_review_required": false,
  "pricing_decimal_suspect": false,
  "trust_tier": "indexed_external",
  "accepts": [
    {
      "scheme": "exact",
      "network": "base",
      "pay_to": "0xfad67ce5a834e68d5533ba7ac08619ff82a42084",
      "asset": "0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913",
      "max_amount_required_atomic": "25000",
      "max_timeout_seconds": 60,
      "mime_type": "application/json",
      "description": "Transcribe audio/video from URLs using OpenAI Whisper. Payment via USDC on base.",
      "verified": false,
      "hints": {
        "input": {
          "type": "http",
          "method": "POST",
          "discoverable": true
        },
        "output": {
          "input": {
            "type": "http",
            "method": "POST",
            "bodyType": "json",
            "bodyFields": {
              "url": {
                "type": "string",
                "required": true,
                "description": "Media URL to transcribe (video or audio from any supported platform)"
              },
              "language": {
                "type": "string",
                "required": false,
                "description": "Optional: Language code (ISO 639-1) for better accuracy. Auto-detected if not provided."
              },
              "enhanceAccuracy": {
                "type": "boolean",
                "required": false,
                "description": "Use enhanced accuracy mode (slower but more precise)"
              },
              "includeTimestamps": {
                "type": "boolean",
                "required": false,
                "description": "Include timestamp markers in the transcription"
              }
            }
          },
          "output": {
            "type": "object",
            "required": [
              "success",
              "transcription"
            ],
            "properties": {
              "jobId": {
                "type": "string",
                "description": "Unique job identifier"
              },
              "success": {
                "type": "boolean",
                "description": "Whether the transcription was successful"
              },
              "duration": {
                "type": "number",
                "description": "Duration of the media in seconds"
              },
              "language": {
                "type": "string",
                "description": "Detected or specified language"
              },
              "wordCount": {
                "type": "number",
                "description": "Number of words in transcription"
              },
              "timestamps": {
                "type": "array",
                "items": {
                  "type": "object",
                  "properties": {
                    "end": {
                      "type": "number",
                      "description": "End time in seconds"
                    },
                    "text": {
                      "type": "string",
                      "description": "Text for this segment"
                    },
                    "start": {
                      "type": "number",
                      "description": "Start time in seconds"
                    }
                  }
                },
                "description": "Array of timestamped segments (if requested)"
              },
              "transcription": {
                "type": "string",
                "description": "Full text transcription of the media"
              }
            }
          }
        }
      }
    }
  ],
  "origin": {
    "slug": "cnvrt-ing",
    "host": "cnvrt.ing",
    "title": "CNVRT - Perception Layer for Autonomous Systems",
    "description": "We're not building agents.... we're waking them up.",
    "url": "https://cnvrt.ing",
    "og_image": "https://cnvrt.ing/assets/cnvrt.png",
    "favicon": "https://cnvrt.ing/assets/favicon.ico"
  },
  "json_ld": {
    "@id": "https://x402all.com/resource/cnvrt-ing-api-transcribe",
    "url": "https://x402all.com/resource/cnvrt-ing-api-transcribe",
    "name": "CNVRT \u00b7 Transcribe Audio/Video",
    "@type": "WebAPI",
    "offers": {
      "url": "https://x402all.com/resource/cnvrt-ing-api-transcribe",
      "@type": "Offer",
      "price": "0.025",
      "availability": "https://schema.org/InStock",
      "priceCurrency": "USDC",
      "priceSpecification": {
        "@type": "UnitPriceSpecification",
        "price": "0.025000",
        "unitText": "call",
        "priceCurrency": "USDC"
      },
      "eligibleCustomerType": "Agent",
      "additionalProperty": [
        {
          "@type": "PropertyValue",
          "name": "paymentNetwork",
          "value": "base"
        },
        {
          "@type": "PropertyValue",
          "name": "paymentAsset",
          "value": "0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913"
        }
      ]
    },
    "sameAs": "https://cnvrt.ing/api/transcribe",
    "@context": "https://schema.org",
    "provider": {
      "@id": "https://x402all.com/server/cnvrt-ing",
      "url": "https://cnvrt.ing",
      "name": "CNVRT - Perception Layer for Autonomous Systems",
      "@type": "Organization"
    },
    "identifier": "cnvrt-ing-api-transcribe",
    "description": "Transcribe audio and video from URLs using OpenAI Whisper. Optional timestamps and enhanced accuracy. 0.025 USDC per call on Base.",
    "potentialAction": {
      "@type": "BuyAction",
      "target": "https://axon402.com/test-buy?resource=cnvrt-ing-api-transcribe",
      "description": "Test-buy this endpoint on AXON"
    },
    "applicationCategory": "media"
  },
  "axon_deep_link": "https://axon402.com/test-buy?resource=cnvrt-ing-api-transcribe"
}
