Skip to main content

🇺🇸 English Speech-to-Text (ASR) PRO

🇺🇸 ระบบแปลงเสียงพูดภาษาอังกฤษเป็นข้อความแบบ PRO

The English Speech-to-Text Pro API provides advanced speech recognition with enhanced accuracy using state-of-the-art language models. The API is designed for high performance and reliability with support for speaker diarization and optimized handling of longer audio files.

ระบบแปลงเสียงพูดภาษาอังกฤษเป็นข้อความ (Pro Model) Version Status Production Legacy

iApp Speech to Text API

Try Demo

Example File (Click to try)

Selected: vc-demo.mp3

Getting Started

Prerequisites

  • An API key from iApp Technology
  • Audio files in supported formats
  • Maximum file length: no more than 1 hour

Key Features

  • Text extraction from audio files
  • Speaker diarization for multi-speaker conversations
  • Support for various audio formats
  • High accuracy transcription with context awareness

Security & Compliance

  • API key authentication
  • GDPR and PDPA compliant

API Endpoints

| Endpoint | Method | Description | Cost |
|---|---|---|---|
| /v3/store/speech/speech-to-text/pro/en (Legacy: /asr/v3/en/pro) | POST | Convert English speech to text (Pro model with LLM) | 2 IC per 60 seconds |

API Usage

Supported File Formats

The API supports the following audio file formats:

  • Audio: .mp3, .wav, .m4a, .aac, .aif, .cda, .flac, .mid, .ogg, .wma

Endpoint: POST /v3/store/speech/speech-to-text/pro/en

Header:

  • Content-Type: multipart/form-data
  • apikey: Your API key for authentication

Form Parameters:

| Parameter | Type | Required | Default | Description |
|---|---|---|---|---|
| file | File | Yes | - | The audio file to transcribe |
| prompt | String | No | "base" | Prompt template to use |
| chunk_size | Integer | No | 20 | Size in seconds for processing audio chunks |
| use_asr_pro | Boolean | No | true | Use ASR Pro model |

Sample Request:

# Full request: uploads the audio file as the "file" form field and sets every
# optional form parameter (prompt, chunk_size, use_asr_pro) explicitly.
curl -X POST "https://api.iapp.co.th/v3/store/speech/speech-to-text/pro/en" \
-H "Content-Type: multipart/form-data" \
-H "apikey: YOUR_API_KEY" \
-F "file=@/path/to/your/audio.mp3" \
-F "prompt=base" \
-F "chunk_size=20" \
-F "use_asr_pro=true"

Sample Response:

{
"output": [
{
"text": "Transcribed text from segment one.",
"start": 0.0,
"end": 5.28,
"speaker": "SPEAKER_00",
"segment": 0
},
{
"text": "Transcribed text from segment two.",
"start": 5.28,
"end": 10.56,
"speaker": "SPEAKER_01",
"segment": 1
}
],
"audio_duration_in_seconds": 60.5,
"uploaded_file_name": "example.mp3",
"processing_time_in_seconds": 12.34,
"use_asr_pro": true,
"asr_pro_is_used": true,
"iapp": {
"seconds": 60.5
}
}

Response Status Codes

  • 200 OK: Request successful
  • 400 Bad Request: Invalid request (e.g., no file uploaded, unsupported file format)
  • 404 Not Found: Task ID not found
  • 500 Internal Server Error: Processing failed, server error

Notes

  • Speaker diarization is supported, with segments containing both speaker IDs and timestamp information.

Code Examples

Curl

# Minimal request: only the apikey header and the "file" form field are sent;
# no optional form parameters are specified.
curl -X POST https://api.iapp.co.th/v3/store/speech/speech-to-text/pro/en \
-H "apikey: YOUR_API_KEY" \
-F "file=@/path/to/audio.mp3"

Python Example

import requests

def transcribe_audio(file_path, api_key):
    """Upload an audio file to the English Speech-to-Text Pro endpoint.

    Args:
        file_path: Path of the audio file sent as the ``file`` form field.
        api_key: Value sent in the ``apikey`` request header.

    Returns:
        The decoded JSON response when the server answers with status 200;
        otherwise a string of the form ``"Error: <status>, <body>"``.
    """
    url = "https://api.iapp.co.th/v3/store/speech/speech-to-text/pro/en"
    headers = {"apikey": api_key}
    data = {
        "prompt": "base",
        "chunk_size": 20,
        # Form fields travel as text: send the lowercase "true" used by the
        # other examples (a bare `true` is not a Python name).
        "use_asr_pro": "true",
    }

    # The context manager closes the file handle even if the request raises.
    with open(file_path, "rb") as audio_file:
        response = requests.post(
            url, headers=headers, files={"file": audio_file}, data=data
        )

    if response.status_code == 200:
        return response.json()
    return f"Error: {response.status_code}, {response.text}"

JavaScript Example

/**
 * Uploads an audio file to the English Speech-to-Text Pro endpoint.
 *
 * @param {string} filePath - URL or path that `fetch` can load the audio from.
 * @param {string} apiKey - Value sent in the `apikey` request header.
 * @returns {Promise<object>} The decoded JSON response body.
 * @throws {Error} If the audio cannot be loaded or the API answers with a non-2xx status.
 */
async function transcribeAudio(filePath, apiKey) {
  const url = "https://api.iapp.co.th/v3/store/speech/speech-to-text/pro/en"

  // Load the audio first and fail fast, so an error page is never uploaded as audio.
  const fileResponse = await fetch(filePath)
  if (!fileResponse.ok) {
    throw new Error(`Error: could not load audio file (${fileResponse.status})`)
  }
  const audioBlob = await fileResponse.blob()

  // Pass an explicit file name: a bare Blob would be sent as "blob" with no extension.
  const fileName = String(filePath).split(/[\\/]/).pop().split(/[?#]/)[0] || "audio"

  const formData = new FormData()
  formData.append("file", audioBlob, fileName)
  formData.append("prompt", "base")
  formData.append("chunk_size", "20")
  formData.append("use_asr_pro", "true")

  // Content-Type is left unset so the runtime can add the multipart boundary itself.
  const response = await fetch(url, {
    method: "POST",
    headers: {
      apikey: apiKey,
    },
    body: formData,
  })

  if (!response.ok) {
    throw new Error(`Error: ${response.status}, ${await response.text()}`)
  }
  return await response.json()
}

PHP Example

/**
 * Send an audio file to the English Speech-to-Text Pro endpoint.
 *
 * @param string $file_path Path of the audio file uploaded as the "file" field.
 * @param string $api_key   Value sent in the "apikey" request header.
 * @return array|string|null Result of json_decode() on the response body
 *                           (associative array for a JSON object) on HTTP 200;
 *                           otherwise a string of the form
 *                           "Error: <status>, <message>".
 */
function transcribe_audio($file_path, $api_key) {
    $url = "https://api.iapp.co.th/v3/store/speech/speech-to-text/pro/en";

    $curl = curl_init();

    $post_data = [
        'file' => new CURLFile($file_path),
        'prompt' => 'base',
        'chunk_size' => '20',
        'use_asr_pro' => 'true'
    ];

    curl_setopt_array($curl, [
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $post_data,
        CURLOPT_HTTPHEADER => [
            "apikey: $api_key"
        ]
    ]);

    $response = curl_exec($curl);
    // curl_exec() returns false on a transport failure; read the reason
    // before the handle is closed so the caller gets a useful message.
    $transport_error = ($response === false) ? curl_error($curl) : null;
    $status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
    curl_close($curl);

    if ($transport_error !== null) {
        return "Error: $status_code, $transport_error";
    }

    if ($status_code === 200) {
        return json_decode($response, true);
    }

    return "Error: $status_code, $response";
}

// Usage
$result = transcribe_audio("path/to/audio.mp3", "YOUR_API_KEY");
print_r($result);

Swift Example

import Foundation

/// Uploads an audio file to the English Speech-to-Text Pro endpoint as `multipart/form-data`.
///
/// - Parameters:
///   - filePath: Path of the audio file uploaded as the `file` form field.
///   - apiKey: Value sent in the `apikey` request header.
///   - completion: Receives the decoded top-level JSON object on a 2xx response, or an error when
///     the file cannot be read, the request fails, the server answers with a non-2xx status, or
///     the body is not a JSON object. File-read failures are reported before any request is made;
///     all other outcomes are reported from the `URLSession` completion handler.
func transcribeAudio(filePath: String, apiKey: String, completion: @escaping (Result<[String: Any], Error>) -> Void) {
    // Builds errors in the same shape (empty domain, localized description) used throughout.
    func makeError(_ message: String, code: Int = 0, underlying: Error? = nil) -> NSError {
        var info: [String: Any] = [NSLocalizedDescriptionKey: message]
        if let underlying = underlying {
            info[NSUnderlyingErrorKey] = underlying
        }
        return NSError(domain: "", code: code, userInfo: info)
    }

    guard let url = URL(string: "https://api.iapp.co.th/v3/store/speech/speech-to-text/pro/en") else {
        completion(.failure(makeError("Invalid endpoint URL")))
        return
    }

    var request = URLRequest(url: url)
    request.httpMethod = "POST"
    request.addValue(apiKey, forHTTPHeaderField: "apikey")

    let boundary = UUID().uuidString
    request.setValue("multipart/form-data; boundary=\(boundary)", forHTTPHeaderField: "Content-Type")

    let fileURL = URL(fileURLWithPath: filePath)
    let fileData: Data
    do {
        fileData = try Data(contentsOf: fileURL)
    } catch {
        // Keep the original message but preserve the real cause for debugging.
        completion(.failure(makeError("Could not load file", underlying: error)))
        return
    }

    var body = Data()
    // `Data(_:)` over a string's UTF-8 view cannot fail, so no force-unwrap is needed.
    func appendText(_ text: String) {
        body.append(Data(text.utf8))
    }

    // File part.
    // NOTE(review): the part is always labelled audio/mpeg, whatever the actual file type is.
    appendText("--\(boundary)\r\n")
    appendText("Content-Disposition: form-data; name=\"file\"; filename=\"\(fileURL.lastPathComponent)\"\r\n")
    appendText("Content-Type: audio/mpeg\r\n\r\n")
    body.append(fileData)
    appendText("\r\n")

    // Plain-text parts; `use_asr_pro` is sent explicitly to match the other language examples.
    let textFields: [(name: String, value: String)] = [
        (name: "prompt", value: "base"),
        (name: "chunk_size", value: "20"),
        (name: "use_asr_pro", value: "true"),
    ]
    for field in textFields {
        appendText("--\(boundary)\r\n")
        appendText("Content-Disposition: form-data; name=\"\(field.name)\"\r\n\r\n")
        appendText(field.value)
        appendText("\r\n")
    }

    appendText("--\(boundary)--\r\n")

    request.httpBody = body

    URLSession.shared.dataTask(with: request) { data, response, error in
        if let error = error {
            completion(.failure(error))
            return
        }

        guard let data = data else {
            completion(.failure(makeError("No data received")))
            return
        }

        // An HTTP error status still arrives with `error == nil`, so check it explicitly;
        // otherwise a JSON error body would be reported as a successful transcription.
        if let http = response as? HTTPURLResponse, !(200..<300).contains(http.statusCode) {
            let bodyText = String(decoding: data, as: UTF8.self)
            completion(.failure(makeError("Error: \(http.statusCode), \(bodyText)", code: http.statusCode)))
            return
        }

        do {
            if let json = try JSONSerialization.jsonObject(with: data) as? [String: Any] {
                completion(.success(json))
            } else {
                completion(.failure(makeError("Invalid JSON format")))
            }
        } catch {
            completion(.failure(error))
        }
    }.resume()
}

// Usage: print the decoded response on success, or the error on failure.
transcribeAudio(filePath: "path/to/audio.mp3", apiKey: "YOUR_API_KEY") { outcome in
    do {
        let payload = try outcome.get()
        print(payload)
    } catch {
        print("Error: \(error)")
    }
}

Kotlin Example

import okhttp3.*
import java.io.File
import java.io.IOException

/**
 * Uploads an audio file to the English Speech-to-Text Pro endpoint using OkHttp.
 *
 * @param filePath Path of the audio file uploaded as the `file` form field.
 * @param apiKey Value sent in the `apikey` request header.
 * @param callback Receives the raw response body text on a successful (2xx) response, or a
 * failure holding an [IOException] when the call fails, the body cannot be read, or the server
 * answers with a non-2xx status. Invoked from OkHttp's asynchronous callback.
 */
fun transcribeAudio(filePath: String, apiKey: String, callback: (Result<String>) -> Unit) {
    val client = OkHttpClient()
    val file = File(filePath)

    val requestBody = MultipartBody.Builder()
        .setType(MultipartBody.FORM)
        .addFormDataPart(
            "file",
            file.name,
            RequestBody.create(MediaType.parse("audio/*"), file)
        )
        .addFormDataPart("prompt", "base")
        .addFormDataPart("chunk_size", "20")
        .addFormDataPart("use_asr_pro", "true")
        .build()

    val request = Request.Builder()
        .url("https://api.iapp.co.th/v3/store/speech/speech-to-text/pro/en")
        .header("apikey", apiKey)
        .post(requestBody)
        .build()

    client.newCall(request).enqueue(object : Callback {
        override fun onFailure(call: Call, e: IOException) {
            callback(Result.failure(e))
        }

        override fun onResponse(call: Call, response: Response) {
            val text: String = try {
                // `use` closes the response (and its body) on every path, including errors.
                response.use { res ->
                    if (!res.isSuccessful) {
                        throw IOException("Error: ${res.code()} ${res.message()}")
                    }
                    res.body()?.string() ?: ""
                }
            } catch (e: IOException) {
                // Covers both the non-2xx case above and failures while reading the body,
                // so the caller is always notified exactly once.
                callback(Result.failure(e))
                return
            }
            callback(Result.success(text))
        }
    })
}

Java Example

import java.io.File;
import java.io.IOException;
import okhttp3.*;

/**
 * Minimal OkHttp-based client for the English Speech-to-Text Pro endpoint.
 */
public class ASRApiClient {

    // One shared client so connection and thread pools are reused across calls.
    private static final OkHttpClient CLIENT = new OkHttpClient();

    /**
     * Uploads an audio file as multipart/form-data and delivers the result asynchronously.
     *
     * @param filePath path of the audio file uploaded as the "file" form field
     * @param apiKey   value sent in the "apikey" request header
     * @param callback OkHttp callback notified of the response or of a failure; the
     *                 callback is responsible for closing the response it receives
     */
    public static void transcribeAudio(String filePath, String apiKey, Callback callback) {
        File file = new File(filePath);

        RequestBody requestBody = new MultipartBody.Builder()
            .setType(MultipartBody.FORM)
            .addFormDataPart(
                "file",
                file.getName(),
                RequestBody.create(MediaType.parse("audio/*"), file)
            )
            .addFormDataPart("prompt", "base")
            .addFormDataPart("chunk_size", "20")
            .addFormDataPart("use_asr_pro", "true")
            .build();

        Request request = new Request.Builder()
            .url("https://api.iapp.co.th/v3/store/speech/speech-to-text/pro/en")
            .header("apikey", apiKey)
            .post(requestBody)
            .build();

        CLIENT.newCall(request).enqueue(callback);
    }

    /**
     * Example entry point: transcribes a file and prints the raw response body,
     * or an error line when the call fails or the status is not 2xx.
     */
    public static void main(String[] args) {
        transcribeAudio("path/to/audio.mp3", "YOUR_API_KEY", new Callback() {
            @Override
            public void onFailure(Call call, IOException e) {
                System.err.println("Error: " + e.getMessage());
            }

            @Override
            public void onResponse(Call call, Response response) throws IOException {
                // try-with-resources closes the response on both the success and error paths.
                try (Response res = response) {
                    if (res.isSuccessful()) {
                        System.out.println(res.body().string());
                    } else {
                        System.err.println("Error: " + res.code() + " " + res.message());
                    }
                }
            }
        });
    }
}

Dart (Flutter) Example

import 'dart:convert';
import 'dart:io';

import 'package:http/http.dart' as http;

/// Uploads the audio file at [filePath] to the English Speech-to-Text Pro
/// endpoint and returns the decoded JSON response.
///
/// [apiKey] is sent in the `apikey` request header.
///
/// Throws an [Exception] prefixed with `Error transcribing file:` when the
/// request fails, the status code is not 200, or the body is not a JSON object.
Future<Map<String, dynamic>> transcribeAudio(String filePath, String apiKey) async {
  final uri = Uri.parse('https://api.iapp.co.th/v3/store/speech/speech-to-text/pro/en');

  final request = http.MultipartRequest('POST', uri);
  request.headers['apikey'] = apiKey;

  request.files.add(await http.MultipartFile.fromPath(
    'file',
    filePath,
  ));

  request.fields['prompt'] = 'base';
  request.fields['chunk_size'] = '20';
  request.fields['use_asr_pro'] = 'true';

  try {
    final response = await request.send();
    final responseData = await http.Response.fromStream(response);

    if (response.statusCode == 200) {
      // jsonDecode comes from dart:convert and returns `dynamic`; cast it so a
      // non-object body is reported here rather than at the caller.
      return jsonDecode(responseData.body) as Map<String, dynamic>;
    } else {
      throw Exception('Failed to transcribe: ${response.statusCode} ${responseData.body}');
    }
  } catch (e) {
    throw Exception('Error transcribing file: $e');
  }
}

Pricing

| Operation | Production Path | Legacy Path | IC Cost | Unit | On-Premise |
|---|---|---|---|---|---|
| English Speech-to-Text (Pro) | /v3/store/speech/speech-to-text/pro/en | /asr/v3/en/pro | 2 IC | per 60 seconds | Contact us |