🇺🇸 English Speech-to-Text (ASR) PRO
🇺🇸 ระบบแปลงเสียงพูดภาษาอังกฤษเป็นข้อความแบบ PRO
The English Speech-to-Text Pro API provides advanced speech recognition with enhanced accuracy using state-of-the-art language models. The API is designed for high performance and reliability with support for speaker diarization and optimized handling of longer audio files.
ระบบแปลงเสียงพูดภาษาอังกฤษเป็นข้อความ (Pro Model)
Try Demo
Getting Started
Prerequisites
- An API key from iApp Technology
- Audio files in supported formats
- Maximum file length: 1 hour
Key Features
- Text extraction from audio files
- Speaker diarization for multi-speaker conversations
- Support for various audio formats
- High accuracy transcription with context awareness
Security & Compliance
- API key authentication
- GDPR and PDPA compliant
API Endpoints
| Endpoint | Method | Description | Cost |
|---|---|---|---|
| /v3/store/speech/speech-to-text/pro/en (Legacy: /asr/v3/en/pro) | POST | Convert English speech to text (Pro model with LLM) | 2 IC per 60 seconds |
API Usage
Supported File Formats
The API supports the following audio and video file formats:
- Audio: .mp3, .wav, .m4a, .aac, .aif, .cda, .flac, .mid, .ogg, .wma
Endpoint: POST /v3/store/speech/speech-to-text/pro/en
Header:
- Content-Type: multipart/form-data
- apikey: Your API key for authentication
Form Parameters:
| Parameter | Type | Required | Default | Description |
|---|---|---|---|---|
| file | File | Yes | - | The audio file to transcribe |
| prompt | String | No | "base" | Prompt template to use |
| chunk_size | Integer | No | 20 | Size in seconds for processing audio chunks |
| use_asr_pro | Boolean | No | true | Use ASR Pro model |
Sample Request:
# Upload a local audio file together with every optional form field.
curl --request POST "https://api.iapp.co.th/v3/store/speech/speech-to-text/pro/en" \
  --header "Content-Type: multipart/form-data" \
  --header "apikey: YOUR_API_KEY" \
  --form "file=@/path/to/your/audio.mp3" \
  --form "prompt=base" \
  --form "chunk_size=20" \
  --form "use_asr_pro=true"
Sample Response:
{
"output": [
{
"text": "Transcribed text from segment one.",
"start": 0.0,
"end": 5.28,
"speaker": "SPEAKER_00",
"segment": 0
},
{
"text": "Transcribed text from segment two.",
"start": 5.28,
"end": 10.56,
"speaker": "SPEAKER_01",
"segment": 1
}
],
"audio_duration_in_seconds": 60.5,
"uploaded_file_name": "example.mp3",
"processing_time_in_seconds": 12.34,
"use_asr_pro": true,
"asr_pro_is_used": true,
"iapp": {
"seconds": 60.5
}
}
Response Status Codes
- 200 OK: Request successful
- 400 Bad Request: Invalid request (e.g., no file uploaded, unsupported file format)
- 404 Not Found: Task ID not found
- 500 Internal Server Error: Processing failed, server error
Notes
- Speaker diarization is supported, with segments containing both speaker IDs and timestamp information.
Code Examples
Curl
# Minimal request: only the API key header and the audio file are sent.
curl --request POST https://api.iapp.co.th/v3/store/speech/speech-to-text/pro/en \
  --header "apikey: YOUR_API_KEY" \
  --form "file=@/path/to/audio.mp3"
Python Example
import requests
def transcribe_audio(file_path, api_key):
    """Upload an audio file to the English Speech-to-Text Pro endpoint.

    Args:
        file_path: Path of the audio file to upload.
        api_key: Value sent in the ``apikey`` request header.

    Returns:
        The decoded JSON body when the server answers 200; otherwise a
        string of the form ``"Error: <status>, <body>"``.
    """
    url = "https://api.iapp.co.th/v3/store/speech/speech-to-text/pro/en"
    headers = {"apikey": api_key}
    data = {
        "prompt": "base",
        "chunk_size": 20,
        # Sent as the text "true": the bare name `true` is not defined in
        # Python, and the literal True would be serialized as "True".
        "use_asr_pro": "true",
    }
    # The context manager closes the file handle once the upload finishes.
    with open(file_path, "rb") as audio_file:
        response = requests.post(
            url, headers=headers, files={"file": audio_file}, data=data
        )
    if response.status_code == 200:
        return response.json()
    return f"Error: {response.status_code}, {response.text}"
JavaScript Example
/**
 * Uploads an audio file to the English Speech-to-Text Pro endpoint.
 *
 * @param {string} filePath - Location passed to `fetch` to load the audio bytes.
 * @param {string} apiKey - Value sent in the `apikey` request header.
 * @returns {Promise<object>} The parsed JSON response body.
 * @throws {Error} If the audio cannot be loaded or the API answers with a non-2xx status.
 */
async function transcribeAudio(filePath, apiKey) {
  const url = "https://api.iapp.co.th/v3/store/speech/speech-to-text/pro/en"

  // Fail early instead of uploading an error page as if it were audio.
  const audioResponse = await fetch(filePath)
  if (!audioResponse.ok) {
    throw new Error(`Error: could not load audio file (${audioResponse.status})`)
  }
  const audioBlob = await audioResponse.blob()

  // Without an explicit name the part is uploaded as "blob", which carries no
  // file extension; derive the name from the path (query/fragment stripped).
  const fileName = filePath.split(/[?#]/)[0].split("/").pop() || "audio"

  const formData = new FormData()
  formData.append("file", audioBlob, fileName)
  formData.append("prompt", "base")
  formData.append("chunk_size", "20")
  formData.append("use_asr_pro", "true")

  // No Content-Type header is set here: fetch derives it, including the
  // multipart boundary, from the FormData body.
  const response = await fetch(url, {
    method: "POST",
    headers: {
      apikey: apiKey,
    },
    body: formData,
  })

  if (!response.ok) {
    throw new Error(`Error: ${response.status}, ${await response.text()}`)
  }
  return await response.json()
}
PHP Example
/**
 * Uploads an audio file to the English Speech-to-Text Pro endpoint.
 *
 * @param string $file_path Path of the audio file to upload.
 * @param string $api_key   Value sent in the "apikey" request header.
 * @return array|string Decoded JSON (associative array) on HTTP 200,
 *                      otherwise a string starting with "Error:".
 */
function transcribe_audio($file_path, $api_key) {
    $url = "https://api.iapp.co.th/v3/store/speech/speech-to-text/pro/en";
    $curl = curl_init();
    $post_data = [
        'file' => new CURLFile($file_path),
        'prompt' => 'base',
        'chunk_size' => '20',
        'use_asr_pro' => 'true'
    ];
    curl_setopt_array($curl, [
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST => true,
        // Passing an array makes cURL encode the body as multipart/form-data.
        CURLOPT_POSTFIELDS => $post_data,
        CURLOPT_HTTPHEADER => [
            "apikey: $api_key"
        ]
    ]);
    $response = curl_exec($curl);
    if ($response === false) {
        // Transport-level failure (DNS, TLS, timeout, ...): there is no HTTP
        // status to report, so surface cURL's own error message instead.
        $transport_error = curl_error($curl);
        curl_close($curl);
        return "Error: request failed, $transport_error";
    }
    $status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
    curl_close($curl);
    if ($status_code === 200) {
        return json_decode($response, true);
    }
    return "Error: $status_code, $response";
}
// Usage
// Transcribe a local file and dump whatever comes back (decoded array or error string).
$audio_path = "path/to/audio.mp3";
$transcription = transcribe_audio($audio_path, "YOUR_API_KEY");
print_r($transcription);
Swift Example
import Foundation
/// Uploads an audio file to the English Speech-to-Text Pro endpoint as a
/// `multipart/form-data` POST and reports the parsed JSON response.
///
/// - Parameters:
///   - filePath: Path of the local audio file to upload.
///   - apiKey: Value sent in the `apikey` request header.
///   - completion: Called once with the top-level JSON object on success, or
///     with an error when the file cannot be read, the transport fails, the
///     server answers with a non-2xx status, or the body is not a JSON object.
///     It is invoked synchronously when the file cannot be read, otherwise
///     from the data task's completion handler.
func transcribeAudio(filePath: String, apiKey: String, completion: @escaping (Result<[String: Any], Error>) -> Void) {
    // Builds the error value used for every failure raised in this function.
    func makeError(_ message: String, code: Int = 0) -> NSError {
        return NSError(domain: "", code: code, userInfo: [NSLocalizedDescriptionKey: message])
    }

    guard let url = URL(string: "https://api.iapp.co.th/v3/store/speech/speech-to-text/pro/en") else {
        completion(.failure(makeError("Invalid endpoint URL")))
        return
    }
    var request = URLRequest(url: url)
    request.httpMethod = "POST"
    request.addValue(apiKey, forHTTPHeaderField: "apikey")
    let boundary = UUID().uuidString
    request.setValue("multipart/form-data; boundary=\(boundary)", forHTTPHeaderField: "Content-Type")

    let fileURL = URL(fileURLWithPath: filePath)
    guard let fileData = try? Data(contentsOf: fileURL) else {
        completion(.failure(makeError("Could not load file")))
        return
    }

    var body = Data()
    // Appends UTF-8 text to the body; `Data(_:)` over `utf8` cannot fail, so no force-unwrap is needed.
    func appendText(_ text: String) {
        body.append(Data(text.utf8))
    }
    // Appends one plain-text form field.
    func appendField(_ name: String, _ value: String) {
        appendText("--\(boundary)\r\n")
        appendText("Content-Disposition: form-data; name=\"\(name)\"\r\n\r\n")
        appendText("\(value)\r\n")
    }

    // A double quote inside the filename would terminate the quoted header value early.
    let filename = fileURL.lastPathComponent.replacingOccurrences(of: "\"", with: "%22")

    // File part. NOTE(review): the part is always labelled audio/mpeg, whatever the
    // real format is — confirm the server does not rely on this header.
    appendText("--\(boundary)\r\n")
    appendText("Content-Disposition: form-data; name=\"file\"; filename=\"\(filename)\"\r\n")
    appendText("Content-Type: audio/mpeg\r\n\r\n")
    body.append(fileData)
    appendText("\r\n")

    appendField("prompt", "base")
    appendField("chunk_size", "20")
    appendField("use_asr_pro", "true")

    // Closing boundary.
    appendText("--\(boundary)--\r\n")
    request.httpBody = body

    URLSession.shared.dataTask(with: request) { data, response, error in
        if let error = error {
            completion(.failure(error))
            return
        }
        guard let data = data else {
            completion(.failure(makeError("No data received")))
            return
        }
        // A non-2xx answer is a failure even when its body happens to be valid JSON.
        if let http = response as? HTTPURLResponse, !(200...299).contains(http.statusCode) {
            let text = String(data: data, encoding: .utf8) ?? ""
            completion(.failure(makeError("Error: \(http.statusCode), \(text)", code: http.statusCode)))
            return
        }
        do {
            guard let json = try JSONSerialization.jsonObject(with: data) as? [String: Any] else {
                completion(.failure(makeError("Invalid JSON format")))
                return
            }
            completion(.success(json))
        } catch {
            completion(.failure(error))
        }
    }.resume()
}
// Usage
transcribeAudio(filePath: "path/to/audio.mp3", apiKey: "YOUR_API_KEY") { outcome in
    // Print the parsed response, or the error that ended the request.
    if case .success(let payload) = outcome {
        print(payload)
    } else if case .failure(let problem) = outcome {
        print("Error: \(problem)")
    }
}
Kotlin Example
import okhttp3.*
import java.io.File
import java.io.IOException
/**
 * Uploads an audio file to the English Speech-to-Text Pro endpoint using OkHttp.
 *
 * The request is enqueued asynchronously; [callback] is invoked from OkHttp's
 * callback with the raw response body on a successful (2xx) status, or with an
 * [IOException] on transport failure, non-2xx status, or a body read error.
 *
 * @param filePath path of the audio file to upload
 * @param apiKey value sent in the `apikey` request header
 * @param callback receives the outcome of the request
 */
fun transcribeAudio(filePath: String, apiKey: String, callback: (Result<String>) -> Unit) {
    val client = OkHttpClient()
    val audioFile = File(filePath)

    val requestBody = MultipartBody.Builder()
        .setType(MultipartBody.FORM)
        .addFormDataPart(
            "file",
            audioFile.name,
            RequestBody.create(MediaType.parse("audio/*"), audioFile)
        )
        .addFormDataPart("prompt", "base")
        .addFormDataPart("chunk_size", "20")
        .addFormDataPart("use_asr_pro", "true")
        .build()

    val request = Request.Builder()
        .url("https://api.iapp.co.th/v3/store/speech/speech-to-text/pro/en")
        .header("apikey", apiKey)
        .post(requestBody)
        .build()

    client.newCall(request).enqueue(object : Callback {
        override fun onFailure(call: Call, e: IOException) {
            callback(Result.failure(e))
        }

        override fun onResponse(call: Call, response: Response) {
            // `use` closes the response on every path so the connection is released.
            // Reading the body can itself throw IOException; report that through
            // the callback instead of letting it escape, which would leave the
            // caller without any result.
            val outcome: Result<String> = try {
                response.use { res ->
                    if (res.isSuccessful) {
                        Result.success(res.body()?.string() ?: "")
                    } else {
                        Result.failure<String>(IOException("Error: ${res.code()} ${res.message()}"))
                    }
                }
            } catch (e: IOException) {
                Result.failure<String>(e)
            }
            callback(outcome)
        }
    })
}
Java Example
import java.io.File;
import java.io.IOException;
import okhttp3.*;
/**
 * Minimal OkHttp client for the English Speech-to-Text Pro endpoint.
 */
public class ASRApiClient {
    /**
     * Enqueues an asynchronous multipart upload of an audio file.
     *
     * @param filePath path of the audio file to upload
     * @param apiKey   value sent in the {@code apikey} request header
     * @param callback receives the response or the transport failure; the
     *                 callback is responsible for closing the response it is given
     */
    public static void transcribeAudio(String filePath, String apiKey, Callback callback) {
        OkHttpClient client = new OkHttpClient();
        File file = new File(filePath);

        RequestBody requestBody = new MultipartBody.Builder()
            .setType(MultipartBody.FORM)
            .addFormDataPart(
                "file",
                file.getName(),
                RequestBody.create(MediaType.parse("audio/*"), file)
            )
            .addFormDataPart("prompt", "base")
            .addFormDataPart("chunk_size", "20")
            .addFormDataPart("use_asr_pro", "true")
            .build();

        Request request = new Request.Builder()
            .url("https://api.iapp.co.th/v3/store/speech/speech-to-text/pro/en")
            .header("apikey", apiKey)
            .post(requestBody)
            .build();

        client.newCall(request).enqueue(callback);
    }

    /**
     * Example entry point: uploads a file and prints the response body or the error.
     */
    public static void main(String[] args) {
        transcribeAudio("path/to/audio.mp3", "YOUR_API_KEY", new Callback() {
            @Override
            public void onFailure(Call call, IOException e) {
                System.err.println("Error: " + e.getMessage());
            }

            @Override
            public void onResponse(Call call, Response response) throws IOException {
                // try-with-resources closes the response on both branches so the
                // underlying connection is released.
                try (Response closing = response) {
                    if (closing.isSuccessful()) {
                        ResponseBody body = closing.body();
                        System.out.println(body != null ? body.string() : "");
                    } else {
                        System.err.println("Error: " + closing.code() + " " + closing.message());
                    }
                }
            }
        });
    }
}
Dart (Flutter) Example
import 'dart:convert';
import 'dart:io';

import 'package:http/http.dart' as http;
/// Uploads an audio file to the English Speech-to-Text Pro endpoint.
///
/// [filePath] is the path of the audio file to upload and [apiKey] is the
/// value sent in the `apikey` request header.
///
/// Returns the decoded JSON object when the server answers 200.
/// Throws an [Exception] starting with `Failed to transcribe:` for any other
/// status, and one starting with `Error transcribing file:` when sending the
/// request or decoding the body fails.
Future<Map<String, dynamic>> transcribeAudio(String filePath, String apiKey) async {
  final uri = Uri.parse('https://api.iapp.co.th/v3/store/speech/speech-to-text/pro/en');
  final request = http.MultipartRequest('POST', uri);
  request.headers['apikey'] = apiKey;
  request.files.add(await http.MultipartFile.fromPath('file', filePath));
  request.fields['prompt'] = 'base';
  request.fields['chunk_size'] = '20';
  request.fields['use_asr_pro'] = 'true';

  http.Response responseData;
  try {
    responseData = await http.Response.fromStream(await request.send());
  } catch (e) {
    throw Exception('Error transcribing file: $e');
  }

  // Raised outside the try blocks so this exception is not caught and wrapped
  // a second time.
  if (responseData.statusCode != 200) {
    throw Exception('Failed to transcribe: ${responseData.statusCode} ${responseData.body}');
  }

  try {
    return jsonDecode(responseData.body) as Map<String, dynamic>;
  } catch (e) {
    throw Exception('Error transcribing file: $e');
  }
}
Pricing
| Operation | Production Path | Legacy Path | IC Cost | Unit | On-Premise |
|---|---|---|---|---|---|
| English Speech-to-Text (Pro) | /v3/store/speech/speech-to-text/pro/en | /asr/v3/en/pro | 2 IC | per 60 seconds | Contact us |