Skip to main content

🎙️ Thai Real-time Speech-to-Text (ASR) 🆕

🎙️ ระบบแปลงเสียงพูดภาษาไทยเป็นข้อความแบบเรียลไทม์ (Real-time)

Version Status

Welcome to Thai Real-time ASR - our WebSocket-based real-time Automatic Speech Recognition service. This service converts audio stream data into text stream data in real-time, supporting both file streaming and direct microphone capture.

iApp Speech to Text API

Getting Started

  1. Prerequisites

    • An API key from iApp Technology
    • Audio stream in supported format
    • WebSocket client implementation
    • Supported format: PCM audio
    • No specific length limitation
  2. Quick Start

    • Real-time processing
    • Stream from file or microphone
    • Support for Thai language
  3. Key Features

    • Real-time text streaming
    • WebSocket-based communication
    • Low latency processing
    • Flexible integration options
    • Continuous speech recognition
  4. Security & Compliance

    • GDPR and PDPA compliant
    • Secure WebSocket (WSS) support
    • No data retention after processing
How to get API Key?

Please visit API Portal to view your existing API key or request a new one.

Example

Sample Request:

  • Without encryption: ws://api-uat.iapp.co.th/asr
  • With encryption: wss://api-uat.iapp.co.th/asr

Sample Response:

{
  "type": "realtime",
  "state": "Sentence",
  "text": "สวัสดีครับ"
}

Features & Capabilities

Core Features

  • Real-time speech recognition
  • WebSocket streaming support
  • File and microphone input
  • State-based response system
  • Continuous recognition

Supported Fields

  • Audio stream data
  • PCM audio format
  • WebSocket protocols
  • Real-time text output
  • State indicators

API Reference

  • Endpoints:
    • Without encryption: ws://api-uat.iapp.co.th/asr
    • With encryption: wss://api-uat.iapp.co.th/asr
  • Required Headers:
    • apikey: Your API key

Audio Format Requirements

  • Sample Rate: 16,000 Hz
  • Channel: Mono
  • Bit-depth: 16 bit
  • Audio-encode: PCM

Code Examples

Python (Microphone Streaming)

    import pyaudio
import websockets
import asyncio

async def stream_microphone():
url = "wss://api-uat.iapp.co.th/asr"
audio = pyaudio.PyAudio()
stream = audio.open(format=pyaudio.paInt16,
channels=1,
rate=16000,
input=True,
frames_per_buffer=1024)

async with websockets.connect(url, extra_headers={"apikey": "YOUR_API_KEY"}) as ws:
while True:
data = stream.read(1024)
await ws.send(data)
await asyncio.sleep(0.05)

Python (File Streaming)

    from websockets.client import WebSocketClientProtocol
import websockets
import asyncio
import json

async def send_audio_data(ws_client: WebSocketClientProtocol):
frame_size = 1024
with open("audio.pcm", "rb") as file:
while True:
buff = file.read(frame_size)
if buff:
await ws_client.send(buff)
else:
break
await asyncio.sleep(0.02)

async def client_test():
url = "wss://api-uat.iapp.co.th/asr"
async with websockets.connect(url, extra_headers={"apikey": "YOUR_API_KEY"}) as ws:
await send_audio_data(ws)

Javascript

const WebSocket = require('ws'); // Ensure WebSocket is installed (npm install ws)
const fs = require('fs');

async function sendAudioData(ws) {
const frameSize = 1024; // Define the frame size for audio chunks
const fileStream = fs.createReadStream('audio.pcm', { highWaterMark: frameSize });

// Read and send audio data frame by frame
for await (const chunk of fileStream) {
if (chunk) {
ws.send(chunk); // Send the audio data
}
await new Promise(resolve => setTimeout(resolve, 20)); // Wait for 20 ms
}

// Close the WebSocket connection after sending all data
ws.close();
}

function clientTest() {
const url = "wss://api-uat.iapp.co.th/asr";
const apiKey = "YOUR_API_KEY";

// Connect to the WebSocket server
const ws = new WebSocket(url, {
headers: {
apikey: apiKey
}
});

ws.on('open', async () => {
console.log('WebSocket connection opened');
await sendAudioData(ws);
});

ws.on('message', (message) => {
console.log('Received:', message.toString());
});

ws.on('error', (error) => {
console.error('WebSocket error:', error);
});

ws.on('close', (code, reason) => {
console.log('WebSocket closed:', code, reason);
});
}

// Run the client
clientTest();

PHP

<?php
$apiKey = "YOUR_API_KEY";
$url = "wss://api-uat.iapp.co.th/asr";

$ws = new WebSocket($url, ['headers' => ["apikey: $apiKey"]]);

function sendAudioData($ws) {
$frameSize = 1024; // Frame size in bytes
$file = fopen("audio.pcm", "rb");

while (!feof($file)) {
$data = fread($file, $frameSize);
$ws->send($data);
usleep(20000); // 20ms delay
}

fclose($file);
$ws->close();
}

$ws->on("open", function($ws) {
sendAudioData($ws);
});

$ws->on("message", function($message) {
echo "Received: $message\n";
});

$ws->on("error", function($error) {
echo "Error: $error\n";
});

$ws->on("close", function() {
echo "WebSocket closed\n";
});

Swift

import Foundation

class WebSocketClient: NSObject, URLSessionWebSocketDelegate {
let apiKey = "YOUR_API_KEY"
let url = URL(string: "wss://api-uat.iapp.co.th/asr")!
var webSocket: URLSessionWebSocketTask?

func connect() {
let session = URLSession(configuration: .default, delegate: self, delegateQueue: OperationQueue())
var request = URLRequest(url: url)
request.addValue(apiKey, forHTTPHeaderField: "apikey")

webSocket = session.webSocketTask(with: request)
webSocket?.resume()

sendAudioData()
}

func sendAudioData() {
guard let webSocket = webSocket else { return }
let fileURL = URL(fileURLWithPath: "audio.pcm")

guard let fileHandle = try? FileHandle(forReadingFrom: fileURL) else {
print("Could not open audio file.")
return
}

DispatchQueue.global().async {
while true {
let data = fileHandle.readData(ofLength: 1024)
if data.isEmpty { break }

let message = URLSessionWebSocketTask.Message.data(data)
webSocket.send(message) { error in
if let error = error {
print("Error sending data: \(error)")
return
}
}

Thread.sleep(forTimeInterval: 0.02)
}

fileHandle.closeFile()
webSocket.cancel()
}
}

func urlSession(_ session: URLSession, webSocketTask: URLSessionWebSocketTask, didCloseWith closeCode: URLSessionWebSocketTask.CloseCode, reason: Data?) {
print("WebSocket closed")
}
}

let client = WebSocketClient()
client.connect()

Kotlin

import okhttp3.OkHttpClient
import okhttp3.Request
import okhttp3.WebSocket
import okhttp3.WebSocketListener
import okio.ByteString
import java.io.File
import kotlinx.coroutines.*

class WebSocketClient : WebSocketListener() {
private val apiKey = "YOUR_API_KEY"
private val url = "wss://api-uat.iapp.co.th/asr"

fun connect() {
val client = OkHttpClient()
val request = Request.Builder()
.url(url)
.addHeader("apikey", apiKey)
.build()

val webSocket = client.newWebSocket(request, this)

CoroutineScope(Dispatchers.IO).launch {
sendAudioData(webSocket)
}
}

private suspend fun sendAudioData(webSocket: WebSocket) {
val frameSize = 1024
val file = File("audio.pcm")
val byteArray = ByteArray(frameSize)

file.inputStream().use { inputStream ->
while (inputStream.read(byteArray) != -1) {
webSocket.send(ByteString.of(byteArray, 0, frameSize))
delay(20) // 20ms delay
}
}
webSocket.close(1000, "Finished sending data")
}

override fun onMessage(webSocket: WebSocket, text: String) {
println("Received: $text")
}

override fun onFailure(webSocket: WebSocket, t: Throwable, response: okhttp3.Response?) {
println("Error: ${t.message}")
}

override fun onClosing(webSocket: WebSocket, code: Int, reason: String) {
println("WebSocket closing: $code / $reason")
webSocket.close(code, reason)
}
}

fun main() {
val client = WebSocketClient()
client.connect()
}

Java

import okhttp3.*;
import okio.ByteString;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

public class WebSocketClient {
private static final String URL = "wss://api-uat.iapp.co.th/asr";
private static final String API_KEY = "YOUR_API_KEY";

public static void main(String[] args) {
OkHttpClient client = new OkHttpClient();
Request request = new Request.Builder()
.url(URL)
.addHeader("apikey", API_KEY)
.build();

WebSocket webSocket = client.newWebSocket(request, new WebSocketListener() {
@Override
public void onOpen(WebSocket webSocket, Response response) {
System.out.println("WebSocket opened");
new Thread(() -> sendAudioData(webSocket)).start();
}

@Override
public void onMessage(WebSocket webSocket, String text) {
System.out.println("Received: " + text);
}

@Override
public void onFailure(WebSocket webSocket, Throwable t, Response response) {
System.out.println("Error: " + t.getMessage());
}

@Override
public void onClosing(WebSocket webSocket, int code, String reason) {
System.out.println("WebSocket closing: " + code + " / " + reason);
webSocket.close(code, reason);
}
});
}

private static void sendAudioData(WebSocket webSocket) {
int frameSize = 1024;
File file = new File("audio.pcm");
try (FileInputStream fis = new FileInputStream(file)) {
byte[] buffer = new byte[frameSize];
int bytesRead;
while ((bytesRead = fis.read(buffer)) != -1) {
webSocket.send(ByteString.of(buffer, 0, bytesRead));
Thread.sleep(20); // 20ms delay
}
webSocket.close(1000, "Finished sending data");
} catch (Exception e) {
e.printStackTrace();
}
}
}

Dart

import 'dart:async';
import 'dart:convert';
import 'dart:io';

void main() async {
final url = "wss://api-uat.iapp.co.th/asr";
final apiKey = "YOUR_API_KEY";

final socket = await WebSocket.connect(url, headers: {'apikey': apiKey});
print('WebSocket connected');

sendAudioData(socket);
socket.listen(
(data) => print('Received: $data'),
onError: (error) => print('Error: $error'),
onDone: () => print('WebSocket closed'),
);
}

void sendAudioData(WebSocket socket) async {
final file = File('audio.pcm');
final frameSize = 1024;
final raf = file.openRead();

await for (final chunk in raf) {
socket.add(chunk);
await Future.delayed(Duration(milliseconds: 20)); // 20ms delay
}

socket.close();
}

Limitations and Best Practices

Limitations

  • Thai language support only
  • Specific audio format requirements
  • WebSocket connection required
  • Network dependency

Best Practices

  • Maintain stable network connection
  • Use appropriate frame size
  • Handle connection errors gracefully
  • Monitor WebSocket state
  • Process response states correctly

Accuracy & Performance

Overall Accuracy

  • Real-time transcription quality
  • Continuous speech recognition
  • State-based accuracy tracking

Processing Speed

  • Real-time processing
  • Low latency response
  • Network-dependent performance

Factors Affecting Accuracy

  • Audio quality
  • Network stability
  • Background noise
  • Speaking clarity
  • Microphone quality

History

Version 1.0 (2023)

  • Initial release
  • WebSocket streaming support
  • Real-time processing capability
  • File and microphone input support
  • State-based response system

Pricing

| AI API Service Name       | Endpoint          | IC Per Seconds  | On-Premise |
| ------------------------- | ----------------- | --------------- | ---------- |
| Thai Speech To Text (ASR) | iapp-asr-v3-en    | 1 IC/60 Seconds | Contact    |
| Thai Speech To Text (ASR) | iapp-asr-v3-th-en | 1 IC/60 Seconds | Contact    |