chinese-speech-to-text-base

import Text2TextDemo from '@site/src/components/Text2TextBox';
import DemoNlpCommonResult from '@site/src/components/DemoResult/nlp'

# 🇨🇳 การแปลงเสียงพูดภาษาจีนเป็นข้อความ (ASR) พื้นฐาน 🆕

🆕 ระบบแปลงเสียงพูดภาษาจีนเป็นข้อความ
![Version](https://img.shields.io/badge/Version-1.0-blue)
![Status](https://img.shields.io/badge/Status-Active-brightgreen)

<img src="/img/api/iApp-ASR-Base-Model.png" alt="iApp Chinese Speech to Text (ASR) Base API" width="400"/>

## ทดลองใช้งาน

<Text2TextDemo
endpointUrl="https://api.iapp.co.th/v1/audio/stt/zh/transcriptions"
exampleFile="/audio/john-china.wav"
firstinputTitle="เลือกขนาดชิ้นส่วนของคุณ"
firsttextInput="7"
keyendpoint="asr"
submitTitle="ประมวลผล"
customDemoResultView={(result,question,apiType,isSafari) => DemoNlpCommonResult(result,question,apiType,isSafari)}
/>

## เริ่มต้นใช้งาน

### ข้อกำหนดเบื้องต้น

-   API key จาก ไอแอพพ์เทคโนโลยี
-   ไฟล์เสียงในรูปแบบที่รองรับ
-   ความยาวไฟล์สูงสุด: ไม่เกิน 10 ชั่วโมง

### คุณสมบัติหลัก

-   การดึงข้อความจากไฟล์เสียง
-   การแยกเสียงพูดสำหรับการสนทนาหลายคน
-   รองรับรูปแบบไฟล์เสียงต่างๆ
-   การถอดเสียงที่มีคุณภาพสูงพร้อมกับการแสดงเวลา

### ความปลอดภัยและการปฏิบัติตามกฎระเบียบ

-   การตรวจสอบความถูกต้องด้วย API key
-   เป็นไปตาม GDPR และ PDPA

## การใช้งาน API

### รูปแบบไฟล์ที่รองรับ

API รองรับรูปแบบไฟล์เสียงต่อไปนี้:

-   เสียง: `.mp3`, `.wav`, `.m4a`, `.aac`, `.aif`, `.cda`, `.flac`, `.mid`, `.ogg`, `.wma`

**Endpoint**: `POST /v1/audio/stt/zh/transcriptions`

**Header**:

-   `Content-Type`: multipart/form-data
-   `apikey`: API key ของคุณสำหรับการตรวจสอบความถูกต้อง

**พารามิเตอร์แบบฟอร์ม**:

| พารามิเตอร์    | ชนิด    | จำเป็นต้องใช้ | ค่าเริ่มต้น  | คำอธิบาย                                 |
| ------------ | ------- | -------- | -------- | ------------------------------------------- |
| `file`       | ไฟล์    | ใช่      | -        | ไฟล์เสียงที่จะถอดเสียง                |
| `prompt`     | ข้อความ  | ไม่ใช่       | `"base"` | เทมเพลตพรอมต์ที่จะใช้                      |
| `chunk_size` | จำนวนเต็ม | ไม่ใช่       | `20`     | ขนาดเป็นวินาทีสำหรับการประมวลผลชิ้นส่วนเสียง |

**ตัวอย่างคำขอ**:

```bash
# Upload an audio file for transcription.
# The `apikey` header is required for authentication; HTTPS keeps it out of cleartext.
curl -X POST "https://api.iapp.co.th/v1/audio/stt/zh/transcriptions" \
  -H "Content-Type: multipart/form-data" \
  -H "apikey: YOUR_API_KEY" \
  -F "file=@/path/to/your/audio.mp3" \
  -F "prompt=base" \
  -F "chunk_size=20"

ตัวอย่างการตอบสนอง:

{
    "output": [
        {
            "text": "ข้อความที่ถอดเสียงจากส่วนที่หนึ่ง",
            "start": 0.0,
            "end": 5.28,
            "speaker": "SPEAKER_00",
            "segment": 0
        },
        {
            "text": "ข้อความที่ถอดเสียงจากส่วนที่สอง",
            "start": 5.28,
            "end": 10.56,
            "speaker": "SPEAKER_01",
            "segment": 1
        }
    ],
    "audio_duration_in_seconds": 60.5,
    "uploaded_file_name": "example.mp3",
    "processing_time_in_seconds": 12.34,
    "use_asr_pro": false,
    "asr_pro_is_used": false,
    "iapp": {
        "seconds": 60.5
    }
}

รหัสสถานะการตอบสนอง

200 OK: คำขอสำเร็จ
400 Bad Request: คำขอไม่ถูกต้อง (เช่น ไม่มีไฟล์อัปโหลด รูปแบบไฟล์ไม่รองรับ)
404 Not Found: ไม่พบ ID งาน
500 Internal Server Error: การประมวลผลล้มเหลว ข้อผิดพลาดของเซิร์ฟเวอร์

หมายเหตุ

รองรับการแยกเสียงพูด โดยส่วนต่างๆ จะมีทั้ง ID ผู้พูดและข้อมูลเวลา

ตัวอย่างโค้ด

ตัวอย่าง Python

import requests

def transcribe_audio(file_path, api_key):
    """Upload an audio file to the Chinese speech-to-text endpoint.

    Args:
        file_path: Path of the audio file to upload.
        api_key: API key sent in the ``apikey`` request header.

    Returns:
        The decoded JSON response when the server answers with HTTP 200;
        otherwise a string of the form ``"Error: <status>, <body>"``.
    """
    # HTTPS so the API key is not sent in cleartext.
    url = "https://api.iapp.co.th/v1/audio/stt/zh/transcriptions"
    headers = {"apikey": api_key}
    data = {
        "prompt": "base",
        "chunk_size": 20
    }

    # The context manager closes the file handle even if the request raises.
    with open(file_path, "rb") as audio_file:
        response = requests.post(
            url, headers=headers, files={"file": audio_file}, data=data
        )

    if response.status_code == 200:
        return response.json()
    return f"Error: {response.status_code}, {response.text}"

ตัวอย่าง JavaScript

/**
 * Uploads an audio file to the Chinese speech-to-text endpoint.
 *
 * @param {string} filePath - URL or path that `fetch` can load the audio from.
 * @param {string} apiKey - API key sent in the `apikey` request header.
 * @returns {Promise<object>} Parsed JSON body of a successful response.
 * @throws {Error} If the audio cannot be loaded or the API answers with a non-2xx status.
 */
async function transcribeAudio(filePath, apiKey) {
    // HTTPS so the API key is not sent in cleartext.
    const url = "https://api.iapp.co.th/v1/audio/stt/zh/transcriptions"

    const audioResponse = await fetch(filePath)
    if (!audioResponse.ok) {
        // Without this check an error page would be uploaded as if it were audio.
        throw new Error(`Error: ${audioResponse.status}, could not load ${filePath}`)
    }
    const audioBlob = await audioResponse.blob()

    // Keep the original file name (minus any query/fragment); a bare Blob is
    // otherwise uploaded under the generic name "blob" with no extension.
    const fileName = filePath.split(/[?#]/)[0].split(/[\\/]/).pop() || "audio"

    const formData = new FormData()
    formData.append("file", audioBlob, fileName)
    formData.append("prompt", "base")
    formData.append("chunk_size", "20")

    const response = await fetch(url, {
        method: "POST",
        headers: {
            apikey: apiKey,
        },
        body: formData,
    })

    if (!response.ok) {
        throw new Error(`Error: ${response.status}, ${await response.text()}`)
    }
    return await response.json()
}

ตัวอย่าง PHP

/**
 * Upload an audio file to the Chinese speech-to-text endpoint.
 *
 * @param string $file_path Path of the audio file to upload.
 * @param string $api_key   API key sent in the "apikey" request header.
 * @return array|string|null Result of json_decode() on HTTP 200 (associative array
 *                           for a JSON object); otherwise an "Error: ..." string.
 */
function transcribe_audio($file_path, $api_key) {
    // HTTPS so the API key is not sent in cleartext.
    $url = "https://api.iapp.co.th/v1/audio/stt/zh/transcriptions";

    $curl = curl_init();

    $post_data = [
        'file' => new CURLFile($file_path),
        'prompt' => 'base',
        'chunk_size' => '20'
    ];

    curl_setopt_array($curl, [
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $post_data,
        CURLOPT_HTTPHEADER => [
            "apikey: $api_key"
        ]
    ]);

    $response = curl_exec($curl);
    if ($response === false) {
        // Transport-level failure (DNS, TLS, timeout, ...): there is no HTTP
        // status or body, so report cURL's own message instead of "Error: 0, ".
        $transport_error = curl_error($curl);
        curl_close($curl);
        return "Error: $transport_error";
    }

    $status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
    curl_close($curl);

    if ($status_code === 200) {
        return json_decode($response, true);
    }
    return "Error: $status_code, $response";
}

// Usage: transcribe a local audio file and print whatever comes back.
// transcribe_audio() returns the decoded JSON on HTTP 200, or an "Error: ..." string otherwise.
$result = transcribe_audio("path/to/audio.mp3", "YOUR_API_KEY");
print_r($result);

ตัวอย่าง Swift

import Foundation

/// Uploads an audio file to the Chinese speech-to-text endpoint as multipart/form-data.
///
/// - Parameters:
///   - filePath: Path of the local audio file to upload.
///   - apiKey: API key sent in the `apikey` request header.
///   - completion: Called once with the decoded JSON object on success, or with an
///     error when the file cannot be read, the request fails, the server answers
///     with a non-2xx status, or the body is not a JSON object.
func transcribeAudio(filePath: String, apiKey: String, completion: @escaping (Result<[String: Any], Error>) -> Void) {
    // HTTPS keeps the API key out of cleartext; plain HTTP is also rejected by
    // App Transport Security in its default configuration.
    let url = URL(string: "https://api.iapp.co.th/v1/audio/stt/zh/transcriptions")!

    var request = URLRequest(url: url)
    request.httpMethod = "POST"
    request.addValue(apiKey, forHTTPHeaderField: "apikey")

    // A random boundary separates the parts of the multipart body.
    let boundary = UUID().uuidString
    request.setValue("multipart/form-data; boundary=\(boundary)", forHTTPHeaderField: "Content-Type")

    let fileURL = URL(fileURLWithPath: filePath)
    guard let fileData = try? Data(contentsOf: fileURL) else {
        completion(.failure(NSError(domain: "", code: 0, userInfo: [NSLocalizedDescriptionKey: "Could not load file"])))
        return
    }

    var body = Data()
    let filename = fileURL.lastPathComponent

    // Add file
    // NOTE(review): the part is always labelled audio/mpeg, even for non-MP3 uploads —
    // confirm the server ignores this or derive it from the file extension.
    body.append("--\(boundary)\r\n".data(using: .utf8)!)
    body.append("Content-Disposition: form-data; name=\"file\"; filename=\"\(filename)\"\r\n".data(using: .utf8)!)
    body.append("Content-Type: audio/mpeg\r\n\r\n".data(using: .utf8)!)
    body.append(fileData)
    body.append("\r\n".data(using: .utf8)!)

    // Add prompt
    body.append("--\(boundary)\r\n".data(using: .utf8)!)
    body.append("Content-Disposition: form-data; name=\"prompt\"\r\n\r\n".data(using: .utf8)!)
    body.append("base".data(using: .utf8)!)
    body.append("\r\n".data(using: .utf8)!)

    // Add chunk_size
    body.append("--\(boundary)\r\n".data(using: .utf8)!)
    body.append("Content-Disposition: form-data; name=\"chunk_size\"\r\n\r\n".data(using: .utf8)!)
    body.append("20".data(using: .utf8)!)
    body.append("\r\n".data(using: .utf8)!)

    // Closing delimiter of the multipart body.
    body.append("--\(boundary)--\r\n".data(using: .utf8)!)

    request.httpBody = body

    URLSession.shared.dataTask(with: request) { data, response, error in
        if let error = error {
            completion(.failure(error))
            return
        }

        guard let data = data else {
            completion(.failure(NSError(domain: "", code: 0, userInfo: [NSLocalizedDescriptionKey: "No data received"])))
            return
        }

        // Surface HTTP errors as failures instead of handing an error payload to the
        // caller as if it were a transcription.
        if let http = response as? HTTPURLResponse, !(200...299).contains(http.statusCode) {
            let bodyText = String(data: data, encoding: .utf8) ?? ""
            completion(.failure(NSError(domain: "", code: http.statusCode, userInfo: [NSLocalizedDescriptionKey: "Error: \(http.statusCode), \(bodyText)"])))
            return
        }

        do {
            if let json = try JSONSerialization.jsonObject(with: data) as? [String: Any] {
                completion(.success(json))
            } else {
                completion(.failure(NSError(domain: "", code: 0, userInfo: [NSLocalizedDescriptionKey: "Invalid JSON format"])))
            }
        } catch {
            completion(.failure(error))
        }
    }.resume()
}

// Example call: print the decoded JSON on success, or the error on failure.
transcribeAudio(filePath: "path/to/audio.mp3", apiKey: "YOUR_API_KEY") { outcome in
    switch outcome {
    case let .failure(problem):
        print("Error: \(problem)")
    case let .success(payload):
        print(payload)
    }
}

ตัวอย่าง Kotlin

import okhttp3.*
import java.io.File
import java.io.IOException

/**
 * Uploads an audio file to the Chinese speech-to-text endpoint without blocking the caller.
 *
 * @param filePath path of the local audio file to upload
 * @param apiKey API key sent in the `apikey` request header
 * @param callback invoked with the raw response body on a successful (2xx) response,
 *   or with an [IOException] when the call fails or the server answers with an error status
 */
fun transcribeAudio(filePath: String, apiKey: String, callback: (Result<String>) -> Unit) {
    val client = OkHttpClient()
    val file = File(filePath)

    val requestBody = MultipartBody.Builder()
        .setType(MultipartBody.FORM)
        .addFormDataPart(
            "file",
            file.name,
            RequestBody.create(MediaType.parse("audio/*"), file)
        )
        .addFormDataPart("prompt", "base")
        .addFormDataPart("chunk_size", "20")
        .build()

    // HTTPS so the API key is not sent in cleartext.
    val request = Request.Builder()
        .url("https://api.iapp.co.th/v1/audio/stt/zh/transcriptions")
        .header("apikey", apiKey)
        .post(requestBody)
        .build()

    client.newCall(request).enqueue(object : Callback {
        override fun onFailure(call: Call, e: IOException) {
            callback(Result.failure(e))
        }

        override fun onResponse(call: Call, response: Response) {
            // `use` closes the response on every path, including the error branch,
            // so the underlying connection is always released.
            response.use { res ->
                if (res.isSuccessful) {
                    callback(Result.success(res.body()?.string() ?: ""))
                } else {
                    callback(Result.failure(IOException("Error: ${res.code()} ${res.message()}")))
                }
            }
        }
    })
}

ตัวอย่าง Java

import java.io.File;
import java.io.IOException;
import okhttp3.*;

/**
 * Minimal client for the Chinese speech-to-text endpoint, built on OkHttp.
 */
public class ASRApiClient {

    /**
     * Uploads an audio file for transcription without blocking the caller.
     *
     * @param filePath path of the local audio file to upload
     * @param apiKey   API key sent in the {@code apikey} request header
     * @param callback receives the response or the failure; the callback is
     *                 responsible for closing the {@link Response} it is given
     */
    public static void transcribeAudio(String filePath, String apiKey, Callback callback) {
        OkHttpClient client = new OkHttpClient();
        File file = new File(filePath);

        RequestBody requestBody = new MultipartBody.Builder()
                .setType(MultipartBody.FORM)
                .addFormDataPart(
                        "file",
                        file.getName(),
                        RequestBody.create(MediaType.parse("audio/*"), file)
                )
                .addFormDataPart("prompt", "base")
                .addFormDataPart("chunk_size", "20")
                .build();

        // HTTPS so the API key is not sent in cleartext.
        Request request = new Request.Builder()
                .url("https://api.iapp.co.th/v1/audio/stt/zh/transcriptions")
                .header("apikey", apiKey)
                .post(requestBody)
                .build();

        client.newCall(request).enqueue(callback);
    }

    /**
     * Example usage: transcribes a sample file and prints the raw response body,
     * or the error, to the console.
     */
    public static void main(String[] args) {
        transcribeAudio("path/to/audio.mp3", "YOUR_API_KEY", new Callback() {
            @Override
            public void onFailure(Call call, IOException e) {
                System.err.println("Error: " + e.getMessage());
            }

            @Override
            public void onResponse(Call call, Response response) throws IOException {
                // try-with-resources closes the response on both branches so the
                // connection is released even when the server reports an error.
                try (Response res = response) {
                    if (res.isSuccessful()) {
                        System.out.println(res.body().string());
                    } else {
                        System.err.println("Error: " + res.code() + " " + res.message());
                    }
                }
            }
        });
    }
}

ตัวอย่าง Dart (Flutter)

import 'dart:convert';
import 'dart:io';

import 'package:http/http.dart' as http;

/// Uploads an audio file to the Chinese speech-to-text endpoint.
///
/// [filePath] is the path of the local audio file and [apiKey] is sent in the
/// `apikey` request header. Completes with the decoded JSON object when the
/// server answers with HTTP 200.
///
/// Throws an [Exception] when the upload fails, the server answers with any
/// other status, or the body is not a JSON object.
Future<Map<String, dynamic>> transcribeAudio(String filePath, String apiKey) async {
  // HTTPS so the API key is not sent in cleartext.
  final uri = Uri.parse('https://api.iapp.co.th/v1/audio/stt/zh/transcriptions');

  final request = http.MultipartRequest('POST', uri);
  request.headers['apikey'] = apiKey;

  request.files.add(await http.MultipartFile.fromPath(
    'file',
    filePath,
  ));

  request.fields['prompt'] = 'base';
  request.fields['chunk_size'] = '20';

  try {
    final streamed = await request.send();
    final responseData = await http.Response.fromStream(streamed);

    if (streamed.statusCode == 200) {
      // jsonDecode (from dart:convert) returns dynamic; make the expected shape explicit.
      return jsonDecode(responseData.body) as Map<String, dynamic>;
    } else {
      throw Exception('Failed to transcribe: ${streamed.statusCode} ${responseData.body}');
    }
  } catch (e) {
    // Every failure, including the one thrown just above, is wrapped in one message.
    throw Exception('Error transcribing file: $e');
  }
}

chinese-speech-to-text-base

รหัสสถานะการตอบสนอง

หมายเหตุ

ตัวอย่างโค้ด

ตัวอย่าง Python

ตัวอย่าง JavaScript

ตัวอย่าง PHP

ตัวอย่าง Swift

ตัวอย่าง Kotlin

ตัวอย่าง Java

ตัวอย่าง Dart (Flutter)

ChindaX

Speechflow

รหัสสถานะการตอบสนอง​

หมายเหตุ​

ตัวอย่างโค้ด​

ตัวอย่าง Python​

ตัวอย่าง JavaScript​

ตัวอย่าง PHP​

ตัวอย่าง Swift​

ตัวอย่าง Kotlin​

ตัวอย่าง Java​

ตัวอย่าง Dart (Flutter)​

รหัสสถานะการตอบสนอง

หมายเหตุ

ตัวอย่างโค้ด

ตัวอย่าง Python

ตัวอย่าง JavaScript

ตัวอย่าง PHP

ตัวอย่าง Swift

ตัวอย่าง Kotlin

ตัวอย่าง Java

ตัวอย่าง Dart (Flutter)