In a comprehensive tutorial, AssemblyAI provides insights into building a real-time language translation service with JavaScript. The tutorial combines AssemblyAI's real-time Speech-to-Text transcription with DeepL to translate the transcribed text into various languages.
Introduction to Real-Time Translation
Translation plays a key role in communication and accessibility across languages. For example, tourists who do not understand the local language of a foreign country may struggle to communicate. AssemblyAI's Streaming Speech-to-Text service can transcribe speech in real time, and the transcript can then be translated with DeepL, making communication seamless.
Project Setup
The tutorial begins by setting up a Node.js project and installing the required dependencies: Express.js for a simple server, dotenv for managing environment variables, and the official AssemblyAI and DeepL client libraries.
mkdir real-time-translation
cd real-time-translation
npm init -y
npm install express dotenv assemblyai deepl-node
The API keys for AssemblyAI and DeepL are stored in a .env file to keep them secure and to avoid exposing them in the frontend.
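For reference, a minimal .env file could look like the sketch below; the variable names ASSEMBLYAI_API_KEY and DEEPL_API_KEY are assumptions for illustration and should match whatever names the backend reads from process.env.
# .env — hypothetical variable names; replace the placeholders with your real keys
ASSEMBLYAI_API_KEY=<your-assemblyai-api-key>
DEEPL_API_KEY=<your-deepl-api-key>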
Creating the Backend
The backend keeps the API keys secure and generates temporary tokens for safe communication with the AssemblyAI and DeepL APIs. Routes are defined to serve the frontend, generate tokens, and translate text.
const express = require("express");
const deepl = require("deepl-node");
const { AssemblyAI } = require("assemblyai");
require("dotenv").config();

const app = express();
const port = 3000;

// Clients for AssemblyAI and DeepL, configured with the API keys from .env
// (the variable names ASSEMBLYAI_API_KEY and DEEPL_API_KEY are assumed here).
const client = new AssemblyAI({ apiKey: process.env.ASSEMBLYAI_API_KEY });
const translator = new deepl.Translator(process.env.DEEPL_API_KEY);

app.use(express.static("public"));
app.use(express.json());

// Serve the frontend
app.get("/", (req, res) => {
  res.sendFile(__dirname + "/public/index.html");
});

// Issue a temporary real-time token so the API key never reaches the browser
app.get("/token", async (req, res) => {
  const token = await client.realtime.createTemporaryToken({ expires_in: 300 });
  res.json({ token });
});

// Translate text from English into the requested target language with DeepL
app.post("/translate", async (req, res) => {
  const { text, target_lang } = req.body;
  const translation = await translator.translateText(text, "en", target_lang);
  res.json({ translation });
});

app.listen(port, () => {
  console.log(`Listening on port ${port}`);
});
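As a quick sanity check of the translation route, a short script along the lines of the sketch below can be run once the server is up. The file name test-translate.mjs is hypothetical, and the snippet assumes Node 18+ (which ships a global fetch) with the server listening on http://localhost:3000.
// test-translate.mjs — hypothetical quick check, not part of the original tutorial.
// Assumes the Express server above is running on http://localhost:3000.
const res = await fetch("http://localhost:3000/translate", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ text: "Hello, how are you?", target_lang: "es" }),
});

const { translation } = await res.json();
// deepl-node returns a TextResult object; the translated string is in .text
console.log(translation.text);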
Frontend Development
The frontend consists of an HTML page with text areas for displaying the transcript and the translation, plus a button to start and stop recording. The AssemblyAI SDK and the RecordRTC library handle real-time audio recording and transcription.
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Voice Recording and Transcription</title>
    <script src="https://cdn.tailwindcss.com"></script>
  </head>
  <body>
    <div class="min-h-screen flex flex-col items-center justify-center bg-gray-100 p-4">
      <div class="w-full max-w-6xl bg-white shadow-md rounded-lg p-4 flex flex-col md:flex-row space-y-4 md:space-y-0 md:space-x-4">
        <div class="flex-1">
          <label for="transcript" class="block text-sm font-medium text-gray-700">Transcript</label>
          <textarea id="transcript" rows="20" class="mt-1 block w-full p-2 border border-gray-300 rounded-md shadow-sm"></textarea>
        </div>
        <div class="flex-1">
          <label for="translation" class="block text-sm font-medium text-gray-700">Translation</label>
          <select id="translation-language" class="mt-1 block w-full p-2 border border-gray-300 rounded-md shadow-sm">
            <option value="es">Spanish</option>
            <option value="fr">French</option>
            <option value="de">German</option>
            <option value="zh">Chinese</option>
          </select>
          <textarea id="translation" rows="18" class="mt-1 block w-full p-2 border border-gray-300 rounded-md shadow-sm"></textarea>
        </div>
      </div>
      <button id="record-button" class="mt-4 px-6 py-2 bg-blue-500 text-white rounded-md shadow">Record</button>
    </div>
    <script src="https://www.unpkg.com/assemblyai@latest/dist/assemblyai.umd.min.js"></script>
    <script src="https://www.WebRTC-Experiment.com/RecordRTC.js"></script>
    <script src="main.js"></script>
  </body>
</html>
Real-Time Transcription and Translation
The main.js file handles audio recording, transcription, and translation. AssemblyAI's real-time transcription service processes the audio, and the DeepL API translates each final transcript into the selected language.
const recordBtn = document.getElementById("record-button");
const transcript = document.getElementById("transcript");
const translationLanguage = document.getElementById("translation-language");
const translation = document.getElementById("translation");

let isRecording = false;
let recorder;
let rt;

const run = async () => {
  if (isRecording) {
    // Stop: close the real-time session and the recorder, then reset the UI
    if (rt) {
      await rt.close(false);
      rt = null;
    }
    if (recorder) {
      recorder.stopRecording();
      recorder = null;
    }
    recordBtn.innerText = "Record";
    transcript.innerText = "";
    translation.innerText = "";
  } else {
    recordBtn.innerText = "Loading...";

    // Fetch a temporary token from the backend and open a real-time session
    const response = await fetch("/token");
    const data = await response.json();
    rt = new assemblyai.RealtimeService({ token: data.token });

    const texts = {};
    let translatedText = "";

    rt.on("transcript", async (message) => {
      // Rebuild the running transcript in chronological order of audio start times
      let msg = "";
      texts[message.audio_start] = message.text;
      const keys = Object.keys(texts);
      keys.sort((a, b) => a - b);
      for (const key of keys) {
        if (texts[key]) {
          msg += ` ${texts[key]}`;
        }
      }
      transcript.innerText = msg;

      // Only translate final transcripts, not partial results
      if (message.message_type === "FinalTranscript") {
        const response = await fetch("/translate", {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
          },
          body: JSON.stringify({
            text: message.text,
            target_lang: translationLanguage.value,
          }),
        });
        const data = await response.json();
        translatedText += ` ${data.translation.text}`;
        translation.innerText = translatedText;
      }
    });

    rt.on("error", async (error) => {
      console.error(error);
      await rt.close();
    });

    rt.on("close", (event) => {
      console.log(event);
      rt = null;
    });

    await rt.connect();

    // Record microphone audio with RecordRTC and stream it to AssemblyAI
    navigator.mediaDevices
      .getUserMedia({ audio: true })
      .then((stream) => {
        recorder = new RecordRTC(stream, {
          type: "audio",
          mimeType: "audio/webm;codecs=pcm",
          recorderType: StereoAudioRecorder,
          timeSlice: 250,
          desiredSampRate: 16000,
          numberOfAudioChannels: 1,
          bufferSize: 16384,
          audioBitsPerSecond: 128000,
          ondataavailable: async (blob) => {
            if (rt) {
              rt.sendAudio(await blob.arrayBuffer());
            }
          },
        });
        recorder.startRecording();
        recordBtn.innerText = "Stop Recording";
      })
      .catch((err) => console.error(err));
  }

  isRecording = !isRecording;
};

recordBtn.addEventListener("click", () => {
  run();
});
Conclusion
This tutorial demonstrates how to build a real-time language translation service in JavaScript with AssemblyAI and DeepL. Such a tool can significantly improve communication and accessibility for users across different language settings. For step-by-step instructions, see the original AssemblyAI tutorial.