// File: serverless-voice-to-text-bot/main.go
// (Repository page metadata: 1 · Fork 0 · 219 lines · 5.6 KiB · Go)

package main
import (
"context"
"encoding/json"
"fmt"
"io"
"log"
"mime/multipart"
"net/http"
"os"
"slices"
"strconv"
"strings"
"github.com/carlmjohnson/requests"
)
// TelegramMessage holds the fields of an incoming Telegram message that the
// webhook handler reads after decoding an update.
type TelegramMessage struct {
	// MessageID identifies the message; replies are attached to it.
	MessageID int `json:"message_id,omitempty"`
	// From is the sender, checked against the allow-list.
	From TelegramUser `json:"from,omitempty"`
	// Chat is the conversation the message arrived in.
	Chat TelegramChat `json:"chat,omitempty"`
	// Voice is nil when the message carries no voice recording.
	Voice *TelegramVoice `json:"voice,omitempty"`
}
// TelegramUser describes the sender of a message. Either field may be used
// to match an entry of the allow-list.
type TelegramUser struct {
	// ID is the numeric user identifier.
	ID int `json:"id,omitempty"`
	// Username is the user's handle; it stays empty when the JSON omits it.
	Username string `json:"username,omitempty"`
}
// TelegramChat identifies the conversation a message belongs to.
type TelegramChat struct {
	// ID is the chat identifier that replies are addressed to.
	ID int `json:"id,omitempty"`
}
// TelegramVoice is the voice attachment of a message.
type TelegramVoice struct {
	// FileID is the identifier handed to Telegram's getFile method to
	// locate the recording for download.
	FileID string `json:"file_id,omitempty"`
}
// TelegramSendMessage is the JSON body posted to Telegram's sendMessage
// method.
type TelegramSendMessage struct {
	// ChatID is the chat the text is delivered to.
	ChatID int `json:"chat_id,omitempty"`
	// Text is the message content.
	Text string `json:"text,omitempty"`
	// ReplyParameters is left nil (and omitted from the JSON) when the
	// message is not a reply.
	ReplyParameters *TelegramReplyParameters `json:"reply_parameters,omitempty"`
}
// TelegramReplyParameters marks an outgoing message as a reply.
type TelegramReplyParameters struct {
	// MessageID is the identifier of the message being replied to.
	MessageID int `json:"message_id,omitempty"`
}
// main reads the bot configuration from the environment, registers the HTTP
// handlers and serves until the listener fails.
//
// Environment variables:
//   - BOT_TOKEN: Telegram bot token (required).
//   - OPENAI_KEY: OpenAI API key (required).
//   - ALLOWED_USERS: comma-separated usernames and/or numeric user IDs that
//     may use the bot (required, at least one non-empty entry).
//   - PORT: TCP port to listen on (optional, defaults to 8080).
func main() {
	// Retrieve bot token from environment variable
	botToken := os.Getenv("BOT_TOKEN")
	if botToken == "" {
		log.Fatal("BOT_TOKEN environment variable is not set")
	}

	// Retrieve OpenAI API key
	openaiApiKey := os.Getenv("OPENAI_KEY")
	if openaiApiKey == "" {
		log.Fatal("OPENAI_KEY environment variable is not set")
	}

	// Retrieve allowed users. strings.Split never returns an empty slice, so
	// the list is cleaned first; otherwise an unset variable would yield a
	// single "" entry that matches every sender without a username.
	allowedUsers := parseAllowedUsers(os.Getenv("ALLOWED_USERS"))
	if len(allowedUsers) == 0 {
		log.Fatal("ALLOWED_USERS environment variable is not set")
	}
	log.Println("Allowed users:", allowedUsers)

	// Set up the HTTP server
	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		io.WriteString(w, "Hi!")
	})

	http.HandleFunc("/webhook", func(w http.ResponseWriter, r *http.Request) {
		// Decode the incoming update. Every return below leaves the response
		// untouched, so the caller receives net/http's default 200 status.
		var update struct {
			Message *TelegramMessage `json:"message"`
		}
		if err := json.NewDecoder(r.Body).Decode(&update); err != nil {
			log.Println("Error decoding update:", err)
			return
		}
		msg := update.Message
		if msg == nil {
			// Updates without a message are ignored.
			return
		}

		// A sender is allowed when either the username or the numeric ID is
		// on the list.
		allowed := slices.Contains(allowedUsers, msg.From.Username) ||
			slices.Contains(allowedUsers, strconv.Itoa(msg.From.ID))
		if !allowed {
			sendMessage(botToken, msg.Chat.ID, "Sorry, you are not allowed to use this bot!", msg.MessageID)
			return
		}

		if msg.Voice == nil {
			sendMessage(botToken, msg.Chat.ID, "Please send a voice message", msg.MessageID)
			return
		}

		transcript, err := transcriptVoiceMessage(botToken, openaiApiKey, msg.Voice.FileID)
		if err != nil {
			log.Println("Error transcribing voice message:", err)
			sendMessage(botToken, msg.Chat.ID, "Failed to transcribe voice message", msg.MessageID)
			return
		}

		// Long transcripts are delivered as several replies of at most 3000
		// characters each.
		for _, transcriptPart := range splitText(transcript, 3000) {
			sendMessage(botToken, msg.Chat.ID, transcriptPart, msg.MessageID)
		}
	})

	// Start the HTTP server
	port := os.Getenv("PORT")
	if port == "" {
		port = "8080"
	}
	log.Printf("Starting server on :%s", port)
	log.Fatal(http.ListenAndServe(":"+port, nil))
}

// parseAllowedUsers splits a comma-separated allow-list into its entries,
// trimming surrounding whitespace and dropping empty entries. It returns nil
// when raw contains no usable entry.
func parseAllowedUsers(raw string) []string {
	var users []string
	for _, entry := range strings.Split(raw, ",") {
		if entry = strings.TrimSpace(entry); entry != "" {
			users = append(users, entry)
		}
	}
	return users
}
// sendMessage posts text to chatID through Telegram's sendMessage method,
// authenticating with botToken. A non-zero replyTo makes the message a reply
// to the message with that ID. Failures are logged and not returned.
func sendMessage(botToken string, chatID int, text string, replyTo int) {
	payload := &TelegramSendMessage{ChatID: chatID, Text: text}
	if replyTo != 0 {
		payload.ReplyParameters = &TelegramReplyParameters{MessageID: replyTo}
	}

	endpoint := fmt.Sprintf("https://api.telegram.org/bot%s/sendMessage", botToken)
	request := requests.URL(endpoint).
		Method(http.MethodPost).
		BodyJSON(payload)
	if err := request.Fetch(context.Background()); err != nil {
		log.Println("Error sending message:", err)
	}
}
// transcriptVoiceMessage resolves the Telegram voice file identified by
// fileID, streams it into an OpenAI audio transcription request and returns
// the transcribed text.
//
// botToken authenticates the Telegram calls and openaiApiKey the OpenAI call.
// The audio is piped from the download straight into the multipart upload
// instead of being buffered in full. On any failure the returned string is
// empty and the error says which step failed.
func transcriptVoiceMessage(botToken, openaiApiKey, fileID string) (string, error) {
	// All requests share one cancellable context so that returning early also
	// aborts a download that is still in flight.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Get file path
	var fileResponse struct {
		OK     bool `json:"ok"`
		Result struct {
			FilePath string `json:"file_path"`
		} `json:"result"`
	}
	err := requests.URL(fmt.Sprintf("https://api.telegram.org/bot%s/getFile", botToken)).
		Param("file_id", fileID).
		ToJSON(&fileResponse).
		Fetch(ctx)
	if err != nil {
		return "", fmt.Errorf("looking up voice file: %w", err)
	}
	// Without a path the download URL below would be meaningless.
	if !fileResponse.OK || fileResponse.Result.FilePath == "" {
		return "", fmt.Errorf("telegram returned no file path for file %q", fileID)
	}

	// Download the file; the result of the download (nil or an error) is
	// handed to whoever reads from voiceReader.
	voiceDownload := fmt.Sprintf("https://api.telegram.org/file/bot%s/%s", botToken, fileResponse.Result.FilePath)
	voiceReader, voiceWriter := io.Pipe()
	go func() {
		voiceWriter.CloseWithError(
			requests.URL(voiceDownload).
				ToWriter(voiceWriter).
				Fetch(ctx),
		)
	}()

	// Build the multipart body on the fly while the upload consumes it.
	multipartReader, multipartWriter := io.Pipe()
	// If the upload ends before draining the body, closing the read end makes
	// the pending write in the goroutine below fail instead of blocking
	// forever.
	defer multipartReader.Close()
	mpw := multipart.NewWriter(multipartWriter)
	go func() {
		// Closing the audio pipe's read end unblocks the download goroutine
		// if this goroutine stops before the download has finished.
		defer voiceReader.Close()
		multipartWriter.CloseWithError(func() error {
			if err := mpw.WriteField("model", "whisper-1"); err != nil {
				return err
			}
			fileWriter, err := mpw.CreateFormFile("file", "file.oga")
			if err != nil {
				return err
			}
			if _, err := io.Copy(fileWriter, voiceReader); err != nil {
				return err
			}
			// Writes the closing multipart boundary.
			return mpw.Close()
		}())
	}()

	// Transcribe message
	var transcription struct {
		Text string `json:"text"`
	}
	err = requests.URL("https://api.openai.com/v1/audio/transcriptions").
		Method(http.MethodPost).
		Header("Authorization", fmt.Sprintf("Bearer %s", openaiApiKey)).
		ContentType(mpw.FormDataContentType()).
		BodyReader(multipartReader).
		ToJSON(&transcription).
		Fetch(ctx)
	if err != nil {
		return "", fmt.Errorf("transcribing audio: %w", err)
	}
	return transcription.Text, nil
}
// splitText cuts s into consecutive pieces of at most chunkSize runes each,
// so multi-byte characters are never split. Only the last piece may be
// shorter than chunkSize.
//
// When s already fits (its byte length does not exceed chunkSize) or
// chunkSize is not positive, s is returned unchanged as the only element.
// The result always contains at least one element.
func splitText(s string, chunkSize int) []string {
	// len(s) counts bytes, which is an upper bound on the number of runes, so
	// this fast path never returns a piece that is too long. A non-positive
	// chunkSize cannot be honoured and is treated as "do not split".
	if chunkSize <= 0 || len(s) <= chunkSize {
		return []string{s}
	}

	runes := []rune(s)
	chunks := make([]string, 0, (len(runes)+chunkSize-1)/chunkSize)
	for len(runes) > chunkSize {
		chunks = append(chunks, string(runes[:chunkSize]))
		runes = runes[chunkSize:]
	}
	// s is non-empty here, so between 1 and chunkSize runes remain.
	return append(chunks, string(runes))
}