GoBlog/tts.go

229 lines
5.4 KiB
Go
Raw Normal View History

2021-09-07 20:16:28 +00:00
package main
import (
"context"
2022-02-11 18:39:07 +00:00
"crypto/sha256"
"encoding/base64"
2021-09-07 20:16:28 +00:00
"errors"
2022-02-11 18:39:07 +00:00
"fmt"
2021-12-30 11:40:21 +00:00
"html"
"io"
"log"
2022-01-04 08:48:37 +00:00
"net/http"
2021-09-07 20:16:28 +00:00
"net/url"
"path"
2021-12-30 11:40:21 +00:00
"strings"
2021-09-07 20:16:28 +00:00
2021-12-26 08:18:08 +00:00
"github.com/carlmjohnson/requests"
"go.goblog.app/app/pkgs/bufferpool"
2022-01-05 09:56:53 +00:00
"go.goblog.app/app/pkgs/mp3merge"
2023-01-24 17:53:24 +00:00
"golang.org/x/sync/errgroup"
2021-09-07 20:16:28 +00:00
)
// ttsParameter is the name of the post parameter that holds the location of
// the post's text-to-speech audio file.
const ttsParameter = "tts"
func (a *goBlog) initTTS() {
if !a.ttsEnabled() {
return
}
createOrUpdate := func(p *post) {
// Automatically create audio for published section posts only
2022-12-26 18:52:06 +00:00
if !p.isPublicPublishedSectionPost() {
return
}
// Check if there is already a tts audio file
if p.firstParameter(ttsParameter) != "" {
return
}
// Create TTS audio
err := a.createPostTTSAudio(p)
if err != nil {
log.Printf("create post audio for %s failed: %v", p.Path, err)
}
}
a.pPostHooks = append(a.pPostHooks, createOrUpdate)
a.pUpdateHooks = append(a.pUpdateHooks, createOrUpdate)
a.pUndeleteHooks = append(a.pUndeleteHooks, createOrUpdate)
a.pDeleteHooks = append(a.pDeleteHooks, func(p *post) {
// Try to delete the audio file
if a.deletePostTTSAudio(p) {
log.Println("deleted tts audio for", p.Path)
}
})
}
// ttsEnabled reports whether text-to-speech can be used: the TTS config must
// be present, enabled and carry a Google API key, and media storage must be
// enabled as well because the audio is saved there.
func (a *goBlog) ttsEnabled() bool {
	cfg := a.cfg.TTS
	if cfg == nil || !cfg.Enabled || cfg.GoogleAPIKey == "" {
		return false
	}
	return a.mediaStorageEnabled()
}
2021-09-07 20:16:28 +00:00
func (a *goBlog) createPostTTSAudio(p *post) error {
// Get required values
2022-12-14 15:03:54 +00:00
lang := defaultIfEmpty(a.getBlogFromPost(p).Lang, "en")
2022-01-05 09:56:53 +00:00
// Create TTS text parts
parts := []string{}
// Add title if available
if title := p.Title(); title != "" {
parts = append(parts, a.renderMdTitle(title))
}
// Add body split into paragraphs because of 5000 character limit
phr, phw := io.Pipe()
go func() {
a.postHtmlToWriter(phw, &postHtmlOptions{p: p})
_ = phw.Close()
}()
postHtmlText, err := htmlTextFromReader(phr)
if err != nil {
return err
}
parts = append(parts, strings.Split(postHtmlText, "\n\n")...)
2022-01-05 09:56:53 +00:00
// Create TTS audio for each part
2023-01-24 17:53:24 +00:00
partReaders := []io.Reader{}
var g errgroup.Group
for _, part := range parts {
part := part
pr, pw := io.Pipe()
defer func() {
pw.Close()
}()
partReaders = append(partReaders, pr)
g.Go(func() error {
2022-01-05 09:56:53 +00:00
// Build SSML
ssml := "<speak>" + html.EscapeString(part) + "<break time=\"500ms\"/></speak>"
// Create TTS audio
2023-01-24 17:53:24 +00:00
err := a.createTTSAudio(lang, ssml, pw)
_ = pw.CloseWithError(err)
return err
2023-01-24 17:53:24 +00:00
})
2021-09-07 20:16:28 +00:00
}
2021-12-30 11:40:21 +00:00
2023-01-24 17:53:24 +00:00
// Merge parts together (needs buffer because the hash is needed before the file can be uploaded)
buf := bufferpool.Get()
defer bufferpool.Put(buf)
2022-02-11 18:39:07 +00:00
hash := sha256.New()
err = mp3merge.MergeMP3(io.MultiWriter(buf, hash), partReaders...)
2023-01-24 17:53:24 +00:00
if err != nil {
return err
}
// Check if other errors appeared
if err = g.Wait(); err != nil {
return err
}
2022-01-05 09:56:53 +00:00
2021-12-30 11:40:21 +00:00
// Save audio
2023-01-24 17:53:24 +00:00
loc, err := a.saveMediaFile(fmt.Sprintf("%x.mp3", hash.Sum(nil)), buf)
2021-09-07 20:16:28 +00:00
if err != nil {
return err
}
if loc == "" {
return errors.New("no media location for tts audio")
}
2023-01-24 17:53:24 +00:00
// Check existing tts parameter
if old := p.firstParameter(ttsParameter); old != "" && old != loc {
// Already has tts audio, but with different location
// Try to delete the old audio file
if a.deletePostTTSAudio(p) {
log.Println("deleted old tts audio for", p.Path)
}
}
2021-09-07 20:16:28 +00:00
// Set post parameter
err = a.db.replacePostParam(p.Path, ttsParameter, []string{loc})
if err != nil {
return err
2021-09-07 20:16:28 +00:00
}
// Purge cache
a.cache.purge()
2021-09-07 20:16:28 +00:00
return nil
}
// Tries to delete the tts audio file, but doesn't remove the post parameter
func (a *goBlog) deletePostTTSAudio(p *post) bool {
// Check if post has tts audio
audio := p.firstParameter(ttsParameter)
if audio == "" {
return false
2021-09-07 20:16:28 +00:00
}
// Get filename and check if file is from the configured media storage
fileUrl, err := url.Parse(audio)
if err != nil {
// Failed to parse audio url
log.Println("failed to parse audio url:", err)
return false
2021-09-07 20:16:28 +00:00
}
fileName := path.Base(fileUrl.Path)
if a.getFullAddress(a.mediaFileLocation(fileName)) != audio {
// File is not from the configured media storage
return false
2021-09-07 20:16:28 +00:00
}
// Try to delete the audio file
err = a.deleteMediaFile(fileName)
if err != nil {
log.Println("failed to delete audio file:", err)
return false
2021-09-07 20:16:28 +00:00
}
return true
}
2021-09-07 20:16:28 +00:00
2021-12-30 11:40:21 +00:00
func (a *goBlog) createTTSAudio(lang, ssml string, w io.Writer) error {
// Check if Google Cloud TTS is enabled
gctts := a.cfg.TTS
if !gctts.Enabled || gctts.GoogleAPIKey == "" {
return errors.New("missing config for Google Cloud TTS")
2021-09-07 20:16:28 +00:00
}
// Check parameters
if lang == "" {
return errors.New("language not provided")
}
2021-12-30 11:40:21 +00:00
if ssml == "" {
2021-09-07 20:16:28 +00:00
return errors.New("empty text")
}
2021-12-30 11:40:21 +00:00
if w == nil {
return errors.New("writer not provided")
2021-09-07 20:16:28 +00:00
}
// Create request body
2022-03-16 07:28:03 +00:00
body := map[string]any{
"audioConfig": map[string]any{
"audioEncoding": "MP3",
},
2022-03-16 07:28:03 +00:00
"input": map[string]any{
2021-12-30 11:40:21 +00:00
"ssml": ssml,
},
2022-03-16 07:28:03 +00:00
"voice": map[string]any{
"languageCode": lang,
},
}
2021-09-07 20:16:28 +00:00
// Do request
2022-03-16 07:28:03 +00:00
var response map[string]any
2021-12-26 08:18:08 +00:00
err := requests.
URL("https://texttospeech.googleapis.com/v1beta1/text:synthesize").
Param("key", gctts.GoogleAPIKey).
Client(a.httpClient).
2022-01-04 08:48:37 +00:00
Method(http.MethodPost).
2021-12-26 08:18:08 +00:00
BodyJSON(body).
ToJSON(&response).
Fetch(context.Background())
2021-09-07 20:16:28 +00:00
if err != nil {
2021-12-26 08:18:08 +00:00
return errors.New("tts request failed: " + err.Error())
2021-09-07 20:16:28 +00:00
}
// Decode response
2021-12-26 08:18:08 +00:00
if encoded, ok := response["audioContent"]; ok {
if encodedStr, ok := encoded.(string); ok {
2023-01-24 13:59:01 +00:00
_, err := io.Copy(w, base64.NewDecoder(base64.StdEncoding, strings.NewReader(encodedStr)))
return err
}
2021-09-07 20:16:28 +00:00
}
return errors.New("no audio content")
2021-09-07 20:16:28 +00:00
}