mirror of https://github.com/jlelse/GoBlog
Fix TTS for long texts
This commit is contained in:
parent
fac8129c99
commit
a3517a9a97
1
go.mod
1
go.mod
|
@ -15,6 +15,7 @@ require (
|
||||||
github.com/cretz/bine v0.2.0
|
github.com/cretz/bine v0.2.0
|
||||||
github.com/dchest/captcha v0.0.0-20200903113550-03f5f0333e1f
|
github.com/dchest/captcha v0.0.0-20200903113550-03f5f0333e1f
|
||||||
github.com/dgraph-io/ristretto v0.1.0
|
github.com/dgraph-io/ristretto v0.1.0
|
||||||
|
github.com/dmulholl/mp3lib v1.0.0
|
||||||
github.com/elnormous/contenttype v1.0.0
|
github.com/elnormous/contenttype v1.0.0
|
||||||
github.com/emersion/go-sasl v0.0.0-20211008083017-0b9dcfb154ac
|
github.com/emersion/go-sasl v0.0.0-20211008083017-0b9dcfb154ac
|
||||||
github.com/emersion/go-smtp v0.15.0
|
github.com/emersion/go-smtp v0.15.0
|
||||||
|
|
2
go.sum
2
go.sum
|
@ -97,6 +97,8 @@ github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUn
|
||||||
github.com/djherbis/atime v1.1.0/go.mod h1:28OF6Y8s3NQWwacXc5eZTsEsiMzp7LF8MbXE+XJPdBE=
|
github.com/djherbis/atime v1.1.0/go.mod h1:28OF6Y8s3NQWwacXc5eZTsEsiMzp7LF8MbXE+XJPdBE=
|
||||||
github.com/dlclark/regexp2 v1.4.0 h1:F1rxgk7p4uKjwIQxBs9oAXe5CqrXlCduYEJvrF4u93E=
|
github.com/dlclark/regexp2 v1.4.0 h1:F1rxgk7p4uKjwIQxBs9oAXe5CqrXlCduYEJvrF4u93E=
|
||||||
github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
|
github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
|
||||||
|
github.com/dmulholl/mp3lib v1.0.0 h1:PZq24kJBIk5zIxi/t6Qp8/EOAbAqThyrUCpkUKLBeWQ=
|
||||||
|
github.com/dmulholl/mp3lib v1.0.0/go.mod h1:4RoA+iht/khfwxmH1ieoxZTzYVbb0am/zdvFkyGRr6I=
|
||||||
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
|
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
|
||||||
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
|
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
|
||||||
github.com/dvyukov/go-fuzz v0.0.0-20210103155950-6a8e9d1f2415/go.mod h1:11Gm+ccJnvAhCNLlf5+cS9KjtbaD5I5zaZpFMsTHWTw=
|
github.com/dvyukov/go-fuzz v0.0.0-20210103155950-6a8e9d1f2415/go.mod h1:11Gm+ccJnvAhCNLlf5+cS9KjtbaD5I5zaZpFMsTHWTw=
|
||||||
|
|
|
@ -0,0 +1,77 @@
|
||||||
|
package mp3merge
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
|
||||||
|
"github.com/dmulholl/mp3lib"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Inspired by https://github.com/dmulholl/mp3cat/blob/2ec1e4fe4d995ebd41bf1887b3cab8e2a569b3d4/mp3cat.go
|
||||||
|
// Merge multiple mp3s into one mp3.
|
||||||
|
func MergeMP3(out io.Writer, in ...io.Reader) error {
|
||||||
|
if len(in) == 0 {
|
||||||
|
return errors.New("no inputs specified")
|
||||||
|
}
|
||||||
|
|
||||||
|
var totalFrames, totalBytes uint32
|
||||||
|
var firstBitRate int
|
||||||
|
var isVBR bool
|
||||||
|
var tmpOut bytes.Buffer
|
||||||
|
|
||||||
|
// Loop over the input files and append their MP3 frames to the output file.
|
||||||
|
for _, inReader := range in {
|
||||||
|
if inReader == nil {
|
||||||
|
return errors.New("nil input")
|
||||||
|
}
|
||||||
|
|
||||||
|
isFirstFrame := true
|
||||||
|
|
||||||
|
for {
|
||||||
|
// Read the next frame from the input
|
||||||
|
frame := mp3lib.NextFrame(inReader)
|
||||||
|
if frame == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip the first frame if it's a VBR header
|
||||||
|
if isFirstFrame {
|
||||||
|
isFirstFrame = false
|
||||||
|
if mp3lib.IsXingHeader(frame) || mp3lib.IsVbriHeader(frame) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we detect more than one bitrate we'll need to add a VBR header to the output
|
||||||
|
if firstBitRate == 0 {
|
||||||
|
firstBitRate = frame.BitRate
|
||||||
|
} else if frame.BitRate != firstBitRate {
|
||||||
|
isVBR = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write the frame to the temporary output
|
||||||
|
_, err := tmpOut.Write(frame.RawBytes)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Increment the total number of frames and bytes
|
||||||
|
totalFrames += 1
|
||||||
|
totalBytes += uint32(len(frame.RawBytes))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we detected multiple bitrates, prepend a VBR header to the output
|
||||||
|
if isVBR {
|
||||||
|
xingHeader := mp3lib.NewXingHeader(totalFrames, totalBytes)
|
||||||
|
_, err := out.Write(xingHeader.RawBytes)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy the temporary output to the output
|
||||||
|
_, err := tmpOut.WriteTo(out)
|
||||||
|
return err
|
||||||
|
}
|
75
tts.go
75
tts.go
|
@ -12,8 +12,10 @@ import (
|
||||||
"net/url"
|
"net/url"
|
||||||
"path"
|
"path"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
"github.com/carlmjohnson/requests"
|
"github.com/carlmjohnson/requests"
|
||||||
|
"go.goblog.app/app/pkgs/mp3merge"
|
||||||
)
|
)
|
||||||
|
|
||||||
const ttsParameter = "tts"
|
const ttsParameter = "tts"
|
||||||
|
@ -54,31 +56,60 @@ func (a *goBlog) ttsEnabled() bool {
|
||||||
|
|
||||||
func (a *goBlog) createPostTTSAudio(p *post) error {
|
func (a *goBlog) createPostTTSAudio(p *post) error {
|
||||||
// Get required values
|
// Get required values
|
||||||
lang := a.cfg.Blogs[p.Blog].Lang
|
lang := defaultIfEmpty(a.cfg.Blogs[p.Blog].Lang, "en")
|
||||||
if lang == "" {
|
|
||||||
lang = "en"
|
// Create TTS text parts
|
||||||
|
parts := []string{}
|
||||||
|
// Add title if available
|
||||||
|
if title := p.Title(); title != "" {
|
||||||
|
parts = append(parts, a.renderMdTitle(title))
|
||||||
|
}
|
||||||
|
// Add body split into paragraphs because of 5000 character limit
|
||||||
|
parts = append(parts, strings.Split(htmlText(string(a.postHtml(p, false))), "\n\n")...)
|
||||||
|
|
||||||
|
// Create TTS audio for each part
|
||||||
|
partsBuffers := make([]io.Reader, len(parts))
|
||||||
|
var errs []error
|
||||||
|
var lock sync.Mutex
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
for i, part := range parts {
|
||||||
|
// Increase wait group
|
||||||
|
wg.Add(1)
|
||||||
|
go func(i int, part string) {
|
||||||
|
// Build SSML
|
||||||
|
ssml := "<speak>" + html.EscapeString(part) + "<break time=\"500ms\"/></speak>"
|
||||||
|
// Create TTS audio
|
||||||
|
var audioBuffer bytes.Buffer
|
||||||
|
err := a.createTTSAudio(lang, ssml, &audioBuffer)
|
||||||
|
if err != nil {
|
||||||
|
lock.Lock()
|
||||||
|
errs = append(errs, err)
|
||||||
|
lock.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Append buffer to partsBuffers
|
||||||
|
lock.Lock()
|
||||||
|
partsBuffers[i] = &audioBuffer
|
||||||
|
lock.Unlock()
|
||||||
|
// Decrease wait group
|
||||||
|
wg.Done()
|
||||||
|
}(i, part)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build SSML
|
// Wait for all parts to be created
|
||||||
var ssml strings.Builder
|
wg.Wait()
|
||||||
ssml.WriteString("<speak>")
|
|
||||||
ssml.WriteString(html.EscapeString(a.renderMdTitle(p.Title())))
|
|
||||||
ssml.WriteString("<break time=\"1s\"/>")
|
|
||||||
for _, part := range strings.Split(htmlText(string(a.postHtml(p, false))), "\n\n") {
|
|
||||||
ssml.WriteString(html.EscapeString(part))
|
|
||||||
ssml.WriteString("<break time=\"500ms\"/>")
|
|
||||||
}
|
|
||||||
ssml.WriteString("</speak>")
|
|
||||||
|
|
||||||
// Generate audio
|
// Check if any errors occurred
|
||||||
var audioBuffer bytes.Buffer
|
if len(errs) > 0 {
|
||||||
err := a.createTTSAudio(lang, ssml.String(), &audioBuffer)
|
return errs[0]
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Merge partsBuffers into final buffer
|
||||||
|
var final bytes.Buffer
|
||||||
|
mp3merge.MergeMP3(&final, partsBuffers...)
|
||||||
|
|
||||||
// Save audio
|
// Save audio
|
||||||
audioReader := bytes.NewReader(audioBuffer.Bytes())
|
audioReader := bytes.NewReader(final.Bytes())
|
||||||
fileHash, err := getSHA256(audioReader)
|
fileHash, err := getSHA256(audioReader)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -155,12 +186,6 @@ func (a *goBlog) createTTSAudio(lang, ssml string, w io.Writer) error {
|
||||||
return errors.New("writer not provided")
|
return errors.New("writer not provided")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check max length
|
|
||||||
// TODO: Support longer texts by splitting into multiple requests
|
|
||||||
// if len(ssml) > 5000 {
|
|
||||||
// return errors.New("text is too long")
|
|
||||||
// }
|
|
||||||
|
|
||||||
// Create request body
|
// Create request body
|
||||||
body := map[string]interface{}{
|
body := map[string]interface{}{
|
||||||
"audioConfig": map[string]interface{}{
|
"audioConfig": map[string]interface{}{
|
||||||
|
|
Loading…
Reference in New Issue