From d6518c3a5d0d34ed0f488f5185289618b8c6d6d2 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Else Date: Tue, 4 Jan 2022 09:48:37 +0100 Subject: [PATCH 1/4] Update requests dependency --- go.mod | 2 +- go.sum | 4 ++-- mediaCompression.go | 7 +++---- ntfy.go | 3 ++- tts.go | 3 ++- webmentionSending.go | 2 +- 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/go.mod b/go.mod index c1c5984..ee1647e 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ require ( github.com/alecthomas/chroma v0.9.4 github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de github.com/c2h5oh/datasize v0.0.0-20200825124411-48ed595a09d2 - github.com/carlmjohnson/requests v0.21.13 + github.com/carlmjohnson/requests v0.22.1 github.com/cretz/bine v0.2.0 github.com/dchest/captcha v0.0.0-20200903113550-03f5f0333e1f github.com/dgraph-io/ristretto v0.1.0 diff --git a/go.sum b/go.sum index 6c9f8da..0fc9db5 100644 --- a/go.sum +++ b/go.sum @@ -65,8 +65,8 @@ github.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc h1:biVzkmvwrH8 github.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= github.com/c2h5oh/datasize v0.0.0-20200825124411-48ed595a09d2 h1:t8KYCwSKsOEZBFELI4Pn/phbp38iJ1RRAkDFNin1aak= github.com/c2h5oh/datasize v0.0.0-20200825124411-48ed595a09d2/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M= -github.com/carlmjohnson/requests v0.21.13 h1:p9DiBwbrLG8uA67YPOrfGMG1ZRzRyPBaO9hXQpX+Ork= -github.com/carlmjohnson/requests v0.21.13/go.mod h1:Hw4fFOk3xDlHQbNRTGo4oc52TUTpVEq93sNy/H+mrQM= +github.com/carlmjohnson/requests v0.22.1 h1:YoifpEbpJW4LPRX/+0dJe3vTLducEE9Ib10k6lElIUM= +github.com/carlmjohnson/requests v0.22.1/go.mod h1:Hw4fFOk3xDlHQbNRTGo4oc52TUTpVEq93sNy/H+mrQM= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.1.2 
h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE= diff --git a/mediaCompression.go b/mediaCompression.go index aeb93bb..6e82bed 100644 --- a/mediaCompression.go +++ b/mediaCompression.go @@ -63,7 +63,7 @@ func (sp *shortpixel) compress(url string, upload mediaStorageSaveFunc, hc *http err := requests. URL("https://api.shortpixel.com/v2/reducer-sync.php"). Client(hc). - Post(). + Method(http.MethodPost). BodyJSON(map[string]interface{}{ "key": sp.key, "plugin_version": "GB001", @@ -100,7 +100,7 @@ func (tf *tinify) compress(url string, upload mediaStorageSaveFunc, hc *http.Cli err := requests. URL("https://api.tinify.com/shrink"). Client(hc). - Post(). + Method(http.MethodPost). BasicAuth("api", tf.key). BodyJSON(map[string]interface{}{ "source": map[string]interface{}{ @@ -124,7 +124,7 @@ func (tf *tinify) compress(url string, upload mediaStorageSaveFunc, hc *http.Cli err = requests. URL(compressedLocation). Client(hc). - Post(). + Method(http.MethodPost). BasicAuth("api", tf.key). BodyJSON(map[string]interface{}{ "resize": map[string]interface{}{ @@ -157,7 +157,6 @@ func (cf *cloudflare) compress(url string, upload mediaStorageSaveFunc, hc *http err := requests. URL(fmt.Sprintf("https://www.cloudflare.com/cdn-cgi/image/f=jpeg,q=75,metadata=none,fit=scale-down,w=%d,h=%d/%s", defaultCompressionWidth, defaultCompressionHeight, url)). Client(hc). - Get(). ToBytesBuffer(&imgBuffer). Fetch(context.Background()) if err != nil { diff --git a/ntfy.go b/ntfy.go index 016ea2d..e650200 100644 --- a/ntfy.go +++ b/ntfy.go @@ -2,6 +2,7 @@ package main import ( "context" + "net/http" "strings" "github.com/carlmjohnson/requests" @@ -22,7 +23,7 @@ func (a *goBlog) sendNtfy(cfg *configNtfy, msg string) error { URL(cfg.Topic). Client(a.httpClient). UserAgent(appUserAgent). - Post(). + Method(http.MethodPost). BodyReader(strings.NewReader(msg)). 
Fetch(context.Background()) } diff --git a/tts.go b/tts.go index 38cece9..c2e0dae 100644 --- a/tts.go +++ b/tts.go @@ -8,6 +8,7 @@ import ( "html" "io" "log" + "net/http" "net/url" "path" "strings" @@ -177,7 +178,7 @@ func (a *goBlog) createTTSAudio(lang, ssml string, w io.Writer) error { Param("key", gctts.GoogleAPIKey). Client(a.httpClient). UserAgent(appUserAgent). - Post(). + Method(http.MethodPost). BodyJSON(body). ToJSON(&response). Fetch(context.Background()) diff --git a/webmentionSending.go b/webmentionSending.go index 118b4ad..ddb4397 100644 --- a/webmentionSending.go +++ b/webmentionSending.go @@ -73,7 +73,7 @@ func (a *goBlog) sendWebmentions(p *post) error { func (a *goBlog) sendWebmention(endpoint, source, target string) error { // TODO: Pass all tests from https://webmention.rocks/ - return requests.URL(endpoint).Client(a.httpClient).Post().UserAgent(appUserAgent). + return requests.URL(endpoint).Client(a.httpClient).Method(http.MethodPost).UserAgent(appUserAgent). BodyForm(url.Values{ "source": []string{source}, "target": []string{target}, From d953b331c4c678e09115e0237e3c3e7cc2d55f7d Mon Sep 17 00:00:00 2001 From: Jan-Lukas Else Date: Tue, 4 Jan 2022 10:37:48 +0100 Subject: [PATCH 2/4] Some improvements --- blogroll.go | 27 +++++++++++---------------- feeds_test.go | 24 ++++++++++++------------ geo.go | 36 +++++++++++------------------------- httpClient_test.go | 34 +++++++++++++++------------------- indieAuthServer_test.go | 18 ++++-------------- mediaCompression.go | 20 ++++++++++---------- postsDeleter.go | 4 +++- postsDeleter_test.go | 6 ++++-- posts_test.go | 12 ++---------- sitemap_test.go | 6 +----- utils.go | 9 +++++---- 11 files changed, 78 insertions(+), 118 deletions(-) diff --git a/blogroll.go b/blogroll.go index 45b7483..a3da962 100644 --- a/blogroll.go +++ b/blogroll.go @@ -2,13 +2,14 @@ package main import ( "bytes" - "fmt" + "context" "log" "net/http" "sort" "strings" "time" + "github.com/carlmjohnson/requests" 
"github.com/kaorimatz/go-opml" "github.com/thoas/go-funk" "go.goblog.app/app/pkgs/contenttype" @@ -64,22 +65,16 @@ func (a *goBlog) getBlogrollOutlines(blog string) ([]*opml.Outline, error) { if cache := a.db.loadOutlineCache(blog); cache != nil { return cache, nil } - req, err := http.NewRequest(http.MethodGet, config.Opml, nil) - if err != nil { - return nil, err - } + rb := requests.URL(config.Opml).Client(a.httpClient).UserAgent(appUserAgent) if config.AuthHeader != "" && config.AuthValue != "" { - req.Header.Set(config.AuthHeader, config.AuthValue) + rb.Header(config.AuthHeader, config.AuthValue) } - res, err := a.httpClient.Do(req) - if err != nil { - return nil, err - } - defer res.Body.Close() - if code := res.StatusCode; code < 200 || 300 <= code { - return nil, fmt.Errorf("opml request not successful, status code: %d", code) - } - o, err := opml.Parse(res.Body) + var o *opml.OPML + err := rb.Handle(func(r *http.Response) (err error) { + defer r.Body.Close() + o, err = opml.Parse(r.Body) + return + }).Fetch(context.Background()) if err != nil { return nil, err } @@ -117,7 +112,7 @@ func (db *database) loadOutlineCache(blog string) []*opml.Outline { if err != nil || data == nil { return nil } - o, err := opml.NewParser(bytes.NewReader(data)).Parse() + o, err := opml.Parse(bytes.NewReader(data)) if err != nil { return nil } diff --git a/feeds_test.go b/feeds_test.go index f17980c..e4ddaaf 100644 --- a/feeds_test.go +++ b/feeds_test.go @@ -1,9 +1,11 @@ package main import ( + "context" "net/http" "testing" + "github.com/carlmjohnson/requests" "github.com/mmcdole/gofeed" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -17,6 +19,7 @@ func Test_feeds(t *testing.T) { _ = app.initDatabase(false) app.initComponents(false) app.d, _ = app.buildRouter() + handlerClient := newHandlerClient(app.d) err := app.createPost(&post{ Path: "/testpost", @@ -26,21 +29,18 @@ func Test_feeds(t *testing.T) { Parameters: map[string][]string{"title": 
{"Test Post"}}, Content: "Test Content", }) - require.NoError(t, err) for _, typ := range []feedType{rssFeed, atomFeed, jsonFeed} { - req, _ := http.NewRequest(http.MethodGet, "http://localhost:8080/posts."+string(typ), nil) - res, err := doHandlerRequest(req, app.d) - - require.NoError(t, err) - - require.Equal(t, http.StatusOK, res.StatusCode) - - fp := gofeed.NewParser() - feed, err := fp.Parse(res.Body) - _ = res.Body.Close() - + var feed *gofeed.Feed + err := requests.URL("http://localhost:8080/posts." + string(typ)).Client(handlerClient). + Handle(func(r *http.Response) (err error) { + fp := gofeed.NewParser() + defer r.Body.Close() + feed, err = fp.Parse(r.Body) + return + }). + Fetch(context.Background()) require.NoError(t, err) require.NotNil(t, feed) diff --git a/geo.go b/geo.go index b93d370..9a007b0 100644 --- a/geo.go +++ b/geo.go @@ -1,14 +1,14 @@ package main import ( + "bytes" + "context" "embed" "fmt" - "io" - "net/http" - "net/url" "strings" gogeouri "git.jlel.se/jlelse/go-geouri" + "github.com/carlmjohnson/requests" geojson "github.com/paulmach/go.geojson" "github.com/thoas/go-funk" ) @@ -39,33 +39,19 @@ func (a *goBlog) photonReverse(lat, lon float64, lang string) ([]byte, error) { if cache != nil { return cache, nil } - uv := url.Values{} - uv.Set("lat", fmt.Sprintf("%v", lat)) - uv.Set("lon", fmt.Sprintf("%v", lon)) + var buf bytes.Buffer + rb := requests.URL("https://photon.komoot.io/reverse").Client(a.httpClient).UserAgent(appUserAgent).ToBytesBuffer(&buf) + rb.Param("lat", fmt.Sprintf("%v", lat)).Param("lon", fmt.Sprintf("%v", lon)) if lang == "de" || lang == "fr" || lang == "it" { - uv.Set("lang", lang) + rb.Param("lang", lang) } else { - uv.Set("lang", "en") + rb.Param("lang", "en") } - req, err := http.NewRequest(http.MethodGet, "https://photon.komoot.io/reverse?"+uv.Encode(), nil) - if err != nil { + if err := rb.Fetch(context.Background()); err != nil { return nil, err } - req.Header.Set(userAgent, appUserAgent) - resp, err := 
a.httpClient.Do(req) - if err != nil { - return nil, err - } - defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("response status code: %v", resp.StatusCode) - } - ba, err := io.ReadAll(resp.Body) - if err != nil { - return nil, err - } - _ = a.db.cachePersistently(cacheKey, ba) - return ba, nil + _ = a.db.cachePersistently(cacheKey, buf.Bytes()) + return buf.Bytes(), nil } func geoOSMLink(g *gogeouri.Geo) string { diff --git a/httpClient_test.go b/httpClient_test.go index ef667b5..2d4a001 100644 --- a/httpClient_test.go +++ b/httpClient_test.go @@ -15,25 +15,21 @@ type fakeHttpClient struct { func newFakeHttpClient() *fakeHttpClient { fc := &fakeHttpClient{} - fc.Client = &http.Client{ - Transport: &handlerRoundTripper{ - handler: http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { - fc.req = r - if fc.handler != nil { - rec := httptest.NewRecorder() - fc.handler.ServeHTTP(rec, r) - fc.res = rec.Result() - // Copy the headers from the response recorder - for k, v := range rec.Header() { - rw.Header()[k] = v - } - // Copy result status code and body - rw.WriteHeader(fc.res.StatusCode) - _, _ = io.Copy(rw, rec.Body) - } - }), - }, - } + fc.Client = newHandlerClient(http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + fc.req = r + if fc.handler != nil { + rec := httptest.NewRecorder() + fc.handler.ServeHTTP(rec, r) + fc.res = rec.Result() + // Copy the headers from the response recorder + for k, v := range rec.Header() { + rw.Header()[k] = v + } + // Copy result status code and body + rw.WriteHeader(fc.res.StatusCode) + _, _ = io.Copy(rw, rec.Body) + } + })) return fc } diff --git a/indieAuthServer_test.go b/indieAuthServer_test.go index 187f8b7..99e6d0f 100644 --- a/indieAuthServer_test.go +++ b/indieAuthServer_test.go @@ -51,24 +51,14 @@ func Test_indieAuthServer(t *testing.T) { _ = app.initDatabase(false) app.initComponents(false) - app.ias.Client = &http.Client{ - Transport: &handlerRoundTripper{ - 
handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(http.StatusOK) - }), - }, - } + app.ias.Client = newHandlerClient(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) iac := indieauth.NewClient( "https://example.com/", "https://example.com/redirect", - &http.Client{ - Transport: &handlerRoundTripper{ - handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - app.d.ServeHTTP(w, r) - }), - }, - }, + newHandlerClient(app.d), ) require.NotNil(t, iac) diff --git a/mediaCompression.go b/mediaCompression.go index 6e82bed..2a43f25 100644 --- a/mediaCompression.go +++ b/mediaCompression.go @@ -90,13 +90,14 @@ type tinify struct { } func (tf *tinify) compress(url string, upload mediaStorageSaveFunc, hc *http.Client) (string, error) { + tinifyErr := errors.New("failed to compress image using tinify") // Check url fileExtension, allowed := urlHasExt(url, "jpg", "jpeg", "png") if !allowed { return "", nil } // Compress - compressedLocation := "" + headers := http.Header{} err := requests. URL("https://api.tinify.com/shrink"). Client(hc). @@ -107,17 +108,16 @@ func (tf *tinify) compress(url string, upload mediaStorageSaveFunc, hc *http.Cli "url": url, }, }). - Handle(func(r *http.Response) error { - compressedLocation = r.Header.Get("Location") - if compressedLocation == "" { - return errors.New("location header missing") - } - return nil - }). + ToHeaders(headers). 
Fetch(context.Background()) if err != nil { log.Println("Tinify error:", err.Error()) - return "", errors.New("failed to compress image using tinify") + return "", tinifyErr + } + compressedLocation := headers.Get("Location") + if compressedLocation == "" { + log.Println("Tinify error: location header missing") + return "", tinifyErr } // Resize and download image var imgBuffer bytes.Buffer @@ -137,7 +137,7 @@ func (tf *tinify) compress(url string, upload mediaStorageSaveFunc, hc *http.Cli Fetch(context.Background()) if err != nil { log.Println("Tinify error:", err.Error()) - return "", errors.New("failed to compress image using tinify") + return "", tinifyErr } // Upload compressed file return uploadCompressedFile(fileExtension, &imgBuffer, upload) diff --git a/postsDeleter.go b/postsDeleter.go index da3fa0a..6468ded 100644 --- a/postsDeleter.go +++ b/postsDeleter.go @@ -26,7 +26,9 @@ func (a *goBlog) checkDeletedPosts() { for _, post := range postsToDelete { // Check if post is deleted for more than 7 days if deleted, err := dateparse.ParseLocal(post.firstParameter("deleted")); err == nil && deleted.Add(time.Hour*24*7).Before(time.Now()) { - a.deletePost(post.Path) + if err := a.deletePost(post.Path); err != nil { + log.Println("Error deleting post:", err) + } } } } diff --git a/postsDeleter_test.go b/postsDeleter_test.go index 87cf6a7..47b0af7 100644 --- a/postsDeleter_test.go +++ b/postsDeleter_test.go @@ -16,12 +16,13 @@ func Test_checkDeletedPosts(t *testing.T) { app.initComponents(false) // Create a post - app.createPost(&post{ + err := app.createPost(&post{ Content: "Test", Status: statusPublished, Path: "/testpost", Section: "posts", }) + require.NoError(t, err) // Check if post count is 1 count, err := app.db.countPosts(&postsRequestConfig{}) @@ -49,7 +50,8 @@ func Test_checkDeletedPosts(t *testing.T) { require.Equal(t, 1, count) // Set deleted time to more than 7 days ago - app.db.replacePostParam("/testpost", "deleted", 
[]string{time.Now().Add(-time.Hour * 24 * 8).Format(time.RFC3339)}) + err = app.db.replacePostParam("/testpost", "deleted", []string{time.Now().Add(-time.Hour * 24 * 8).Format(time.RFC3339)}) + require.NoError(t, err) // Run deleter app.checkDeletedPosts() diff --git a/posts_test.go b/posts_test.go index b6d3bc6..8def9b9 100644 --- a/posts_test.go +++ b/posts_test.go @@ -34,11 +34,7 @@ func Test_serveDate(t *testing.T) { require.NoError(t, err) - client := &http.Client{ - Transport: &handlerRoundTripper{ - handler: app.d, - }, - } + client := newHandlerClient(app.d) var resString string @@ -128,11 +124,7 @@ func Test_servePost(t *testing.T) { }) require.NoError(t, err) - client := &http.Client{ - Transport: &handlerRoundTripper{ - handler: app.d, - }, - } + client := newHandlerClient(app.d) var resString string diff --git a/sitemap_test.go b/sitemap_test.go index 3dfe504..abb44d8 100644 --- a/sitemap_test.go +++ b/sitemap_test.go @@ -36,11 +36,7 @@ func Test_sitemap(t *testing.T) { }) require.NoError(t, err) - client := &http.Client{ - Transport: &handlerRoundTripper{ - handler: app.d, - }, - } + client := newHandlerClient(app.d) var resString string diff --git a/utils.go b/utils.go index 879c26b..8c91ac4 100644 --- a/utils.go +++ b/utils.go @@ -302,14 +302,15 @@ func (rt *handlerRoundTripper) RoundTrip(req *http.Request) (*http.Response, err return nil, errors.New("no handler") } +func newHandlerClient(handler http.Handler) *http.Client { + return &http.Client{Transport: &handlerRoundTripper{handler: handler}} +} + func doHandlerRequest(req *http.Request, handler http.Handler) (*http.Response, error) { - client := &http.Client{ - Transport: &handlerRoundTripper{handler: handler}, - } if req.URL.Path == "" { req.URL.Path = "/" } - return client.Do(req) + return newHandlerClient(handler).Do(req) } func saveToFile(reader io.Reader, fileName string) error { From df5098b4c0a01a05c175320c65d50d3d27105f92 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Else Date: Tue, 4 Jan 
2022 18:15:09 +0100 Subject: [PATCH 3/4] Fix and improve htmlText util method for TTS and use it for summary as well --- postsFuncs.go | 7 ++----- tts.go | 2 +- utils.go | 48 ++++++++++++++++++++++++++++++++++++------------ utils_test.go | 34 +++++++++++++++++++++++++++------- 4 files changed, 66 insertions(+), 25 deletions(-) diff --git a/postsFuncs.go b/postsFuncs.go index 5d5fee8..93f528d 100644 --- a/postsFuncs.go +++ b/postsFuncs.go @@ -8,7 +8,6 @@ import ( "time" gogeouri "git.jlel.se/jlelse/go-geouri" - "github.com/PuerkitoBio/goquery" "github.com/araddon/dateparse" "gopkg.in/yaml.v3" ) @@ -113,11 +112,9 @@ func (a *goBlog) postSummary(p *post) (summary string) { } html := string(a.postHtml(p, false)) if splitted := strings.Split(html, summaryDivider); len(splitted) > 1 { - doc, _ := goquery.NewDocumentFromReader(strings.NewReader(splitted[0])) - summary = doc.Text() + summary = htmlText(splitted[0]) } else { - doc, _ := goquery.NewDocumentFromReader(strings.NewReader(html)) - summary = doc.Find("p").First().Text() + summary = strings.Split(htmlText(html), "\n\n")[0] } return } diff --git a/tts.go b/tts.go index c2e0dae..7a9915b 100644 --- a/tts.go +++ b/tts.go @@ -64,7 +64,7 @@ func (a *goBlog) createPostTTSAudio(p *post) error { ssml.WriteString("") ssml.WriteString(html.EscapeString(a.renderMdTitle(p.Title()))) ssml.WriteString("") - ssml.WriteString(html.EscapeString(cleanHTMLText(string(a.postHtml(p, false))))) + ssml.WriteString(html.EscapeString(htmlText(string(a.postHtml(p, false))))) ssml.WriteString("") // Generate audio diff --git a/utils.go b/utils.go index 8c91ac4..57b2eb4 100644 --- a/utils.go +++ b/utils.go @@ -246,24 +246,48 @@ func mBytesString(size int64) string { } func htmlText(s string) string { - doc, _ := goquery.NewDocumentFromReader(strings.NewReader(s)) + // Build policy to only allow a subset of HTML tags + textPolicy := bluemonday.StrictPolicy() + textPolicy.AllowElements("h1", "h2", "h3", "h4", "h5", "h6") // Headers + 
textPolicy.AllowElements("p") // Paragraphs + textPolicy.AllowElements("ol", "ul", "li") // Lists + textPolicy.AllowElements("blockquote") // Blockquotes + // Filter HTML tags + htmlBuf := textPolicy.SanitizeReader(strings.NewReader(s)) + // Read HTML into document + doc, _ := goquery.NewDocumentFromReader(htmlBuf) var text strings.Builder - paragraphs := doc.Find("p") - if paragraphs.Length() == 0 { - text.WriteString(doc.Text()) + if bodyChild := doc.Find("body").Children(); bodyChild.Length() > 0 { + // Input was real HTML, so build the text from the body + // Declare recursive function to print childs + var printChilds func(childs *goquery.Selection) + printChilds = func(childs *goquery.Selection) { + childs.Each(func(i int, sel *goquery.Selection) { + if i > 0 && // Not first child + sel.Is("h1, h2, h3, h4, h5, h6, p, ol, ul, li, blockquote") { // All elements that start a new paragraph + text.WriteString("\n\n") + } + if sel.Is("ol > li") { // List item in ordered list + fmt.Fprintf(&text, "%d. 
", i+1) // Add list item number + } + if sel.Children().Length() > 0 { // Has children + printChilds(sel.Children()) // Recursive call to print childs + } else { + text.WriteString(sel.Text()) // Print text + } + }) + } + printChilds(bodyChild) } else { - paragraphs.Each(func(i int, s *goquery.Selection) { - if i > 0 { - text.WriteString("\n\n") - } - text.WriteString(s.Text()) - }) + // Input was probably just text, so just use the text + text.WriteString(doc.Text()) } - r := strings.TrimSpace(text.String()) - return r + // Trim whitespace and return + return strings.TrimSpace(text.String()) } func cleanHTMLText(s string) string { + // Clean HTML with UGC policy and return text return htmlText(bluemonday.UGCPolicy().Sanitize(s)) } diff --git a/utils_test.go b/utils_test.go index 87aeda7..d458966 100644 --- a/utils_test.go +++ b/utils_test.go @@ -76,13 +76,33 @@ func Test_urlHasExt(t *testing.T) { }) } -func Test_cleanHTMLText(t *testing.T) { - assert.Equal(t, `"This is a 'test'" 😁`, cleanHTMLText(`"This is a 'test'" 😁`)) - assert.Equal(t, `Test`, cleanHTMLText(`Test`)) - assert.Equal(t, "Test\n\nTest", cleanHTMLText(`

Test

Test

`)) - assert.Equal(t, "Test\n\nTest", cleanHTMLText("

Test

\n

Test

")) - assert.Equal(t, "Test\n\nTest", cleanHTMLText("

Test

\n

Test

")) - assert.Equal(t, "Test test\n\nTest", cleanHTMLText(`

Test test

Test

`)) +func Test_htmlText(t *testing.T) { + // Text without HTML + assert.Equal(t, "This is a test", htmlText("This is a test")) + // Text without HTML and Emojis + assert.Equal(t, "This is a test 😁", htmlText("This is a test 😁")) + // Text without HTML and quotes + assert.Equal(t, "This is a 'test'", htmlText("This is a 'test'")) + // Text with formatting (like bold or italic) + assert.Equal(t, "This is a test", htmlText("This is a test")) + assert.Equal(t, "This is a test", htmlText("This is a test")) + // Unordered list + assert.Equal(t, "Test\n\nTest", htmlText(`
  • Test
  • Test
`)) + // Ordered list + assert.Equal(t, "1. Test\n\n2. Test", htmlText(`
  1. Test
  2. Test
`)) + // Nested unordered list + assert.Equal(t, "Test\n\nTest\n\nTest", htmlText(`
  • Test
    • Test
    • Test
`)) + // Headers and paragraphs + assert.Equal(t, "Test\n\nTest", htmlText(`

Test

Test

`)) + assert.Equal(t, "Test\n\nTest\n\nTest", htmlText(`

Test

Test

Test

`)) + // Blockquote + assert.Equal(t, "Test\n\nBlockqoute content", htmlText(`

Test

Blockqoute content

`)) + // Nested blockquotes + assert.Equal(t, "Blockqoute content\n\nBlockqoute content", htmlText(`

Blockqoute content

Blockqoute content

`)) + // Code (should be ignored) + assert.Equal(t, "Test", htmlText(`

Test

Code content
`)) + // Inline code (should not be ignored) + assert.Equal(t, "Test Code content", htmlText(`

Test Code content

`)) } func Test_containsStrings(t *testing.T) { From a67df287154b203f83a6a341cb31a24e2b43669d Mon Sep 17 00:00:00 2001 From: Jan-Lukas Else Date: Tue, 4 Jan 2022 18:23:54 +0100 Subject: [PATCH 4/4] Do speak breaks between paragraphs --- tts.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tts.go b/tts.go index 7a9915b..c9723db 100644 --- a/tts.go +++ b/tts.go @@ -64,7 +64,10 @@ func (a *goBlog) createPostTTSAudio(p *post) error { ssml.WriteString("") ssml.WriteString(html.EscapeString(a.renderMdTitle(p.Title()))) ssml.WriteString("") - ssml.WriteString(html.EscapeString(htmlText(string(a.postHtml(p, false))))) + for _, part := range strings.Split(htmlText(string(a.postHtml(p, false))), "\n\n") { + ssml.WriteString(html.EscapeString(part)) + ssml.WriteString("") + } ssml.WriteString("") // Generate audio