From 6bfaf16e25033554d8d0dacf391b234128e2cbe9 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Else Date: Wed, 14 Dec 2022 14:56:27 +0100 Subject: [PATCH] Automatically fetch reply and like title (Updates #45) --- app.go | 3 + config.go | 10 + httpRouters.go | 8 +- microformats.go | 188 ++++++++++++++++++ microformats_test.go | 37 ++++ pkgs/httpcachetransport/httpCacheTransport.go | 45 +++++ .../httpCacheTransport_test.go | 50 +++++ postsDb.go | 16 ++ postsFuncs.go | 4 + settings.go | 89 +++++---- settingsDb.go | 2 + strings/de.yaml | 2 + strings/default.yaml | 2 + ui.go | 16 ++ webmention.go | 1 - webmentionVerification.go | 139 +------------ 16 files changed, 435 insertions(+), 177 deletions(-) create mode 100644 microformats.go create mode 100644 microformats_test.go create mode 100644 pkgs/httpcachetransport/httpCacheTransport.go create mode 100644 pkgs/httpcachetransport/httpCacheTransport_test.go diff --git a/app.go b/app.go index 334d7d1..1ae4995 100644 --- a/app.go +++ b/app.go @@ -75,6 +75,9 @@ type goBlog struct { compressors []mediaCompression mediaStorageInit sync.Once mediaStorage mediaStorage + // Microformats + mfInit sync.Once + mfCache *ristretto.Cache // Minify min minify.Minifier // Plugins diff --git a/config.go b/config.go index c9cb122..9995e6a 100644 --- a/config.go +++ b/config.go @@ -100,6 +100,8 @@ type configBlog struct { hideOldContentWarning bool hideShareButton bool hideTranslateButton bool + addReplyTitle bool + addLikeTitle bool // Editor state WebSockets esws sync.Map esm sync.Mutex @@ -530,6 +532,14 @@ func (a *goBlog) initConfig(logging bool) error { if err != nil { return err } + bc.addReplyTitle, err = a.getBooleanSettingValue(settingNameWithBlog(blog, addReplyTitleSetting), false) + if err != nil { + return err + } + bc.addLikeTitle, err = a.getBooleanSettingValue(settingNameWithBlog(blog, addLikeTitleSetting), false) + if err != nil { + return err + } } // Log success a.cfg.initialized = true diff --git a/httpRouters.go 
b/httpRouters.go index fd6245f..7c5fb35 100644 --- a/httpRouters.go +++ b/httpRouters.go @@ -467,9 +467,11 @@ func (a *goBlog) blogSettingsRouter(_ *configBlog) func(r chi.Router) { r.Post(settingsCreateSectionPath, a.settingsCreateSection) r.Post(settingsUpdateSectionPath, a.settingsUpdateSection) r.Post(settingsUpdateDefaultSectionPath, a.settingsUpdateDefaultSection) - r.Post(settingsHideOldContentWarningPath, a.settingsHideOldContentWarning) - r.Post(settingsHideShareButtonPath, a.settingsHideShareButton) - r.Post(settingsHideTranslateButtonPath, a.settingsHideTranslateButton) + r.Post(settingsHideOldContentWarningPath, a.settingsHideOldContentWarning()) + r.Post(settingsHideShareButtonPath, a.settingsHideShareButton()) + r.Post(settingsHideTranslateButtonPath, a.settingsHideTranslateButton()) + r.Post(settingsAddReplyTitlePath, a.settingsAddReplyTitle()) + r.Post(settingsAddLikeTitlePath, a.settingsAddLikeTitle()) r.Post(settingsUpdateUserPath, a.settingsUpdateUser) r.Post(settingsUpdateProfileImagePath, a.serveUpdateProfileImage) r.Post(settingsDeleteProfileImagePath, a.serveDeleteProfileImage) diff --git a/microformats.go b/microformats.go new file mode 100644 index 0000000..2d13644 --- /dev/null +++ b/microformats.go @@ -0,0 +1,188 @@ +package main + +import ( + "bytes" + "context" + "net/http" + "net/url" + "strings" + "time" + + "github.com/PuerkitoBio/goquery" + "github.com/carlmjohnson/requests" + "github.com/dgraph-io/ristretto" + "go.goblog.app/app/pkgs/bufferpool" + "go.goblog.app/app/pkgs/contenttype" + "go.goblog.app/app/pkgs/httpcachetransport" + "willnorris.com/go/microformats" +) + +func (a *goBlog) initMicroformatsCache() { + a.mfInit.Do(func() { + a.mfCache, _ = ristretto.NewCache(&ristretto.Config{ + NumCounters: 100, + MaxCost: 10, // Cache http responses for 10 requests + BufferItems: 64, + IgnoreInternalCost: true, + }) + }) +} + +type microformatsResult struct { + Title, Content, Author, Url string + source string + hasUrl bool +} + +func 
(a *goBlog) parseMicroformats(u string, cache bool) (*microformatsResult, error) { + buf := bufferpool.Get() + defer bufferpool.Put(buf) + rb := requests.URL(u). + Method(http.MethodGet). + Accept(contenttype.HTMLUTF8). + Client(a.httpClient). + ToBytesBuffer(buf) + if cache { + a.initMicroformatsCache() + rb.Transport(httpcachetransport.NewHttpCacheTransport(a.httpClient.Transport, a.mfCache, 10*time.Minute)) + } + err := rb.Fetch(context.Background()) + if err != nil { + return nil, err + } + return a.parseMicroformatsFromBytes(u, buf.Bytes()) +} + +func (a *goBlog) parseMicroformatsFromBytes(u string, b []byte) (*microformatsResult, error) { + parsedUrl, err := url.Parse(u) + if err != nil { + return nil, err + } + m := &microformatsResult{ + source: u, + } + // Fill from microformats + m.fillFromData(microformats.Parse(bytes.NewReader(b), parsedUrl)) + if m.Url == "" { + m.Url = u + } + // Set title when content is empty as well + if m.Title == "" && m.Content == "" { + doc, err := goquery.NewDocumentFromReader(bytes.NewReader(b)) + if err != nil { + return nil, err + } + if title := doc.Find("title"); title != nil { + m.Title = title.Text() + } + } + // Reset title if it's just a prefix of the content + if m.Title != "" && strings.HasPrefix(m.Content, m.Title) { + m.Title = "" + } + return m, nil +} + +func (m *microformatsResult) fillFromData(mf *microformats.Data) { + // Fill data + for _, i := range mf.Items { + if m.fill(i) { + break + } + } +} + +func (m *microformatsResult) fill(mf *microformats.Microformat) bool { + if mfHasType(mf, "h-entry") { + // Check URL + if url, ok := mf.Properties["url"]; ok && len(url) > 0 { + if url0, ok := url[0].(string); ok { + if strings.EqualFold(url0, m.source) { + // Is searched entry + m.hasUrl = true + m.Url = url0 + // Reset attributes to refill + m.Author = "" + m.Title = "" + m.Content = "" + } else if m.hasUrl { + // Already found entry + return false + } else if m.Url == "" { + // Is the first entry + m.Url = url0 +
} else { + // Is not the first entry + return false + } + } + } + // Title + m.fillTitle(mf) + // Content + m.fillContent(mf) + // Author + m.fillAuthor(mf) + return m.hasUrl + } + for _, mfc := range mf.Children { + if m.fill(mfc) { + return true + } + } + return false +} + +func (m *microformatsResult) fillTitle(mf *microformats.Microformat) { + if m.Title != "" { + return + } + if name, ok := mf.Properties["name"]; ok && len(name) > 0 { + if title, ok := name[0].(string); ok { + m.Title = strings.TrimSpace(title) + } + } +} + +func (m *microformatsResult) fillContent(mf *microformats.Microformat) { + if m.Content != "" { + return + } + if contents, ok := mf.Properties["content"]; ok && len(contents) > 0 { + if content, ok := contents[0].(map[string]string); ok { + if contentHTML, ok := content["html"]; ok { + m.Content = cleanHTMLText(contentHTML) + // Replace newlines with spaces + m.Content = strings.ReplaceAll(m.Content, "\n", " ") + // Collapse double spaces + m.Content = strings.Join(strings.Fields(m.Content), " ") + // Trim spaces + m.Content = strings.TrimSpace(m.Content) + } + } + } +} + +func (m *microformatsResult) fillAuthor(mf *microformats.Microformat) { + if m.Author != "" { + return + } + if authors, ok := mf.Properties["author"]; ok && len(authors) > 0 { + if author, ok := authors[0].(*microformats.Microformat); ok { + if names, ok := author.Properties["name"]; ok && len(names) > 0 { + if name, ok := names[0].(string); ok { + m.Author = strings.TrimSpace(name) + } + } + } + } +} + +func mfHasType(mf *microformats.Microformat, typ string) bool { + for _, t := range mf.Type { + if typ == t { + return true + } + } + return false +} diff --git a/microformats_test.go b/microformats_test.go new file mode 100644 index 0000000..f047ed3 --- /dev/null +++ b/microformats_test.go @@ -0,0 +1,37 @@ +package main + +import ( + "net/http" + "os" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func 
Test_parseMicroformats(t *testing.T) { + + app := &goBlog{ + cfg: createDefaultTestConfig(t), + } + err := app.initConfig(false) + require.NoError(t, err) + + testHtmlBytes, err := os.ReadFile("testdata/wmtest.html") + require.NoError(t, err) + testHtml := string(testHtmlBytes) + + mockClient := newFakeHttpClient() + mockClient.setFakeResponse(http.StatusOK, testHtml) + + app.httpClient = mockClient.Client + + m, err := app.parseMicroformats("https://example.net/articles/micropub-crossposting-to-twitter-and-enabling-tweetstorms", false) + require.NoError(t, err) + + assert.Equal(t, "Micropub, Crossposting to Twitter, and Enabling “Tweetstorms”", m.Title) + assert.NotEmpty(t, m.Content) + assert.Equal(t, "Test Blogger", m.Author) + assert.Equal(t, "https://example.net/articles/micropub-crossposting-to-twitter-and-enabling-tweetstorms", m.Url) + +} diff --git a/pkgs/httpcachetransport/httpCacheTransport.go b/pkgs/httpcachetransport/httpCacheTransport.go new file mode 100644 index 0000000..656f1b3 --- /dev/null +++ b/pkgs/httpcachetransport/httpCacheTransport.go @@ -0,0 +1,45 @@ +package httpcachetransport + +import ( + "bufio" + "bytes" + "net/http" + "net/http/httputil" + "time" + + "github.com/dgraph-io/ristretto" +) + +type httpCacheTransport struct { + parent http.RoundTripper + ristrettoCache *ristretto.Cache + ttl time.Duration +} + +func (t *httpCacheTransport) RoundTrip(r *http.Request) (*http.Response, error) { + requestUrl := r.URL.String() + if t.ristrettoCache != nil { + if cached, hasCached := t.ristrettoCache.Get(requestUrl); hasCached { + if cachedResp, ok := cached.([]byte); ok { + return http.ReadResponse(bufio.NewReader(bytes.NewReader(cachedResp)), r) + } + } + } + resp, err := t.parent.RoundTrip(r) + if err == nil && t.ristrettoCache != nil { + respBytes, err := httputil.DumpResponse(resp, true) + if err != nil { + return resp, err + } + t.ristrettoCache.SetWithTTL(requestUrl, respBytes, 1, t.ttl) + t.ristrettoCache.Wait() + return 
http.ReadResponse(bufio.NewReader(bytes.NewReader(respBytes)), r) + } + return resp, err +} + +// Creates a new http.RoundTripper that caches all +// request responses (by the request URL) in ristretto. +func NewHttpCacheTransport(parent http.RoundTripper, ristrettoCache *ristretto.Cache, ttl time.Duration) http.RoundTripper { + return &httpCacheTransport{parent, ristrettoCache, ttl} +} diff --git a/pkgs/httpcachetransport/httpCacheTransport_test.go b/pkgs/httpcachetransport/httpCacheTransport_test.go new file mode 100644 index 0000000..0e9b7c6 --- /dev/null +++ b/pkgs/httpcachetransport/httpCacheTransport_test.go @@ -0,0 +1,50 @@ +package httpcachetransport + +import ( + "bufio" + "context" + "net/http" + "strings" + "testing" + "time" + + "github.com/carlmjohnson/requests" + "github.com/dgraph-io/ristretto" + "github.com/stretchr/testify/assert" +) + +const fakeResponse = `HTTP/1.1 200 OK +Content-Type: text/html; charset=UTF-8 +Date: Wed, 14 Dec 2022 10:34:03 GMT + + + +` + +func TestHttpCacheTransport(t *testing.T) { + cache, _ := ristretto.NewCache(&ristretto.Config{ + NumCounters: 100, + MaxCost: 10, + BufferItems: 64, + IgnoreInternalCost: true, + }) + + counter := 0 + + orig := requests.RoundTripFunc(func(req *http.Request) (res *http.Response, err error) { + counter++ + return http.ReadResponse(bufio.NewReader(strings.NewReader(fakeResponse)), req) + }) + + client := &http.Client{ + Transport: NewHttpCacheTransport(orig, cache, time.Minute), + } + + err := requests.URL("https://example.com/").Client(client).Fetch(context.Background()) + assert.NoError(t, err) + + err = requests.URL("https://example.com/").Client(client).Fetch(context.Background()) + assert.NoError(t, err) + + assert.Equal(t, 1, counter) +} diff --git a/postsDb.go b/postsDb.go index 6580abe..899bbaa 100644 --- a/postsDb.go +++ b/postsDb.go @@ -85,6 +85,22 @@ func (a *goBlog) checkPost(p *post) (err error) { } p.Parameters[pk] = pvs } + // Automatically add reply title + if replyLink := 
p.firstParameter(a.cfg.Micropub.ReplyParam); replyLink != "" && p.firstParameter(a.cfg.Micropub.ReplyTitleParam) == "" && + a.cfg.Blogs[p.Blog].addReplyTitle { + // Is reply, but has no reply title + if mf, err := a.parseMicroformats(replyLink, true); err == nil && mf.Title != "" { + p.addParameter(a.cfg.Micropub.ReplyTitleParam, mf.Title) + } + } + // Automatically add like title + if likeLink := p.firstParameter(a.cfg.Micropub.LikeParam); likeLink != "" && p.firstParameter(a.cfg.Micropub.LikeTitleParam) == "" && + a.cfg.Blogs[p.Blog].addLikeTitle { + // Is like, but has no like title + if mf, err := a.parseMicroformats(likeLink, true); err == nil && mf.Title != "" { + p.addParameter(a.cfg.Micropub.LikeTitleParam, mf.Title) + } + } // Check path if p.Path != "/" { p.Path = strings.TrimSuffix(p.Path, "/") diff --git a/postsFuncs.go b/postsFuncs.go index 007d90f..328b4ea 100644 --- a/postsFuncs.go +++ b/postsFuncs.go @@ -35,6 +35,10 @@ func (p *post) firstParameter(parameter string) (result string) { return } +func (p *post) addParameter(parameter, value string) { + p.Parameters[parameter] = append(p.Parameters[parameter], value) +} + func (a *goBlog) postHtml(p *post, absolute bool) (res string) { buf := bufferpool.Get() a.postHtmlToWriter(buf, p, absolute) diff --git a/settings.go b/settings.go index 6d9d878..4c24519 100644 --- a/settings.go +++ b/settings.go @@ -23,12 +23,31 @@ func (a *goBlog) serveSettings(w http.ResponseWriter, r *http.Request) { hideOldContentWarning: bc.hideOldContentWarning, hideShareButton: bc.hideShareButton, hideTranslateButton: bc.hideTranslateButton, + addReplyTitle: bc.addReplyTitle, + addLikeTitle: bc.addLikeTitle, userNick: a.cfg.User.Nick, userName: a.cfg.User.Name, }, }) } +func (a *goBlog) booleanBlogSettingHandler(settingName string, apply func(*configBlog, bool)) http.HandlerFunc { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + blog, bc := a.getBlog(r) + // Read values + settingValue := 
r.FormValue(settingName) == "on" + // Update + err := a.saveBooleanSettingValue(settingNameWithBlog(blog, settingName), settingValue) + if err != nil { + a.serveError(w, r, "Failed to update setting in database", http.StatusInternalServerError) + return + } + // Apply + apply(bc, settingValue) + http.Redirect(w, r, bc.getRelativePath(settingsPath), http.StatusFound) + }) +} + const settingsDeleteSectionPath = "/deletesection" func (a *goBlog) settingsDeleteSection(w http.ResponseWriter, r *http.Request) { @@ -157,53 +176,45 @@ func (a *goBlog) settingsUpdateDefaultSection(w http.ResponseWriter, r *http.Req const settingsHideOldContentWarningPath = "/oldcontentwarning" -func (a *goBlog) settingsHideOldContentWarning(w http.ResponseWriter, r *http.Request) { - blog, bc := a.getBlog(r) - // Read values - hideOldContentWarning := r.FormValue(hideOldContentWarningSetting) == "on" - // Update - err := a.saveBooleanSettingValue(settingNameWithBlog(blog, hideOldContentWarningSetting), hideOldContentWarning) - if err != nil { - a.serveError(w, r, "Failed to update setting to hide old content warning in database", http.StatusInternalServerError) - return - } - bc.hideOldContentWarning = hideOldContentWarning - a.cache.purge() - http.Redirect(w, r, bc.getRelativePath(settingsPath), http.StatusFound) +func (a *goBlog) settingsHideOldContentWarning() http.HandlerFunc { + return a.booleanBlogSettingHandler(hideOldContentWarningSetting, func(cb *configBlog, b bool) { + cb.hideOldContentWarning = b + a.cache.purge() + }) } const settingsHideShareButtonPath = "/sharebutton" -func (a *goBlog) settingsHideShareButton(w http.ResponseWriter, r *http.Request) { - blog, bc := a.getBlog(r) - // Read values - hideShareButton := r.FormValue(hideShareButtonSetting) == "on" - // Update - err := a.saveBooleanSettingValue(settingNameWithBlog(blog, hideShareButtonSetting), hideShareButton) - if err != nil { - a.serveError(w, r, "Failed to update setting to hide share button in database", 
http.StatusInternalServerError) - return - } - bc.hideShareButton = hideShareButton - a.cache.purge() - http.Redirect(w, r, bc.getRelativePath(settingsPath), http.StatusFound) +func (a *goBlog) settingsHideShareButton() http.HandlerFunc { + return a.booleanBlogSettingHandler(hideShareButtonSetting, func(cb *configBlog, b bool) { + cb.hideShareButton = b + a.cache.purge() + }) } const settingsHideTranslateButtonPath = "/translatebutton" -func (a *goBlog) settingsHideTranslateButton(w http.ResponseWriter, r *http.Request) { - blog, bc := a.getBlog(r) - // Read values - hideTranslateButton := r.FormValue(hideTranslateButtonSetting) == "on" - // Update - err := a.saveBooleanSettingValue(settingNameWithBlog(blog, hideTranslateButtonSetting), hideTranslateButton) - if err != nil { - a.serveError(w, r, "Failed to update setting to hide translate button in database", http.StatusInternalServerError) - return - } - bc.hideTranslateButton = hideTranslateButton - a.cache.purge() - http.Redirect(w, r, bc.getRelativePath(settingsPath), http.StatusFound) +func (a *goBlog) settingsHideTranslateButton() http.HandlerFunc { + return a.booleanBlogSettingHandler(hideTranslateButtonSetting, func(cb *configBlog, b bool) { + cb.hideTranslateButton = b + a.cache.purge() + }) +} + +const settingsAddReplyTitlePath = "/replytitle" + +func (a *goBlog) settingsAddReplyTitle() http.HandlerFunc { + return a.booleanBlogSettingHandler(addReplyTitleSetting, func(cb *configBlog, b bool) { + cb.addReplyTitle = b + }) +} + +const settingsAddLikeTitlePath = "/liketitle" + +func (a *goBlog) settingsAddLikeTitle() http.HandlerFunc { + return a.booleanBlogSettingHandler(addLikeTitleSetting, func(cb *configBlog, b bool) { + cb.addLikeTitle = b + }) } const settingsUpdateUserPath = "/user" diff --git a/settingsDb.go b/settingsDb.go index e46a919..325f337 100644 --- a/settingsDb.go +++ b/settingsDb.go @@ -19,6 +19,8 @@ const ( hideTranslateButtonSetting = "hidetranslatebutton" userNickSetting = "usernick" 
userNameSetting = "username" + addReplyTitleSetting = "addreplytitle" + addLikeTitleSetting = "addliketitle" ) func (a *goBlog) getSettingValue(name string) (string, error) { diff --git a/strings/de.yaml b/strings/de.yaml index 27aaa70..9568f4d 100644 --- a/strings/de.yaml +++ b/strings/de.yaml @@ -1,4 +1,6 @@ acommentby: "Ein Kommentar von" +addliketitledesc: "Automatisch einen Like-Titel zu neuen und aktualisierten Beiträgen mit einem Like-Link ohne manuell gesetzten Like-Titel hinzufügen." +addreplytitledesc: "Automatisch einen Reply-Titel zu neuen und aktualisierten Beiträgen mit einem Reply-Link ohne manuell gesetzten Reply-Titel hinzufügen." captchainstructions: "Bitte gib die Ziffern aus dem oberen Bild ein" chars: "Buchstaben" comment: "Kommentar" diff --git a/strings/default.yaml b/strings/default.yaml index cd0f509..9c730a6 100644 --- a/strings/default.yaml +++ b/strings/default.yaml @@ -1,4 +1,6 @@ acommentby: "A comment by" +addliketitledesc: "Automatically add like title to new and updated posts with a like link and no manually set like title." +addreplytitledesc: "Automatically add reply title to new and updated posts with a reply link and no manually set reply title." 
apfollower: "Follower" apfollowers: "ActivityPub followers" apinbox: "Inbox" diff --git a/ui.go b/ui.go index 80e79ed..d4fc1a9 100644 --- a/ui.go +++ b/ui.go @@ -1536,6 +1536,8 @@ type settingsRenderData struct { hideOldContentWarning bool hideShareButton bool hideTranslateButton bool + addReplyTitle bool + addLikeTitle bool userNick string userName string } @@ -1584,6 +1586,20 @@ func (a *goBlog) renderSettings(hb *htmlbuilder.HtmlBuilder, rd *renderData) { hideTranslateButtonSetting, srd.hideTranslateButton, ) + // Add reply title + a.renderBooleanSetting(hb, rd, + rd.Blog.getRelativePath(settingsPath+settingsAddReplyTitlePath), + a.ts.GetTemplateStringVariant(rd.Blog.Lang, "addreplytitledesc"), + addReplyTitleSetting, + srd.addReplyTitle, + ) + // Add like title + a.renderBooleanSetting(hb, rd, + rd.Blog.getRelativePath(settingsPath+settingsAddLikeTitlePath), + a.ts.GetTemplateStringVariant(rd.Blog.Lang, "addliketitledesc"), + addLikeTitleSetting, + srd.addLikeTitle, + ) // User settings a.renderUserSettings(hb, rd, srd) diff --git a/webmention.go b/webmention.go index 020f1ae..52adab4 100644 --- a/webmention.go +++ b/webmention.go @@ -34,7 +34,6 @@ type mention struct { Author string Status webmentionStatus Submentions []*mention - hasUrl bool } func (a *goBlog) initWebmention() { diff --git a/webmentionVerification.go b/webmentionVerification.go index 0149936..e92c0d4 100644 --- a/webmentionVerification.go +++ b/webmentionVerification.go @@ -9,15 +9,12 @@ import ( "io" "log" "net/http" - "net/url" "strings" "time" - "github.com/PuerkitoBio/goquery" "github.com/samber/lo" "go.goblog.app/app/pkgs/bufferpool" "go.goblog.app/app/pkgs/contenttype" - "willnorris.com/go/microformats" ) func (a *goBlog) initWebmentionQueue() { @@ -150,9 +147,9 @@ func (a *goBlog) verifyMention(m *mention) error { } func (a *goBlog) verifyReader(m *mention, body io.Reader) error { - linksBuffer, gqBuffer, mfBuffer := bufferpool.Get(), bufferpool.Get(), bufferpool.Get() - defer 
bufferpool.Put(linksBuffer, gqBuffer, mfBuffer) - if _, err := io.Copy(io.MultiWriter(linksBuffer, gqBuffer, mfBuffer), body); err != nil { + linksBuffer, mfBuffer := bufferpool.Get(), bufferpool.Get() + defer bufferpool.Put(linksBuffer, mfBuffer) + if _, err := io.Copy(io.MultiWriter(linksBuffer, mfBuffer), body); err != nil { return err } // Check if source mentions target @@ -187,136 +184,10 @@ func (a *goBlog) verifyReader(m *mention, body io.Reader) error { return errors.New("target not found in source") } // Fill mention attributes - sourceURL, err := url.Parse(defaultIfEmpty(m.NewSource, m.Source)) + mf, err := a.parseMicroformatsFromBytes(defaultIfEmpty(m.NewSource, m.Source), mfBuffer.Bytes()) if err != nil { return err } - m.Title = "" - m.Content = "" - m.Author = "" - m.Url = "" - m.hasUrl = false - m.fillFromData(microformats.Parse(mfBuffer, sourceURL)) - if m.Url == "" { - m.Url = m.Source - } - // Set title when content is empty as well - if m.Title == "" && m.Content == "" { - doc, err := goquery.NewDocumentFromReader(gqBuffer) - if err != nil { - return err - } - if title := doc.Find("title"); title != nil { - m.Title = title.Text() - } - } - // Reset title if it's just a prefix of the content - if m.Title != "" && strings.HasPrefix(m.Content, m.Title) { - m.Title = "" - } + m.Title, m.Content, m.Author, m.Url = mf.Title, mf.Content, mf.Author, defaultIfEmpty(mf.Url, m.Source) return nil } - -func (m *mention) fillFromData(mf *microformats.Data) { - // Fill data - for _, i := range mf.Items { - if m.fill(i) { - break - } - } -} - -func (m *mention) fill(mf *microformats.Microformat) bool { - if mfHasType(mf, "h-entry") { - // Check URL - if url, ok := mf.Properties["url"]; ok && len(url) > 0 { - if url0, ok := url[0].(string); ok { - if strings.EqualFold(url0, defaultIfEmpty(m.NewSource, m.Source)) { - // Is searched entry - m.hasUrl = true - m.Url = url0 - // Reset attributes to refill - m.Author = "" - m.Title = "" - m.Content = "" - } else if 
m.hasUrl { - // Already found entry - return false - } else if m.Url == "" { - // Is the first entry - m.Url = url0 - } else { - // Is not the first entry - return false - } - } - } - // Title - m.fillTitle(mf) - // Content - m.fillContent(mf) - // Author - m.fillAuthor(mf) - return m.hasUrl - } - for _, mfc := range mf.Children { - if m.fill(mfc) { - return true - } - } - return false -} - -func (m *mention) fillTitle(mf *microformats.Microformat) { - if m.Title != "" { - return - } - if name, ok := mf.Properties["name"]; ok && len(name) > 0 { - if title, ok := name[0].(string); ok { - m.Title = strings.TrimSpace(title) - } - } -} - -func (m *mention) fillContent(mf *microformats.Microformat) { - if m.Content != "" { - return - } - if contents, ok := mf.Properties["content"]; ok && len(contents) > 0 { - if content, ok := contents[0].(map[string]string); ok { - if contentHTML, ok := content["html"]; ok { - m.Content = cleanHTMLText(contentHTML) - // Replace newlines with spaces - m.Content = strings.ReplaceAll(m.Content, "\n", " ") - // Collapse double spaces - m.Content = strings.Join(strings.Fields(m.Content), " ") - // Trim spaces - m.Content = strings.TrimSpace(m.Content) - } - } - } -} - -func (m *mention) fillAuthor(mf *microformats.Microformat) { - if m.Author != "" { - return - } - if authors, ok := mf.Properties["author"]; ok && len(authors) > 0 { - if author, ok := authors[0].(*microformats.Microformat); ok { - if names, ok := author.Properties["name"]; ok && len(names) > 0 { - if name, ok := names[0].(string); ok { - m.Author = strings.TrimSpace(name) - } - } - } - } -} - -func mfHasType(mf *microformats.Microformat, typ string) bool { - for _, t := range mf.Type { - if typ == t { - return true - } - } - return false -}