From 29e29bbf6f9b8120a414c80518f9468a0b8c15cf Mon Sep 17 00:00:00 2001
From: Jan-Lukas Else
Date: Mon, 16 Nov 2020 12:05:34 +0100
Subject: [PATCH] Fix crash because of webmention verification

---
 go.mod               |   6 +-
 go.sum               |   4 +-
 webmentions.go       |   7 +-
 webmentionsVerify.go | 204 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 211 insertions(+), 10 deletions(-)
 create mode 100644 webmentionsVerify.go

diff --git a/go.mod b/go.mod
index 9b49fea..7f44f15 100644
--- a/go.mod
+++ b/go.mod
@@ -46,16 +46,16 @@ require (
 	go.uber.org/zap v1.16.0 // indirect
 	golang.org/x/crypto v0.0.0-20201112155050-0c6587e931a9
 	golang.org/x/lint v0.0.0-20200302205851-738671d3881b // indirect
-	golang.org/x/net v0.0.0-20201110031124-69a78807bb2b // indirect
+	golang.org/x/net v0.0.0-20201110031124-69a78807bb2b
 	golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9
 	golang.org/x/sys v0.0.0-20201113233024-12cec1faf1ba // indirect
 	golang.org/x/text v0.3.4 // indirect
-	golang.org/x/tools v0.0.0-20201113202037-1643af1435f3 // indirect
+	golang.org/x/tools v0.0.0-20201116002733-ac45abd4c88c // indirect
 	gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b // indirect
 	gopkg.in/ini.v1 v1.62.0 // indirect
 	gopkg.in/yaml.v2 v2.3.0 // indirect
 	gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776
 	honnef.co/go/tools v0.0.1-2020.1.6 // indirect
-	willnorris.com/go/microformats v1.1.1 // indirect
+	willnorris.com/go/microformats v1.1.1
 	willnorris.com/go/webmention v0.0.0-20200623235404-057ea514ab98
 )
diff --git a/go.sum b/go.sum
index 9a63268..3ed0ac7 100644
--- a/go.sum
+++ b/go.sum
@@ -661,8 +661,8 @@ golang.org/x/tools v0.0.0-20191216052735-49a3e744a425 h1:VvQyQJN0tSuecqgcIxMWnnf
 golang.org/x/tools v0.0.0-20191216052735-49a3e744a425/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
 golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
 golang.org/x/tools v0.0.0-20200410194907-79a7a3126eef/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
-golang.org/x/tools v0.0.0-20201113202037-1643af1435f3 h1:7R7+wzd5VuLvCNyHZ/MG511kkoP/DBEzkbh8qUsFbY8=
-golang.org/x/tools v0.0.0-20201113202037-1643af1435f3/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.0.0-20201116002733-ac45abd4c88c h1:quJUizHRFn7XriXTIOCLKSr76x2cMbNGfvfy9ubOO0g=
+golang.org/x/tools v0.0.0-20201116002733-ac45abd4c88c/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
diff --git a/webmentions.go b/webmentions.go
index b8489ab..40dbd21 100644
--- a/webmentions.go
+++ b/webmentions.go
@@ -1,7 +1,6 @@
 package main
 
 import (
-	"context"
 	"database/sql"
 	"fmt"
 	"log"
@@ -43,8 +42,8 @@ func initWebmention() {
 func startWebmentionVerifier() {
 	go func() {
 		for {
-			verifyNextWebmention()
 			time.Sleep(30 * time.Second)
+			verifyNextWebmention()
 		}
 	}()
 }
@@ -148,9 +147,7 @@ func verifyNextWebmention() error {
 		Source: m.Source,
 		Target: m.Target,
 	}
-	if err := wmd.Verify(context.Background(), wmm, func(c *wmd.VerifyOptions) {
-		c.MaxRedirects = 15
-	}); err != nil {
+	if err := wmVerify(wmm); err != nil {
 		// Invalid
 		return deleteWebmention(m.ID)
 	}
diff --git a/webmentionsVerify.go b/webmentionsVerify.go
new file mode 100644
index 0000000..58dcbc8
--- /dev/null
+++ b/webmentionsVerify.go
@@ -0,0 +1,204 @@
+package main
+
+// Copied from https://github.com/zerok/webmentiond/blob/main/pkg/webmention/verify.go and modified
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"github.com/zerok/webmentiond/pkg/webmention"
+	"golang.org/x/net/html"
+	"willnorris.com/go/microformats"
+)
+
+type wmVerifyOptions struct {
+	MaxRedirects int
+}
+
+func wmVerify(mention *webmention.Mention) error {
+	client := &http.Client{}
+	client.CheckRedirect = func(r *http.Request, via []*http.Request) error {
+		if len(via) > 15 {
+			return errors.New("too many redirects")
+		}
+		return nil
+	}
+	req, err := http.NewRequest(http.MethodGet, mention.Source, nil)
+	if err != nil {
+		return err
+	}
+	resp, err := client.Do(req)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+	return wmVerifyReader(resp.Body, mention)
+}
+
+func wmVerifyReader(body io.Reader, mention *webmention.Mention) error {
+	var tokenBuffer bytes.Buffer
+	var mfBuffer bytes.Buffer
+	sourceURL, err := url.Parse(mention.Source)
+	if err != nil {
+		return err
+	}
+	io.Copy(io.MultiWriter(&tokenBuffer, &mfBuffer), body)
+	tokenizer := html.NewTokenizer(&tokenBuffer)
+	mf := microformats.Parse(&mfBuffer, sourceURL)
+	inTitle := false
+	inAudio := false
+	inVideo := false
+	title := ""
+	u, err := url.Parse(mention.Source)
+	if err == nil {
+		title = u.Hostname()
+	}
+	var contentOK bool
+loop:
+	for {
+		tt := tokenizer.Next()
+		switch tt {
+		case html.TextToken:
+			if inTitle {
+				title = strings.TrimSpace(string(tokenizer.Text()))
+			}
+		case html.EndTagToken:
+			tagName, _ := tokenizer.TagName()
+			switch string(tagName) {
+			case "title":
+				inTitle = false
+			case "audio":
+				inAudio = false
+			case "video":
+				inVideo = false
+			}
+		case html.ErrorToken:
+			err := tokenizer.Err()
+			if err == io.EOF {
+				break loop
+			}
+			return err
+		case html.SelfClosingTagToken:
+			fallthrough
+		case html.StartTagToken:
+			tagName, _ := tokenizer.TagName()
+			switch string(tagName) {
+			case "title":
+				inTitle = true
+			case "audio":
+				inAudio = true
+			case "video":
+				inVideo = true
+			case "source":
+				if inVideo || inAudio {
+					src := getAttr(tokenizer, "src")
+					if src == mention.Target {
+						mention.Title = title
+						contentOK = true
+						continue
+					}
+				}
+			case "img":
+				src := getAttr(tokenizer, "src")
+				if src == mention.Target {
+					mention.Title = title
+					contentOK = true
+					continue
+				}
+			case "a":
+				href := getAttr(tokenizer, "href")
+				if href == mention.Target {
+					mention.Title = title
+					contentOK = true
+					continue
+				}
+			}
+
+		}
+	}
+	if !contentOK {
+		return fmt.Errorf("target not found in content")
+	}
+	mfFillMentionFromData(mention, mf)
+	return nil
+}
+
+func mfFillMentionFromData(mention *webmention.Mention, mf *microformats.Data) {
+	for _, i := range mf.Items {
+		mfFillMention(mention, i)
+	}
+}
+
+func mfFillMention(mention *webmention.Mention, mf *microformats.Microformat) bool {
+	if mfHasType(mf, "h-entry") {
+		if name, ok := mf.Properties["name"]; ok && len(name) > 0 {
+			if title, ok := name[0].(string); ok {
+				mention.Title = title
+			}
+		}
+		if commented, ok := mf.Properties["in-reply-to"]; ok && len(commented) > 0 {
+			if commentedItem, ok := commented[0].(string); ok && commentedItem == mention.Target {
+				mention.Type = "comment"
+			}
+		}
+		if commented, ok := mf.Properties["like-of"]; ok && len(commented) > 0 {
+			if commentedItem, ok := commented[0].(string); ok && commentedItem == mention.Target {
+				mention.Type = "like"
+			}
+		}
+		if contents, ok := mf.Properties["content"]; ok && len(contents) > 0 {
+			if content, ok := contents[0].(map[string]interface{}); ok {
+				if rawContentValue, ok := content["value"]; ok {
+					if contentValue, ok := rawContentValue.(string); ok {
+						mention.Content = contentValue
+					}
+				}
+			}
+		}
+		if authors, ok := mf.Properties["author"]; ok && len(authors) > 0 {
+			if author, ok := authors[0].(*microformats.Microformat); ok {
+				if names, ok := author.Properties["name"]; ok && len(names) > 0 {
+					if authorName, ok := names[0].(string); ok {
+						mention.AuthorName = authorName
+					}
+				}
+			}
+		}
+		return true
+	} else if len(mf.Children) > 0 {
+		for _, m := range mf.Children {
+			if mfFillMention(mention, m) {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+func mfHasType(mf *microformats.Microformat, typ string) bool {
+	for _, t := range mf.Type {
+		if typ == t {
+			return true
+		}
+	}
+	return false
+}
+
+func getAttr(tokenizer *html.Tokenizer, attr string) string {
+	var result string
+	for {
+		key, value, more := tokenizer.TagAttr()
+		if string(key) == attr {
+			result = string(value)
+		}
+		if !more {
+			break
+		}
+	}
+	return result
+}