2020-11-16 11:05:34 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
2021-05-24 07:12:46 +00:00
|
|
|
"encoding/gob"
|
2020-11-16 11:05:34 +00:00
|
|
|
"errors"
|
2020-11-16 17:34:29 +00:00
|
|
|
"fmt"
|
2020-11-16 11:05:34 +00:00
|
|
|
"io"
|
2020-11-25 11:36:14 +00:00
|
|
|
"log"
|
2020-11-16 11:05:34 +00:00
|
|
|
"net/http"
|
|
|
|
"net/url"
|
2021-02-27 07:31:06 +00:00
|
|
|
"strings"
|
2021-05-24 07:12:46 +00:00
|
|
|
"time"
|
2020-11-16 11:05:34 +00:00
|
|
|
|
2020-11-16 13:18:14 +00:00
|
|
|
"github.com/PuerkitoBio/goquery"
|
2021-04-16 18:00:38 +00:00
|
|
|
"github.com/thoas/go-funk"
|
2021-11-19 16:36:03 +00:00
|
|
|
"go.goblog.app/app/pkgs/contenttype"
|
2020-11-16 11:05:34 +00:00
|
|
|
"willnorris.com/go/microformats"
|
|
|
|
)
|
|
|
|
|
2021-06-06 12:39:42 +00:00
|
|
|
func (a *goBlog) initWebmentionQueue() {
|
2020-11-16 17:34:29 +00:00
|
|
|
go func() {
|
|
|
|
for {
|
2021-06-06 12:39:42 +00:00
|
|
|
qi, err := a.db.peekQueue("wm")
|
2021-05-24 07:12:46 +00:00
|
|
|
if err != nil {
|
|
|
|
log.Println(err.Error())
|
|
|
|
continue
|
|
|
|
} else if qi != nil {
|
|
|
|
var m mention
|
|
|
|
err = gob.NewDecoder(bytes.NewReader(qi.content)).Decode(&m)
|
|
|
|
if err != nil {
|
|
|
|
log.Println(err.Error())
|
2021-06-06 12:39:42 +00:00
|
|
|
_ = a.db.dequeue(qi)
|
2020-11-25 11:36:14 +00:00
|
|
|
continue
|
|
|
|
}
|
2021-06-06 12:39:42 +00:00
|
|
|
err = a.verifyMention(&m)
|
2021-05-24 07:12:46 +00:00
|
|
|
if err != nil {
|
|
|
|
log.Println(fmt.Sprintf("Failed to verify webmention from %s to %s: %s", m.Source, m.Target, err.Error()))
|
|
|
|
}
|
2021-06-06 12:39:42 +00:00
|
|
|
err = a.db.dequeue(qi)
|
2021-05-24 07:12:46 +00:00
|
|
|
if err != nil {
|
|
|
|
log.Println(err.Error())
|
2020-11-25 11:36:14 +00:00
|
|
|
}
|
2021-05-24 08:44:43 +00:00
|
|
|
} else {
|
|
|
|
// No item in the queue, wait a moment
|
|
|
|
time.Sleep(15 * time.Second)
|
2020-11-25 11:36:14 +00:00
|
|
|
}
|
2020-11-16 11:05:34 +00:00
|
|
|
}
|
2020-11-16 17:34:29 +00:00
|
|
|
}()
|
|
|
|
}
|
|
|
|
|
2021-06-06 12:39:42 +00:00
|
|
|
func (a *goBlog) queueMention(m *mention) error {
|
|
|
|
if wm := a.cfg.Webmention; wm != nil && wm.DisableReceiving {
|
2021-04-23 17:36:57 +00:00
|
|
|
return errors.New("webmention receiving disabled")
|
|
|
|
}
|
2021-05-24 07:12:46 +00:00
|
|
|
var buf bytes.Buffer
|
|
|
|
if err := gob.NewEncoder(&buf).Encode(m); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-06-06 12:39:42 +00:00
|
|
|
return a.db.enqueue("wm", buf.Bytes(), time.Now())
|
2020-11-25 11:36:14 +00:00
|
|
|
}
|
|
|
|
|
2021-06-06 12:39:42 +00:00
|
|
|
func (a *goBlog) verifyMention(m *mention) error {
|
2021-11-19 16:36:03 +00:00
|
|
|
// Request target
|
|
|
|
targetReq, err := http.NewRequest(http.MethodGet, m.Target, nil)
|
2020-11-16 17:34:29 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-11-19 16:36:03 +00:00
|
|
|
targetReq.Header.Set("Accept", contenttype.HTMLUTF8)
|
|
|
|
setLoggedIn(targetReq, true)
|
|
|
|
targetResp, err := doHandlerRequest(targetReq, a.getAppRouter())
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
// Check if target has a valid status code
|
|
|
|
if targetResp.StatusCode != http.StatusOK {
|
2021-11-19 16:57:38 +00:00
|
|
|
if a.cfg.Debug {
|
2021-11-19 22:30:36 +00:00
|
|
|
a.debug(fmt.Sprintf("Webmention for unknown path: %s", m.Target))
|
2021-11-19 16:57:38 +00:00
|
|
|
}
|
2021-11-19 17:21:09 +00:00
|
|
|
return a.db.deleteWebmention(m)
|
2021-11-19 16:36:03 +00:00
|
|
|
}
|
|
|
|
// Check if target has a redirect
|
|
|
|
if respReq := targetResp.Request; respReq != nil {
|
|
|
|
if ru := respReq.URL; m.Target != ru.String() {
|
|
|
|
m.NewTarget = ru.String()
|
2021-06-06 12:39:42 +00:00
|
|
|
}
|
2021-11-19 16:36:03 +00:00
|
|
|
}
|
|
|
|
// Request source
|
|
|
|
sourceReq, err := http.NewRequest(http.MethodGet, m.Source, nil)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
sourceReq.Header.Set("Accept", contenttype.HTMLUTF8)
|
|
|
|
var sourceResp *http.Response
|
|
|
|
if strings.HasPrefix(m.Source, a.cfg.Server.PublicAddress) ||
|
|
|
|
(a.cfg.Server.ShortPublicAddress != "" && strings.HasPrefix(m.Source, a.cfg.Server.ShortPublicAddress)) {
|
|
|
|
setLoggedIn(sourceReq, true)
|
|
|
|
sourceResp, err = doHandlerRequest(sourceReq, a.getAppRouter())
|
2021-05-14 16:24:02 +00:00
|
|
|
} else {
|
2021-11-19 16:36:03 +00:00
|
|
|
sourceReq.Header.Set(userAgent, appUserAgent)
|
|
|
|
sourceResp, err = a.httpClient.Do(sourceReq)
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
// Check if source has a valid status code
|
|
|
|
if sourceResp.StatusCode != http.StatusOK {
|
2021-11-19 16:57:38 +00:00
|
|
|
if a.cfg.Debug {
|
2021-11-19 22:30:36 +00:00
|
|
|
a.debug(fmt.Sprintf("Delete webmention because source doesn't have valid status code: %s", m.Source))
|
2021-11-19 16:57:38 +00:00
|
|
|
}
|
2021-11-19 16:36:03 +00:00
|
|
|
return a.db.deleteWebmention(m)
|
2020-11-16 11:05:34 +00:00
|
|
|
}
|
2021-11-10 18:24:54 +00:00
|
|
|
// Check if source has a redirect
|
2021-11-19 16:36:03 +00:00
|
|
|
if respReq := sourceResp.Request; respReq != nil {
|
2021-11-10 21:24:36 +00:00
|
|
|
if ru := respReq.URL; m.Source != ru.String() {
|
|
|
|
m.NewSource = ru.String()
|
|
|
|
}
|
2021-11-10 18:24:54 +00:00
|
|
|
}
|
|
|
|
// Parse response body
|
2021-11-19 17:21:09 +00:00
|
|
|
err = a.verifyReader(m, sourceResp.Body)
|
2021-11-19 16:36:03 +00:00
|
|
|
_ = sourceResp.Body.Close()
|
2020-11-25 11:36:14 +00:00
|
|
|
if err != nil {
|
2021-11-19 16:57:38 +00:00
|
|
|
if a.cfg.Debug {
|
2021-11-19 22:30:36 +00:00
|
|
|
a.debug(fmt.Sprintf("Delete webmention because verifying %s threw error: %s", m.Source, err.Error()))
|
2021-11-19 16:57:38 +00:00
|
|
|
}
|
2021-11-19 16:36:03 +00:00
|
|
|
return a.db.deleteWebmention(m)
|
2020-11-16 17:34:29 +00:00
|
|
|
}
|
2021-06-15 20:20:54 +00:00
|
|
|
if cr := []rune(m.Content); len(cr) > 500 {
|
|
|
|
m.Content = string(cr[0:497]) + "…"
|
2020-11-16 17:34:29 +00:00
|
|
|
}
|
2021-06-15 20:20:54 +00:00
|
|
|
if tr := []rune(m.Title); len(tr) > 60 {
|
|
|
|
m.Title = string(tr[0:57]) + "…"
|
2020-12-13 09:39:00 +00:00
|
|
|
}
|
2020-11-16 17:34:29 +00:00
|
|
|
newStatus := webmentionStatusVerified
|
2021-11-19 16:36:03 +00:00
|
|
|
// Update or insert webmention
|
|
|
|
if a.db.webmentionExists(m) {
|
2021-11-19 16:57:38 +00:00
|
|
|
if a.cfg.Debug {
|
2021-11-19 22:30:36 +00:00
|
|
|
a.debug(fmt.Sprintf("Update webmention: %s => %s", m.Source, m.Target))
|
2021-11-19 16:57:38 +00:00
|
|
|
}
|
2021-11-10 18:24:54 +00:00
|
|
|
// Update webmention
|
2021-11-19 16:36:03 +00:00
|
|
|
err = a.db.updateWebmention(m, newStatus)
|
2021-11-10 21:24:36 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-11-25 11:36:14 +00:00
|
|
|
} else {
|
2021-11-10 18:24:54 +00:00
|
|
|
if m.NewSource != "" {
|
|
|
|
m.Source = m.NewSource
|
|
|
|
}
|
2021-11-19 16:36:03 +00:00
|
|
|
if m.NewTarget != "" {
|
|
|
|
m.Target = m.NewTarget
|
|
|
|
}
|
2021-11-10 21:24:36 +00:00
|
|
|
err = a.db.insertWebmention(m, newStatus)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-11-19 16:36:03 +00:00
|
|
|
a.sendNotification(fmt.Sprintf("New webmention from %s to %s", defaultIfEmpty(m.NewSource, m.Source), defaultIfEmpty(m.NewTarget, m.Target)))
|
2020-11-16 17:34:29 +00:00
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-11-19 17:21:09 +00:00
|
|
|
func (a *goBlog) verifyReader(m *mention, body io.Reader) error {
|
2020-11-16 13:18:14 +00:00
|
|
|
var linksBuffer, gqBuffer, mfBuffer bytes.Buffer
|
2021-02-08 17:51:07 +00:00
|
|
|
if _, err := io.Copy(io.MultiWriter(&linksBuffer, &gqBuffer, &mfBuffer), body); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-11-16 13:18:14 +00:00
|
|
|
// Check if source mentions target
|
2021-11-10 18:24:54 +00:00
|
|
|
links, err := allLinksFromHTML(&linksBuffer, defaultIfEmpty(m.NewSource, m.Source))
|
2020-11-16 11:05:34 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-04-16 18:00:38 +00:00
|
|
|
if _, hasLink := funk.FindString(links, func(s string) bool {
|
2021-11-19 17:21:09 +00:00
|
|
|
// Check if link belongs to installation
|
|
|
|
hasShortPrefix := a.cfg.Server.ShortPublicAddress != "" && strings.HasPrefix(s, a.cfg.Server.ShortPublicAddress)
|
|
|
|
hasLongPrefix := strings.HasPrefix(s, a.cfg.Server.PublicAddress)
|
|
|
|
if !hasShortPrefix && !hasLongPrefix {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
// Check if link is or redirects to target
|
|
|
|
req, err := http.NewRequest(http.MethodGet, m.Target, nil)
|
|
|
|
if err != nil {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
req.Header.Set("Accept", contenttype.HTMLUTF8)
|
|
|
|
setLoggedIn(req, true)
|
|
|
|
resp, err := doHandlerRequest(req, a.getAppRouter())
|
|
|
|
if err != nil {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if resp.StatusCode == http.StatusOK && unescapedPath(resp.Request.URL.String()) == unescapedPath(defaultIfEmpty(m.NewTarget, m.Target)) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
2021-04-16 18:00:38 +00:00
|
|
|
}); !hasLink {
|
2020-11-16 13:18:14 +00:00
|
|
|
return errors.New("target not found in source")
|
2020-11-16 11:05:34 +00:00
|
|
|
}
|
2020-11-16 13:18:14 +00:00
|
|
|
// Fill mention attributes
|
2021-11-10 18:24:54 +00:00
|
|
|
sourceURL, err := url.Parse(defaultIfEmpty(m.NewSource, m.Source))
|
2020-11-16 13:18:14 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-06-15 20:20:54 +00:00
|
|
|
m.Title = ""
|
|
|
|
m.Content = ""
|
|
|
|
m.Author = ""
|
2021-11-19 21:17:15 +00:00
|
|
|
m.Url = ""
|
|
|
|
m.hasUrl = false
|
2020-11-25 11:36:14 +00:00
|
|
|
m.fillFromData(microformats.Parse(&mfBuffer, sourceURL))
|
2021-11-19 21:17:15 +00:00
|
|
|
if m.Url == "" {
|
|
|
|
m.Url = m.Source
|
|
|
|
}
|
2021-06-15 20:20:54 +00:00
|
|
|
// Set title when content is empty as well
|
|
|
|
if m.Title == "" && m.Content == "" {
|
|
|
|
doc, err := goquery.NewDocumentFromReader(&gqBuffer)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if title := doc.Find("title"); title != nil {
|
|
|
|
m.Title = title.Text()
|
|
|
|
}
|
|
|
|
}
|
2020-11-16 11:05:34 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-11-25 11:36:14 +00:00
|
|
|
func (m *mention) fillFromData(mf *microformats.Data) {
|
2021-11-19 21:17:15 +00:00
|
|
|
// Fill data
|
2020-11-16 11:05:34 +00:00
|
|
|
for _, i := range mf.Items {
|
2021-11-19 21:17:15 +00:00
|
|
|
if m.fill(i) {
|
|
|
|
break
|
|
|
|
}
|
2020-11-16 11:05:34 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-11-25 11:36:14 +00:00
|
|
|
func (m *mention) fill(mf *microformats.Microformat) bool {
|
2020-11-16 11:05:34 +00:00
|
|
|
if mfHasType(mf, "h-entry") {
|
2021-05-23 18:11:48 +00:00
|
|
|
// Check URL
|
|
|
|
if url, ok := mf.Properties["url"]; ok && len(url) > 0 {
|
|
|
|
if url0, ok := url[0].(string); ok {
|
2021-11-19 21:17:15 +00:00
|
|
|
if strings.EqualFold(url0, defaultIfEmpty(m.NewSource, m.Source)) {
|
|
|
|
// Is searched entry
|
|
|
|
m.hasUrl = true
|
|
|
|
m.Url = url0
|
|
|
|
// Reset attributes to refill
|
|
|
|
m.Author = ""
|
|
|
|
m.Title = ""
|
|
|
|
m.Content = ""
|
|
|
|
} else if m.hasUrl {
|
|
|
|
// Already found entry
|
|
|
|
return false
|
|
|
|
} else if m.Url == "" {
|
|
|
|
// Is the first entry
|
|
|
|
m.Url = url0
|
|
|
|
} else {
|
|
|
|
// Is not the first entry
|
2021-05-23 18:11:48 +00:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Title
|
2021-06-18 12:32:03 +00:00
|
|
|
m.fillTitle(mf)
|
2021-05-23 18:11:48 +00:00
|
|
|
// Content
|
2021-06-18 12:32:03 +00:00
|
|
|
m.fillContent(mf)
|
2021-05-23 18:11:48 +00:00
|
|
|
// Author
|
2021-06-18 12:32:03 +00:00
|
|
|
m.fillAuthor(mf)
|
2021-11-19 21:53:25 +00:00
|
|
|
return m.hasUrl
|
2021-06-18 12:32:03 +00:00
|
|
|
}
|
|
|
|
for _, mfc := range mf.Children {
|
|
|
|
if m.fill(mfc) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m *mention) fillTitle(mf *microformats.Microformat) {
|
2021-11-19 21:53:25 +00:00
|
|
|
if m.Title != "" {
|
|
|
|
return
|
|
|
|
}
|
2021-06-18 12:32:03 +00:00
|
|
|
if name, ok := mf.Properties["name"]; ok && len(name) > 0 {
|
|
|
|
if title, ok := name[0].(string); ok {
|
|
|
|
m.Title = strings.TrimSpace(title)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m *mention) fillContent(mf *microformats.Microformat) {
|
2021-11-19 21:53:25 +00:00
|
|
|
if m.Content != "" {
|
|
|
|
return
|
|
|
|
}
|
2021-06-18 12:32:03 +00:00
|
|
|
if contents, ok := mf.Properties["content"]; ok && len(contents) > 0 {
|
|
|
|
if content, ok := contents[0].(map[string]string); ok {
|
2021-11-10 21:24:36 +00:00
|
|
|
if contentHTML, ok := content["html"]; ok {
|
|
|
|
m.Content = cleanHTMLText(contentHTML)
|
2021-11-19 16:36:03 +00:00
|
|
|
// Replace newlines with spaces
|
|
|
|
m.Content = strings.ReplaceAll(m.Content, "\n", " ")
|
|
|
|
// Collapse double spaces
|
|
|
|
m.Content = strings.Join(strings.Fields(m.Content), " ")
|
|
|
|
// Trim spaces
|
|
|
|
m.Content = strings.TrimSpace(m.Content)
|
2020-11-16 11:05:34 +00:00
|
|
|
}
|
|
|
|
}
|
2021-05-23 18:11:48 +00:00
|
|
|
}
|
2021-06-18 12:32:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (m *mention) fillAuthor(mf *microformats.Microformat) {
|
2021-11-19 21:53:25 +00:00
|
|
|
if m.Author != "" {
|
|
|
|
return
|
|
|
|
}
|
2021-06-18 12:32:03 +00:00
|
|
|
if authors, ok := mf.Properties["author"]; ok && len(authors) > 0 {
|
|
|
|
if author, ok := authors[0].(*microformats.Microformat); ok {
|
|
|
|
if names, ok := author.Properties["name"]; ok && len(names) > 0 {
|
|
|
|
if name, ok := names[0].(string); ok {
|
|
|
|
m.Author = strings.TrimSpace(name)
|
|
|
|
}
|
2020-11-16 11:05:34 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func mfHasType(mf *microformats.Microformat, typ string) bool {
|
|
|
|
for _, t := range mf.Type {
|
|
|
|
if typ == t {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|