2020-11-16 11:05:34 +00:00
package main
import (
"bytes"
2021-05-14 16:24:02 +00:00
"context"
2020-11-16 17:34:29 +00:00
"database/sql"
2021-05-24 07:12:46 +00:00
"encoding/gob"
2020-11-16 11:05:34 +00:00
"errors"
2020-11-16 17:34:29 +00:00
"fmt"
2020-11-16 11:05:34 +00:00
"io"
2020-11-25 11:36:14 +00:00
"log"
2020-11-16 11:05:34 +00:00
"net/http"
2021-05-14 16:24:02 +00:00
"net/http/httptest"
2020-11-16 11:05:34 +00:00
"net/url"
2021-02-27 07:31:06 +00:00
"strings"
2021-05-24 07:12:46 +00:00
"time"
2020-11-16 11:05:34 +00:00
2020-11-16 13:18:14 +00:00
"github.com/PuerkitoBio/goquery"
2021-04-16 18:00:38 +00:00
"github.com/thoas/go-funk"
2020-11-16 11:05:34 +00:00
"willnorris.com/go/microformats"
)
2021-06-06 12:39:42 +00:00
func ( a * goBlog ) initWebmentionQueue ( ) {
2020-11-16 17:34:29 +00:00
go func ( ) {
for {
2021-06-06 12:39:42 +00:00
qi , err := a . db . peekQueue ( "wm" )
2021-05-24 07:12:46 +00:00
if err != nil {
log . Println ( err . Error ( ) )
continue
} else if qi != nil {
var m mention
err = gob . NewDecoder ( bytes . NewReader ( qi . content ) ) . Decode ( & m )
if err != nil {
log . Println ( err . Error ( ) )
2021-06-06 12:39:42 +00:00
_ = a . db . dequeue ( qi )
2020-11-25 11:36:14 +00:00
continue
}
2021-06-06 12:39:42 +00:00
err = a . verifyMention ( & m )
2021-05-24 07:12:46 +00:00
if err != nil {
log . Println ( fmt . Sprintf ( "Failed to verify webmention from %s to %s: %s" , m . Source , m . Target , err . Error ( ) ) )
}
2021-06-06 12:39:42 +00:00
err = a . db . dequeue ( qi )
2021-05-24 07:12:46 +00:00
if err != nil {
log . Println ( err . Error ( ) )
2020-11-25 11:36:14 +00:00
}
2021-05-24 08:44:43 +00:00
} else {
// No item in the queue, wait a moment
time . Sleep ( 15 * time . Second )
2020-11-25 11:36:14 +00:00
}
2020-11-16 11:05:34 +00:00
}
2020-11-16 17:34:29 +00:00
} ( )
}
2021-06-06 12:39:42 +00:00
func ( a * goBlog ) queueMention ( m * mention ) error {
if wm := a . cfg . Webmention ; wm != nil && wm . DisableReceiving {
2021-04-23 17:36:57 +00:00
return errors . New ( "webmention receiving disabled" )
}
2021-05-24 07:12:46 +00:00
var buf bytes . Buffer
if err := gob . NewEncoder ( & buf ) . Encode ( m ) ; err != nil {
return err
}
2021-06-06 12:39:42 +00:00
return a . db . enqueue ( "wm" , buf . Bytes ( ) , time . Now ( ) )
2020-11-25 11:36:14 +00:00
}
2021-06-06 12:39:42 +00:00
func ( a * goBlog ) verifyMention ( m * mention ) error {
2021-06-30 06:04:30 +00:00
// Parse url -> string for source and target
u , err := url . Parse ( m . Source )
if err != nil {
return err
}
m . Source = u . String ( )
2021-06-30 06:10:16 +00:00
// u, err = url.Parse(m.Target)
// if err != nil {
// return err
// }
// m.Target = u.String()
2021-06-30 06:04:30 +00:00
// Do request
2020-11-25 11:36:14 +00:00
req , err := http . NewRequest ( http . MethodGet , m . Source , nil )
2020-11-16 17:34:29 +00:00
if err != nil {
return err
}
2021-05-14 16:24:02 +00:00
var resp * http . Response
2021-06-06 12:39:42 +00:00
if strings . HasPrefix ( m . Source , a . cfg . Server . PublicAddress ) {
2021-05-14 16:24:02 +00:00
rec := httptest . NewRecorder ( )
2021-06-06 12:39:42 +00:00
for a . d == nil {
// Server not yet started
2021-06-15 20:20:54 +00:00
time . Sleep ( 1 * time . Second )
2021-06-06 12:39:42 +00:00
}
a . d . ServeHTTP ( rec , req . WithContext ( context . WithValue ( req . Context ( ) , loggedInKey , true ) ) )
2021-05-14 16:24:02 +00:00
resp = rec . Result ( )
} else {
req . Header . Set ( userAgent , appUserAgent )
2021-06-19 06:37:16 +00:00
resp , err = a . httpClient . Do ( req )
2021-05-14 16:24:02 +00:00
if err != nil {
return err
}
2020-11-16 11:05:34 +00:00
}
2020-11-25 11:36:14 +00:00
err = m . verifyReader ( resp . Body )
2021-06-18 12:32:03 +00:00
_ , _ = io . Copy ( io . Discard , resp . Body )
2020-11-25 11:36:14 +00:00
_ = resp . Body . Close ( )
if err != nil {
2021-06-06 12:39:42 +00:00
_ , err := a . db . exec ( "delete from webmentions where source = @source and target = @target" , sql . Named ( "source" , m . Source ) , sql . Named ( "target" , m . Target ) )
2020-11-25 11:36:14 +00:00
return err
2020-11-16 17:34:29 +00:00
}
2021-06-15 20:20:54 +00:00
if cr := [ ] rune ( m . Content ) ; len ( cr ) > 500 {
m . Content = string ( cr [ 0 : 497 ] ) + "…"
2020-11-16 17:34:29 +00:00
}
2021-06-15 20:20:54 +00:00
if tr := [ ] rune ( m . Title ) ; len ( tr ) > 60 {
m . Title = string ( tr [ 0 : 57 ] ) + "…"
2020-12-13 09:39:00 +00:00
}
2020-11-16 17:34:29 +00:00
newStatus := webmentionStatusVerified
2021-06-06 12:39:42 +00:00
if a . db . webmentionExists ( m . Source , m . Target ) {
2021-06-29 21:19:58 +00:00
_ , err = a . db . exec ( "update webmentions set status = @status, title = @title, content = @content, author = @author where lower(source) = lower(@source) and lower(target) = lower(@target)" ,
2020-11-25 11:36:14 +00:00
sql . Named ( "status" , newStatus ) , sql . Named ( "title" , m . Title ) , sql . Named ( "content" , m . Content ) , sql . Named ( "author" , m . Author ) , sql . Named ( "source" , m . Source ) , sql . Named ( "target" , m . Target ) )
} else {
2021-06-06 12:39:42 +00:00
_ , err = a . db . exec ( "insert into webmentions (source, target, created, status, title, content, author) values (@source, @target, @created, @status, @title, @content, @author)" ,
2020-11-25 11:36:14 +00:00
sql . Named ( "source" , m . Source ) , sql . Named ( "target" , m . Target ) , sql . Named ( "created" , m . Created ) , sql . Named ( "status" , newStatus ) , sql . Named ( "title" , m . Title ) , sql . Named ( "content" , m . Content ) , sql . Named ( "author" , m . Author ) )
2021-06-06 12:39:42 +00:00
a . sendNotification ( fmt . Sprintf ( "New webmention from %s to %s" , m . Source , m . Target ) )
2020-11-16 17:34:29 +00:00
}
return err
}
2020-11-25 11:36:14 +00:00
func ( m * mention ) verifyReader ( body io . Reader ) error {
2020-11-16 13:18:14 +00:00
var linksBuffer , gqBuffer , mfBuffer bytes . Buffer
2021-02-08 17:51:07 +00:00
if _ , err := io . Copy ( io . MultiWriter ( & linksBuffer , & gqBuffer , & mfBuffer ) , body ) ; err != nil {
return err
}
2020-11-16 13:18:14 +00:00
// Check if source mentions target
links , err := allLinksFromHTML ( & linksBuffer , m . Source )
2020-11-16 11:05:34 +00:00
if err != nil {
return err
}
2021-04-16 18:00:38 +00:00
if _ , hasLink := funk . FindString ( links , func ( s string ) bool {
return unescapedPath ( s ) == unescapedPath ( m . Target )
} ) ; ! hasLink {
2020-11-16 13:18:14 +00:00
return errors . New ( "target not found in source" )
2020-11-16 11:05:34 +00:00
}
2020-11-16 13:18:14 +00:00
// Fill mention attributes
sourceURL , err := url . Parse ( m . Source )
if err != nil {
return err
}
2021-06-15 20:20:54 +00:00
m . Title = ""
m . Content = ""
m . Author = ""
2020-11-25 11:36:14 +00:00
m . fillFromData ( microformats . Parse ( & mfBuffer , sourceURL ) )
2021-06-15 20:20:54 +00:00
// Set title when content is empty as well
if m . Title == "" && m . Content == "" {
doc , err := goquery . NewDocumentFromReader ( & gqBuffer )
if err != nil {
return err
}
if title := doc . Find ( "title" ) ; title != nil {
m . Title = title . Text ( )
}
}
2020-11-16 11:05:34 +00:00
return nil
}
2020-11-25 11:36:14 +00:00
func ( m * mention ) fillFromData ( mf * microformats . Data ) {
2020-11-16 11:05:34 +00:00
for _ , i := range mf . Items {
2020-11-25 11:36:14 +00:00
m . fill ( i )
2020-11-16 11:05:34 +00:00
}
}
2020-11-25 11:36:14 +00:00
func ( m * mention ) fill ( mf * microformats . Microformat ) bool {
2020-11-16 11:05:34 +00:00
if mfHasType ( mf , "h-entry" ) {
2021-05-23 18:11:48 +00:00
// Check URL
if url , ok := mf . Properties [ "url" ] ; ok && len ( url ) > 0 {
if url0 , ok := url [ 0 ] . ( string ) ; ok {
2021-06-30 06:04:30 +00:00
if ! strings . EqualFold ( url0 , m . Source ) {
2021-05-23 18:11:48 +00:00
// Not correct URL
return false
}
}
}
// Title
2021-06-18 12:32:03 +00:00
m . fillTitle ( mf )
2021-05-23 18:11:48 +00:00
// Content
2021-06-18 12:32:03 +00:00
m . fillContent ( mf )
2021-05-23 18:11:48 +00:00
// Author
2021-06-18 12:32:03 +00:00
m . fillAuthor ( mf )
return true
}
for _ , mfc := range mf . Children {
if m . fill ( mfc ) {
return true
}
}
return false
}
// fillTitle sets m.Title to the whitespace-trimmed first "name" property of
// mf. The title is left untouched when that property is missing, empty or not
// a string.
func (m *mention) fillTitle(mf *microformats.Microformat) {
	names := mf.Properties["name"]
	if len(names) == 0 {
		return
	}
	first, isString := names[0].(string)
	if !isString {
		return
	}
	m.Title = strings.TrimSpace(first)
}
func ( m * mention ) fillContent ( mf * microformats . Microformat ) {
if contents , ok := mf . Properties [ "content" ] ; ok && len ( contents ) > 0 {
if content , ok := contents [ 0 ] . ( map [ string ] string ) ; ok {
if contentValue , ok := content [ "value" ] ; ok {
m . Content = strings . TrimSpace ( contentValue )
2020-11-16 11:05:34 +00:00
}
}
2021-05-23 18:11:48 +00:00
}
2021-06-18 12:32:03 +00:00
}
func ( m * mention ) fillAuthor ( mf * microformats . Microformat ) {
if authors , ok := mf . Properties [ "author" ] ; ok && len ( authors ) > 0 {
if author , ok := authors [ 0 ] . ( * microformats . Microformat ) ; ok {
if names , ok := author . Properties [ "name" ] ; ok && len ( names ) > 0 {
if name , ok := names [ 0 ] . ( string ) ; ok {
m . Author = strings . TrimSpace ( name )
}
2020-11-16 11:05:34 +00:00
}
}
}
}
// mfHasType reports whether typ is one of the entries in mf.Type. The
// comparison is an exact string match.
func mfHasType(mf *microformats.Microformat, typ string) bool {
	for idx := range mf.Type {
		if mf.Type[idx] == typ {
			return true
		}
	}
	return false
}