package main import ( "crypto/tls" "fmt" "io" "log" "net/http" "strings" "sync" "time" ) func (a *goBlog) checkAllExternalLinks() { allPosts, err := a.db.getPosts(&postsRequestConfig{status: statusPublished, withoutParameters: true}) if err != nil { log.Println(err.Error()) return } wg := new(sync.WaitGroup) linkChan := make(chan stringPair) client := &http.Client{ Timeout: 30 * time.Second, Transport: &http.Transport{ DisableKeepAlives: true, TLSClientConfig: &tls.Config{ InsecureSkipVerify: true, }, }, } responses := map[string]int{} rm := sync.RWMutex{} processFunc := func() { defer wg.Done() wg.Add(1) for postLinkPair := range linkChan { if strings.HasPrefix(postLinkPair.Second, a.cfg.Server.PublicAddress) { continue } rm.RLock() _, ok := responses[postLinkPair.Second] rm.RUnlock() if !ok { req, err := http.NewRequest(http.MethodGet, postLinkPair.Second, nil) if err != nil { fmt.Println(err.Error()) continue } // User-Agent from Tor req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0") req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") req.Header.Set("Accept-Language", "en-US,en;q=0.5") resp, err := client.Do(req) if err != nil { fmt.Println(postLinkPair.Second+" ("+postLinkPair.First+"):", err.Error()) continue } status := resp.StatusCode _, _ = io.Copy(io.Discard, resp.Body) resp.Body.Close() rm.Lock() responses[postLinkPair.Second] = status rm.Unlock() } rm.RLock() if response, ok := responses[postLinkPair.Second]; ok && !checkSuccessStatus(response) { fmt.Println(postLinkPair.Second+" ("+postLinkPair.First+"):", response) } rm.RUnlock() } } for i := 0; i < 20; i++ { go processFunc() } err = a.getExternalLinks(allPosts, linkChan) if err != nil { log.Println(err.Error()) return } wg.Wait() } func checkSuccessStatus(status int) bool { return status >= 200 && status < 400 } func (a *goBlog) getExternalLinks(posts []*post, linkChan chan<- stringPair) error { wg := new(sync.WaitGroup) for _, p := range posts { wg.Add(1) go func(p *post) { defer wg.Done() links, _ := allLinksFromHTMLString(string(a.absolutePostHTML(p)), a.fullPostURL(p)) for _, link := range links { linkChan <- stringPair{a.fullPostURL(p), link} } }(p) } wg.Wait() close(linkChan) return nil }