Add config option to block user agents in robots.txt

This commit is contained in:
Jan-Lukas Else 2023-08-09 14:21:39 +02:00
parent 138612ad2e
commit 5256540e4f
4 changed files with 71 additions and 20 deletions

View File

@ -35,6 +35,7 @@ type config struct {
TTS *configTTS `mapstructure:"tts"` TTS *configTTS `mapstructure:"tts"`
Reactions *configReactions `mapstructure:"reactions"` Reactions *configReactions `mapstructure:"reactions"`
Pprof *configPprof `mapstructure:"pprof"` Pprof *configPprof `mapstructure:"pprof"`
RobotsTxt *configRobotsTxt `mapstructure:"robotstxt"`
Debug bool `mapstructure:"debug"` Debug bool `mapstructure:"debug"`
initialized bool initialized bool
} }
@ -355,6 +356,10 @@ type configPlugin struct {
Config map[string]any `mapstructure:"config"` Config map[string]any `mapstructure:"config"`
} }
// configRobotsTxt holds the robots.txt related settings. It is referenced
// from the main config struct under the "robotstxt" key.
type configRobotsTxt struct {
// BlockedBots lists user agent names; for each entry serveRobotsTXT writes
// a "User-agent: <name>" line followed by "Disallow: /" to the response.
// A nil or empty list adds nothing to the robots.txt output.
BlockedBots []string `mapstructure:"blockedBots"`
}
func (a *goBlog) loadConfigFile(file string) error { func (a *goBlog) loadConfigFile(file string) error {
// Use viper to load the config file // Use viper to load the config file
v := viper.New() v := viper.New()

View File

@ -178,6 +178,11 @@ tts:
reactions: reactions:
enabled: true # Enable reactions (default is false) enabled: true # Enable reactions (default is false)
# Block bots using the robots.txt
robotstxt:
blockedBots: # List of bots that should be disallowed from crawling the site (default is empty)
- GPTBot
# Blogs # Blogs
defaultBlog: en # Default blog (needed because you can define multiple blogs) defaultBlog: en # Default blog (needed because you can define multiple blogs)
blogs: blogs:

View File

@ -14,6 +14,14 @@ func (a *goBlog) serveRobotsTXT(w http.ResponseWriter, _ *http.Request) {
return return
} }
_, _ = fmt.Fprint(w, "Allow: /\n\n") _, _ = fmt.Fprint(w, "Allow: /\n\n")
if a.cfg.RobotsTxt != nil && len(a.cfg.RobotsTxt.BlockedBots) != 0 {
for _, bot := range a.cfg.RobotsTxt.BlockedBots {
_, _ = fmt.Fprint(w, "User-agent: ")
_, _ = fmt.Fprint(w, bot)
_, _ = fmt.Fprint(w, "\n")
_, _ = fmt.Fprint(w, "Disallow: /\n\n")
}
}
_, _ = fmt.Fprintf(w, "Sitemap: %s\n", a.getFullAddress(sitemapPath)) _, _ = fmt.Fprintf(w, "Sitemap: %s\n", a.getFullAddress(sitemapPath))
for _, bc := range a.cfg.Blogs { for _, bc := range a.cfg.Blogs {
_, _ = fmt.Fprintf(w, "Sitemap: %s\n", a.getFullAddress(bc.getRelativePath(sitemapBlogPath))) _, _ = fmt.Fprintf(w, "Sitemap: %s\n", a.getFullAddress(bc.getRelativePath(sitemapBlogPath)))

View File

@ -10,29 +10,62 @@ import (
func Test_robotsTXT(t *testing.T) { func Test_robotsTXT(t *testing.T) {
app := &goBlog{ t.Run("Default", func(t *testing.T) {
cfg: &config{ app := &goBlog{
Server: &configServer{ cfg: &config{
PublicAddress: "https://example.com", Server: &configServer{
PublicAddress: "https://example.com",
},
}, },
}, }
}
rec := httptest.NewRecorder() rec := httptest.NewRecorder()
req := httptest.NewRequest("GET", "/robots.txt", nil) req := httptest.NewRequest("GET", "/robots.txt", nil)
app.serveRobotsTXT(rec, req) app.serveRobotsTXT(rec, req)
assert.Equal(t, http.StatusOK, rec.Code) assert.Equal(t, http.StatusOK, rec.Code)
assert.Equal(t, "User-agent: *\nAllow: /\n\nSitemap: https://example.com/sitemap.xml\n", rec.Body.String()) assert.Equal(t, "User-agent: *\nAllow: /\n\nSitemap: https://example.com/sitemap.xml\n", rec.Body.String())
})
app.cfg.PrivateMode = &configPrivateMode{ t.Run("Private mode", func(t *testing.T) {
Enabled: true, app := &goBlog{
} cfg: &config{
assert.True(t, app.isPrivate()) Server: &configServer{
PublicAddress: "https://example.com",
},
PrivateMode: &configPrivateMode{
Enabled: true,
},
},
}
rec = httptest.NewRecorder() assert.True(t, app.isPrivate())
req = httptest.NewRequest("GET", "/robots.txt", nil)
app.serveRobotsTXT(rec, req) rec := httptest.NewRecorder()
assert.Equal(t, http.StatusOK, rec.Code) req := httptest.NewRequest("GET", "/robots.txt", nil)
assert.Equal(t, "User-agent: *\nDisallow: /\n", rec.Body.String()) app.serveRobotsTXT(rec, req)
assert.Equal(t, http.StatusOK, rec.Code)
assert.Equal(t, "User-agent: *\nDisallow: /\n", rec.Body.String())
})
t.Run("Blocked bot", func(t *testing.T) {
app := &goBlog{
cfg: &config{
Server: &configServer{
PublicAddress: "https://example.com",
},
RobotsTxt: &configRobotsTxt{
BlockedBots: []string{
"GPTBot",
},
},
},
}
rec := httptest.NewRecorder()
req := httptest.NewRequest("GET", "/robots.txt", nil)
app.serveRobotsTXT(rec, req)
assert.Equal(t, http.StatusOK, rec.Code)
assert.Equal(t, "User-agent: *\nAllow: /\n\nUser-agent: GPTBot\nDisallow: /\n\nSitemap: https://example.com/sitemap.xml\n", rec.Body.String())
})
} }