# Patch summary. This preamble precedes the first "diff --git" header and is not part of any hunk.
#
# config.go
#   - Adds the field RobotsTxt (*configRobotsTxt, mapstructure key "robotstxt") to the config struct.
#   - Declares configRobotsTxt with one field, BlockedBots []string (mapstructure key "blockedBots").
# example-config.yml
#   - Documents the new robotstxt.blockedBots list, with GPTBot as the sample entry.
# robotstxt.go (serveRobotsTXT)
#   - After the "Allow: /" line and before the Sitemap lines, writes for every entry of
#     cfg.RobotsTxt.BlockedBots a "User-agent: <entry>" line, a "Disallow: /" line and a blank line.
#   - Writes nothing extra when cfg.RobotsTxt is nil or BlockedBots is empty.
# robotstxt_test.go (Test_robotsTXT)
#   - Splits the test into the subtests "Default", "Private mode" and "Blocked bot"; each builds its own goBlog.
#   - "Blocked bot" expects the GPTBot group between "Allow: /" and the sitemap line.
#
# NOTE(review): each BlockedBots entry is written verbatim. An empty entry, or one containing a line
#   break, would yield a malformed robots.txt group - consider validating entries when the config is loaded.
# NOTE(review): in this copy the whitespace looks collapsed and the line breaks do not fall on patch-line
#   boundaries; confirm against the original commit before trying to apply it.
diff --git a/config.go b/config.go index a9f2634..5b30c7f 100644 --- a/config.go +++ b/config.go @@ -35,6 +35,7 @@ type config struct { TTS *configTTS `mapstructure:"tts"` Reactions *configReactions `mapstructure:"reactions"` Pprof *configPprof `mapstructure:"pprof"` + RobotsTxt *configRobotsTxt `mapstructure:"robotstxt"` Debug bool `mapstructure:"debug"` initialized bool } @@ -355,6 +356,10 @@ type configPlugin struct { Config map[string]any `mapstructure:"config"` } +type configRobotsTxt struct { + BlockedBots []string `mapstructure:"blockedBots"` +} + func (a *goBlog) loadConfigFile(file string) error { // Use viper to load the config file v := viper.New() diff --git a/example-config.yml b/example-config.yml index d6e5572..b8f74cc 100644 --- a/example-config.yml +++ b/example-config.yml @@ -178,6 +178,11 @@ tts: reactions: enabled: true # Enable reactions (default is false) +# Block bots using the robots.txt +robotstxt: + blockedBots: # List all bots that should be disallowed to crawl the site (default is empty) + - GPTBot + # Blogs defaultBlog: en # Default blog (needed because you can define multiple blogs) blogs: diff --git a/robotstxt.go b/robotstxt.go index 1965bb7..9925369 100644 --- a/robotstxt.go +++ b/robotstxt.go @@ -14,6 +14,14 @@ func (a *goBlog) serveRobotsTXT(w http.ResponseWriter, _ *http.Request) { return } _, _ = fmt.Fprint(w, "Allow: /\n\n") + if a.cfg.RobotsTxt != nil && len(a.cfg.RobotsTxt.BlockedBots) != 0 { + for _, bot := range a.cfg.RobotsTxt.BlockedBots { + _, _ = fmt.Fprint(w, "User-agent: ") + _, _ = fmt.Fprint(w, bot) + _, _ = fmt.Fprint(w, "\n") + _, _ = fmt.Fprint(w, "Disallow: /\n\n") + } + } _, _ = fmt.Fprintf(w, "Sitemap: %s\n", a.getFullAddress(sitemapPath)) for _, bc := range a.cfg.Blogs { _, _ = fmt.Fprintf(w, "Sitemap: %s\n", a.getFullAddress(bc.getRelativePath(sitemapBlogPath))) diff --git a/robotstxt_test.go b/robotstxt_test.go index 581fc6e..8544334 100644 --- a/robotstxt_test.go +++ b/robotstxt_test.go @@ -10,29 +10,62 @@ 
import ( func Test_robotsTXT(t *testing.T) { - app := &goBlog{ - cfg: &config{ - Server: &configServer{ - PublicAddress: "https://example.com", + t.Run("Default", func(t *testing.T) { + app := &goBlog{ + cfg: &config{ + Server: &configServer{ + PublicAddress: "https://example.com", + }, }, - }, - } + } - rec := httptest.NewRecorder() - req := httptest.NewRequest("GET", "/robots.txt", nil) - app.serveRobotsTXT(rec, req) - assert.Equal(t, http.StatusOK, rec.Code) - assert.Equal(t, "User-agent: *\nAllow: /\n\nSitemap: https://example.com/sitemap.xml\n", rec.Body.String()) + rec := httptest.NewRecorder() + req := httptest.NewRequest("GET", "/robots.txt", nil) + app.serveRobotsTXT(rec, req) + assert.Equal(t, http.StatusOK, rec.Code) + assert.Equal(t, "User-agent: *\nAllow: /\n\nSitemap: https://example.com/sitemap.xml\n", rec.Body.String()) + }) - app.cfg.PrivateMode = &configPrivateMode{ - Enabled: true, - } - assert.True(t, app.isPrivate()) + t.Run("Private mode", func(t *testing.T) { + app := &goBlog{ + cfg: &config{ + Server: &configServer{ + PublicAddress: "https://example.com", + }, + PrivateMode: &configPrivateMode{ + Enabled: true, + }, + }, + } - rec = httptest.NewRecorder() - req = httptest.NewRequest("GET", "/robots.txt", nil) - app.serveRobotsTXT(rec, req) - assert.Equal(t, http.StatusOK, rec.Code) - assert.Equal(t, "User-agent: *\nDisallow: /\n", rec.Body.String()) + assert.True(t, app.isPrivate()) + + rec := httptest.NewRecorder() + req := httptest.NewRequest("GET", "/robots.txt", nil) + app.serveRobotsTXT(rec, req) + assert.Equal(t, http.StatusOK, rec.Code) + assert.Equal(t, "User-agent: *\nDisallow: /\n", rec.Body.String()) + }) + + t.Run("Blocked bot", func(t *testing.T) { + app := &goBlog{ + cfg: &config{ + Server: &configServer{ + PublicAddress: "https://example.com", + }, + RobotsTxt: &configRobotsTxt{ + BlockedBots: []string{ + "GPTBot", + }, + }, + }, + } + + rec := httptest.NewRecorder() + req := httptest.NewRequest("GET", "/robots.txt", nil) + 
 app.serveRobotsTXT(rec, req) + assert.Equal(t, http.StatusOK, rec.Code) + assert.Equal(t, "User-agent: *\nAllow: /\n\nUser-agent: GPTBot\nDisallow: /\n\nSitemap: https://example.com/sitemap.xml\n", rec.Body.String()) + }) }