# Robots.txt for Anysite.io
# Last updated: 2026-04-07

# Allow all legitimate crawlers
User-agent: *
Allow: /

# Optimize crawl budget - disallow non-content pages
Disallow: /ghost/
Disallow: /p/
Disallow: /email/
Disallow: /admin/
Disallow: /content/images/size/
Disallow: /*?v=
Disallow: /*&v=
Disallow: /members/api/
Disallow: /webmentions/receive/
Disallow: /r/

# Block old URLs that 301 redirect (prevent duplicate indexing)
Disallow: /home/
Disallow: /rest-api/
Disallow: /mcp/
Disallow: /cli/
Disallow: /privacy/
Disallow: /terms/

# Allow important content
Allow: /products/
Allow: /endpoints/
Allow: /pricing/
Allow: /blog/
Allow: /about/

# Specific crawler rules for major search engines
User-agent: Googlebot
Crawl-delay: 0
Allow: /

User-agent: Bingbot
Crawl-delay: 0
Allow: /

User-agent: Slurp
Crawl-delay: 0
Allow: /

User-agent: DuckDuckBot
Crawl-delay: 0
Allow: /

# Allow AI crawlers for training
User-agent: GPTBot
Allow: /

User-agent: ChatGPT-User
Allow: /

User-agent: Claude-Web
Allow: /

User-agent: anthropic-ai
Allow: /

User-agent: Google-Extended
Allow: /

User-agent: CCBot
Allow: /

User-agent: PerplexityBot
Allow: /

# Allow SEO analysis crawlers
User-agent: AhrefsBot
Crawl-delay: 1
Allow: /

User-agent: SemrushBot
Crawl-delay: 1
Allow: /

# Block aggressive scrapers
User-agent: DotBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: rogerbot
Disallow: /

# Prevent crawling of tracking parameters (Yandex-specific directive)
Clean-param: utm_source&utm_medium&utm_campaign&utm_content&utm_term&ref&fbclid&gclid /

# Sitemap location
Sitemap: https://anysite.io/sitemap.xml