# Robots.txt for Sabeq.net - Data Analytics Learning Platform
# https://sabeq.net/robots.txt

# Default rules for all crawlers: allow everything except the paths below
User-agent: *
Allow: /

# Block access to admin and private areas (if any)
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /cache/

# Block access to sensitive files and directories
# (note: the *.txt rule also blocks files such as /ads.txt; add Allow
# lines for any such files the site serves)
Disallow: /*.log$
Disallow: /*.sql$
Disallow: /*.txt$
Allow: /robots.txt
Allow: /sitemap.xml

# Block access to development and testing files
Disallow: /test/
Disallow: /dev/
Disallow: /_dev/
Disallow: /staging/

# Block access to configuration files
Disallow: /.htaccess
Disallow: /.env
Disallow: /config/
Disallow: /includes/

# Allow important files for SEO
Allow: /css/
Allow: /js/
Allow: /img/
Allow: /fonts/
Allow: /favicon.ico
Allow: /site.webmanifest
Allow: /sw.js

# Special instructions for major search engines.
# A crawler obeys only the most specific group that matches it, so a bot
# with its own group below ignores everything under "User-agent: *"; the
# Disallow rules are therefore repeated in each named group.
#
# Googlebot deliberately has no group of its own: Google ignores
# Crawl-delay (crawl rate is managed in Google Search Console), and a
# separate group would only exempt Googlebot from the rules above.

# Bing
User-agent: Bingbot
Crawl-delay: 1
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /cache/
Disallow: /*.log$
Disallow: /*.sql$
Disallow: /*.txt$
Disallow: /test/
Disallow: /dev/
Disallow: /_dev/
Disallow: /staging/
Disallow: /.htaccess
Disallow: /.env
Disallow: /config/
Disallow: /includes/

# Yandex, plus Baidu for the potential Chinese market (same rules)
User-agent: Yandex
User-agent: Baiduspider
Crawl-delay: 2
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /cache/
Disallow: /*.log$
Disallow: /*.sql$
Disallow: /*.txt$
Disallow: /test/
Disallow: /dev/
Disallow: /_dev/
Disallow: /staging/
Disallow: /.htaccess
Disallow: /.env
Disallow: /config/
Disallow: /includes/

# Social media crawlers (link previews): allowed everywhere
User-agent: facebookexternalhit
User-agent: Twitterbot
User-agent: LinkedInBot
User-agent: WhatsApp
Allow: /

# SEO and analytics crawlers: throttled, same Disallow rules as above
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
Crawl-delay: 10
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /cache/
Disallow: /*.log$
Disallow: /*.sql$
Disallow: /*.txt$
Disallow: /test/
Disallow: /dev/
Disallow: /_dev/
Disallow: /staging/
Disallow: /.htaccess
Disallow: /.env
Disallow: /config/
Disallow: /includes/

# Block aggressive crawlers that might overload the server
User-agent: BLEXBot
User-agent: SemrushBot-SA
User-agent: MegaIndex
User-agent: SiteAuditBot
Disallow: /

# XML Sitemaps
Sitemap: https://sabeq.net/sitemap.xml
Sitemap: https://sabeq.net/sitemap-courses.xml
Sitemap: https://sabeq.net/sitemap-blog.xml
Sitemap: https://sabeq.net/sitemap-images.xml

# Host directive (canonical host; recognized only by Yandex, which has
# since deprecated it in favor of 301 redirects - kept for documentation)
Host: https://sabeq.net

# Clean URLs: crawlers are redirected to the clean versions by .htaccess;
# noted here for reference, since robots.txt itself cannot redirect.

# Heavy crawlers can be throttled further if needed; Crawl-delay is the
# number of seconds between requests, so for example:
# Crawl-delay: 86400   (one request per 24 hours)

# Notes for webmasters:
# - Update the sitemap URLs when adding new content
# - Monitor server logs for unusual crawling patterns
# - Adjust crawl delays if server performance is affected
# - Keep this file updated when the site structure changes

# Last updated: 2024-01-15
# Contact: info@sabeq.net for crawling issues
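
# ---------------------------------------------------------------------
# Appendix (comments only, ignored by crawlers).
#
# A minimal sketch for sanity-checking this file, using Python 3's
# standard urllib.robotparser. It is only a rough check: Python's parser
# does not understand the * and $ wildcards used above, so test plain
# directory paths. The user agent and paths below are examples only.
#
#   import urllib.robotparser
#
#   parser = urllib.robotparser.RobotFileParser("https://sabeq.net/robots.txt")
#   parser.read()  # fetch and parse the live file
#   print(parser.can_fetch("Bingbot", "/admin/"))  # expected: False
#   print(parser.can_fetch("Bingbot", "/css/"))    # expected: True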
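#
# A similar sketch for checking that the sitemap URLs listed above still
# respond (assumes they are publicly reachable; urlopen raises an
# HTTPError for 4xx/5xx responses, so any failure is loud):
#
#   import urllib.request
#
#   for url in ("https://sabeq.net/sitemap.xml",
#               "https://sabeq.net/sitemap-courses.xml",
#               "https://sabeq.net/sitemap-blog.xml",
#               "https://sabeq.net/sitemap-images.xml"):
#       with urllib.request.urlopen(url) as resp:
#           print(url, resp.status)  # expected: 200 for each sitemap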