# wheretoemigrate.io robots.txt
#
# Format note: robots.txt is parsed line by line and "#" comments out the rest
# of its line, so every directive below must stay on its own line.

# ---------------------------------------------------------------------------
# Default group — shared with the explicitly allowed AI crawlers.
#
# A crawler that finds a group naming it obeys ONLY that group and ignores the
# "*" group (RFC 9309, section 2.2.1). The AI crawlers are therefore listed on
# this group instead of in separate "Allow: /" groups, which would exempt them
# from every Disallow below (/api/, /admin/, /dashboard.html, ...).
# ---------------------------------------------------------------------------
User-agent: *
User-agent: GPTBot
User-agent: OAI-SearchBot
User-agent: ClaudeBot
User-agent: PerplexityBot
User-agent: ChatGPT-User
User-agent: Claude-Web
User-agent: Google-Extended
User-agent: Applebot-Extended
User-agent: cohere-ai
User-agent: YouBot
User-agent: Amazonbot
User-agent: Meta-ExternalAgent
Allow: /
Disallow: /api/

# Block raw data files only — keep /data/{slug} HTML pages crawlable
Disallow: /data/*.csv$
Disallow: /data/*.json$
Disallow: /data/runtime/

Disallow: /geo
Disallow: /search-index.json
Disallow: /dl/
Disallow: /session-status/
Disallow: /job-status/
Disallow: /admin-emails.html
Disallow: /widget/
Disallow: /assets/og-generator.html
Disallow: /samples/
Disallow: /success.html
Disallow: /admin/
Disallow: /pinterest/
Disallow: /dashboard.html
Disallow: /admin-dashboard.html
Disallow: /cdn-cgi/scripts/
Disallow: /500.html
Disallow: /404.html
Disallow: /yandex_*
Disallow: /pinterest-*.html
Disallow: /list-your-business/
Disallow: /press/outreach-templates.html

# Categories that are 100% noindex shells — block from crawl entirely (saves crawl budget)
# /countries/ has 349 indexable pages, NOT blocked here — they remain crawlable
Disallow: /score/
# /move-from/ Disallow removed 2026-05-19 — was contradictory with FD-SEO-Tier=(a)
# un-noindex of 85 pages + sitemap-move-from.xml + IndexNow ping. Was blocking 55 pages per GSC bucket 5.
Disallow: /cost-and-visa/
Disallow: /visa-costs/

# ---------------------------------------------------------------------------
# AI scrapers — blocked
# ---------------------------------------------------------------------------
User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

Sitemap: https://wheretoemigrate.io/sitemap-index.xml

# AI-crawler manifest (llmstxt.org spec)
# Full canonical: https://wheretoemigrate.io/llms.txt
# Deeper variant: https://wheretoemigrate.io/llms-full.txt