# Default group. Applies to every crawler that does not match a more
# specific `User-agent` group elsewhere in this file.
User-agent: *
Allow: /
# Private / non-content paths. Each rule is a path-prefix match.
# NOTE(review): `/signup/` (trailing slash) does not match the bare
# `/signup` URL, whereas `/signin` matches `/signin` and every path that
# starts with it — confirm the difference is intentional.
Disallow: /api/
Disallow: /signup/
Disallow: /signin
Disallow: /forgot-password
Disallow: /account/
# Next.js bundle assets — fonts, JS chunks, CSS. Should never appear
# in Google Search Console (they're not pages). Added 2026-06-06 after
# GSC reported a .woff2 file as "Crawled - currently not indexed".
# NOTE(review): this also blocks the JS/CSS that search engines fetch to
# render pages; Google's guidance is to keep render-critical resources
# crawlable. Confirm rendering in GSC's URL Inspection is unaffected.
Disallow: /_next/

# AI crawlers — explicitly allowed (public content only).
#
# Two distinct things AI crawlers do:
#   (a) Train their foundation models (most of these bots).
#   (b) Fetch a page on-demand to answer a user's live query (ChatGPT-User,
#       OAI-SearchBot, PerplexityBot, ClaudeBot acting as user agent, etc.).
#
# We allow both. Fuse is consumer-facing and an LLM citing fusemobile.co.uk
# in an answer is a free distribution channel — better that the model has
# our actual content than guesses based on second-hand sources.
#
# IMPORTANT: a crawler obeys ONLY the most specific `User-agent` group that
# matches it (RFC 9309). The `User-agent: *` group is a fallback for bots
# with no named group — its rules are NOT inherited by the bots named here.
# The private-path Disallow rules are therefore repeated in this group.
# Keep the Disallow list below in sync with the `User-agent: *` group.
#
# All AI user agents share one group: several `User-agent` lines followed
# by a single rule set. Do not insert blank lines inside the group — some
# parsers treat a blank line as the end of a group.

# OpenAI
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
# Anthropic
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: anthropic-ai
# Google (separate from Googlebot — opts site into Gemini/Bard training)
User-agent: Google-Extended
# Apple Intelligence (opt-in beyond standard Applebot crawl)
User-agent: Applebot-Extended
# Meta AI Search
User-agent: meta-externalagent
User-agent: FacebookBot
# Amazon (Q, Rufus, Alexa)
User-agent: Amazonbot
# Perplexity answer engine
User-agent: PerplexityBot
# ByteDance (TikTok / Doubao)
User-agent: Bytespider
# Common Crawl — corpus that feeds many LLMs
User-agent: CCBot
# Mistral AI
User-agent: MistralAI-User
# Cohere
User-agent: Cohere-ai
# Private / non-content paths — same list as the `User-agent: *` group.
# Listed before `Allow: /` so first-match parsers also honour them
# (RFC 9309 parsers use the longest matching path, so order is irrelevant
# to them).
Disallow: /api/
Disallow: /signup/
Disallow: /signin
Disallow: /forgot-password
Disallow: /account/
Disallow: /_next/
Allow: /

Sitemap: https://www.fusemobile.co.uk/sitemap.xml