# ════════════════════════════════════════════════════════════════
# SECTION 1 — DEFAULT GROUPS (Shopify defaults + DDS additions)
# One directive per line: robots.txt is line-oriented, and a '#'
# comments out everything that follows it on the same line.
# No blank lines inside a group — some parsers treat a blank line
# as the end of the group.
# ════════════════════════════════════════════════════════════════
User-agent: *
Disallow: /a/downloads/-/*
Disallow: /admin
Disallow: /cart
Disallow: /orders
Disallow: /checkouts/
Disallow: /checkout
Disallow: /67479175349/checkouts
Disallow: /67479175349/orders
Disallow: /carts
Disallow: /account
Disallow: /collections/*sort_by*
Disallow: /*/collections/*sort_by*
Disallow: /collections/*+*
Disallow: /collections/*%2B*
Disallow: /collections/*%2b*
Disallow: /*/collections/*+*
Disallow: /*/collections/*%2B*
Disallow: /*/collections/*%2b*
Disallow: */collections/*filter*&*filter*
Disallow: /blogs/*+*
Disallow: /blogs/*%2B*
Disallow: /blogs/*%2b*
Disallow: /*/blogs/*+*
Disallow: /*/blogs/*%2B*
Disallow: /*/blogs/*%2b*
Disallow: /*?*oseid=*
Disallow: /*preview_theme_id*
Disallow: /*preview_script_id*
Disallow: /*/*?*ls=*&ls=*
Disallow: /*/*?*ls%3D*%3Fls%3D*
Disallow: /*/*?*ls%3d*%3fls%3d*
Disallow: /search
Disallow: /apple-app-site-association
Disallow: /.well-known/shopify/monorail
Disallow: /recommendations/products
Disallow: /*/recommendations/products
Disallow: /services/login_with_shop
Disallow: /products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /collections/*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /*/collections/*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
# ──────────────────────────────────────────────────────────────
# DDS additions to default '*' group
# Crawl-bloat controls (faceted nav / thin duplicate parameters)
# Pagination remains crawlable — only param permutations blocked
# ──────────────────────────────────────────────────────────────
Disallow: /collections/*?*sort_by*
Disallow: /collections/*?*filter*
Disallow: /collections/*?*constraint*
Disallow: /collections/*?*pf_*
Disallow: /collections/*?*view*
Disallow: /collections/*?*grid_list*
# Blog tag archives (thin duplicate content)
Disallow: /blogs/*/tagged/*
# Internal search results (infinite parameter combinations)
Disallow: /search
# ──────────────────────────────────────────────────────────────
# Explicit content allows for default '*' group
# The longest matching rule wins and Allow wins ties, so the longer
# Disallow patterns above still override these short Allow prefixes.
# ──────────────────────────────────────────────────────────────
Allow: /products/
Allow: /collections/
Allow: /pages/
Allow: /blogs/
Allow: /policies/
Allow: /products/*.json
Allow: /collections/*.json
Sitemap: https://ddsboston.com/sitemap.xml

User-agent: adsbot-google
Disallow: /checkouts/
Disallow: /checkout
Disallow: /carts
Disallow: /orders
Disallow: /67479175349/checkouts
Disallow: /67479175349/orders
Disallow: /*?*oseid=*
Disallow: /*preview_theme_id*
Disallow: /*preview_script_id*
Disallow: /products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /collections/*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /*/collections/*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /services/login_with_shop

User-agent: Nutch
Disallow: /

User-agent: AhrefsBot
Crawl-delay: 10
Disallow: /a/downloads/-/*
Disallow: /admin
Disallow: /cart
Disallow: /orders
Disallow: /checkouts/
Disallow: /checkout
Disallow: /67479175349/checkouts
Disallow: /67479175349/orders
Disallow: /carts
Disallow: /account
Disallow: /collections/*sort_by*
Disallow: /*/collections/*sort_by*
Disallow: /collections/*+*
Disallow: /collections/*%2B*
Disallow: /collections/*%2b*
Disallow: /*/collections/*+*
Disallow: /*/collections/*%2B*
Disallow: /*/collections/*%2b*
Disallow: */collections/*filter*&*filter*
Disallow: /blogs/*+*
Disallow: /blogs/*%2B*
Disallow: /blogs/*%2b*
Disallow: /*/blogs/*+*
Disallow: /*/blogs/*%2B*
Disallow: /*/blogs/*%2b*
Disallow: /*?*oseid=*
Disallow: /*preview_theme_id*
Disallow: /*preview_script_id*
Disallow: /*/*?*ls=*&ls=*
Disallow: /*/*?*ls%3D*%3Fls%3D*
Disallow: /*/*?*ls%3d*%3fls%3d*
Disallow: /search
Disallow: /apple-app-site-association
Disallow: /.well-known/shopify/monorail
Disallow: /services/login_with_shop
Disallow: /products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /collections/*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /*/collections/*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Sitemap: https://ddsboston.com/sitemap.xml

User-agent: AhrefsSiteAudit
Crawl-delay: 10
Disallow: /a/downloads/-/*
Disallow: /admin
Disallow: /cart
Disallow: /orders
Disallow: /checkouts/
Disallow: /checkout
Disallow: /67479175349/checkouts
Disallow: /67479175349/orders
Disallow: /carts
Disallow: /account
Disallow: /collections/*sort_by*
Disallow: /*/collections/*sort_by*
Disallow: /collections/*+*
Disallow: /collections/*%2B*
Disallow: /collections/*%2b*
Disallow: /*/collections/*+*
Disallow: /*/collections/*%2B*
Disallow: /*/collections/*%2b*
Disallow: */collections/*filter*&*filter*
Disallow: /blogs/*+*
Disallow: /blogs/*%2B*
Disallow: /blogs/*%2b*
Disallow: /*/blogs/*+*
Disallow: /*/blogs/*%2B*
Disallow: /*/blogs/*%2b*
Disallow: /*?*oseid=*
Disallow: /*preview_theme_id*
Disallow: /*preview_script_id*
Disallow: /*/*?*ls=*&ls=*
Disallow: /*/*?*ls%3D*%3Fls%3D*
Disallow: /*/*?*ls%3d*%3fls%3d*
Disallow: /search
Disallow: /apple-app-site-association
Disallow: /.well-known/shopify/monorail
Disallow: /services/login_with_shop
Disallow: /products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /collections/*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /*/collections/*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Sitemap: https://ddsboston.com/sitemap.xml

# MJ12bot is also fully blocked in the Section 3 block list. The
# Disallow is repeated here so that parsers which honor only the
# first matching group (instead of merging groups) still block it.
User-agent: MJ12bot
Crawl-delay: 10
Disallow: /

User-agent: Pinterest
Crawl-delay: 1

# ════════════════════════════════════════════════════════════════
# SECTION 2 — AI CRAWLER DIRECTIVES (May 2026)
# Strategy: ALLOW ALL reputable AI bots for maximum citations
# Every AI answer that mentions DDS is free brand visibility +
# potential referral traffic. The cost of allowing is server
# bandwidth (negligible for Shopify). The cost of blocking is
# missing from every Claude / ChatGPT / Perplexity / Gemini /
# NotebookLM / Apple Intelligence answer for years.
#
# NOTE: a crawler that matches one of the named groups below follows
# ONLY that group; it does not inherit the '*' rules above.
# ════════════════════════════════════════════════════════════════

# ──────────────────────────────────────────────────────────────
# ANTHROPIC — 3-bot framework
# Docs: https://support.claude.com/en/articles/8896518
# All three honor robots.txt as of May 2026.
# ──────────────────────────────────────────────────────────────

# ClaudeBot — training data crawler
User-agent: ClaudeBot
Allow: /
Allow: /products/*.json
Allow: /collections/*.json
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders
Disallow: /search

# Claude-SearchBot — search index for Claude web search
# Blocking this would remove DDS from Claude search answers.
User-agent: Claude-SearchBot
Allow: /
Allow: /products/*.json
Allow: /collections/*.json
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders
Disallow: /search

# Claude-User — real-time user-initiated page fetches
# Honors robots.txt per Anthropic. Allow for live citations.
User-agent: Claude-User
Allow: /
Allow: /products/*.json
Allow: /collections/*.json
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders

# ──────────────────────────────────────────────────────────────
# OPENAI — 3-bot framework
# Docs: https://platform.openai.com/docs/bots
# GPTBot + OAI-SearchBot honor robots.txt; ChatGPT-User may not.
# ──────────────────────────────────────────────────────────────

# GPTBot — training data crawler
User-agent: GPTBot
Allow: /
Allow: /products/*.json
Allow: /collections/*.json
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders
Disallow: /search

# OAI-SearchBot — ChatGPT Search index
# OpenAI states: blocking this removes you from ChatGPT search answers.
User-agent: OAI-SearchBot
Allow: /
Allow: /products/*.json
Allow: /collections/*.json
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders
Disallow: /search

# ChatGPT-User — real-time user browsing fetches
# May ignore robots.txt; rules included for compliant requests.
# /search is deliberately not disallowed for user-initiated fetchers.
User-agent: ChatGPT-User
Allow: /
Allow: /products/*.json
Allow: /collections/*.json
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders

# ──────────────────────────────────────────────────────────────
# PERPLEXITY — 2-bot framework
# Docs: https://docs.perplexity.ai/guides/bots
# PerplexityBot honors robots.txt. Perplexity-User generally does not.
# Cloudflare published evidence (Aug 2025) that Perplexity uses
# undeclared crawlers to bypass blocks. Server-side controls required
# for true blocking. We allow both because the strategy is visibility.
# ──────────────────────────────────────────────────────────────

# PerplexityBot — search index crawler
User-agent: PerplexityBot
Allow: /
Allow: /products/*.json
Allow: /collections/*.json
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders
Disallow: /search

# Perplexity-User — on-demand retrieval
User-agent: Perplexity-User
Allow: /
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders

# ──────────────────────────────────────────────────────────────
# GOOGLE AI — extended training tokens + new agentic bots
# Googlebot itself remains governed by Shopify defaults above.
# These tokens control AI-specific use cases.
# ──────────────────────────────────────────────────────────────

# Google-Extended — product token (not a separate crawler) controlling
# whether content is used for Gemini model training and grounding.
# It does NOT affect Google Search, including AI Overviews; those are
# governed by the Googlebot rules.
User-agent: Google-Extended
Allow: /

# Google-NotebookLM — fetches user-added sources for NotebookLM
# Allowing this means DDS pages can be added as NotebookLM sources
# and surface in NotebookLM-generated audio overviews + research notes.
# NOTE(review): Google lists this as a user-triggered fetcher, which
# generally ignores robots.txt — confirm this rule has any effect.
User-agent: Google-NotebookLM
Allow: /

# Google-CloudVertexBot — Vertex AI on-demand retrieval
User-agent: Google-CloudVertexBot
Allow: /
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders

# ──────────────────────────────────────────────────────────────
# APPLE — Applebot + Applebot-Extended
# Applebot powers Siri + Spotlight. Applebot-Extended controls
# Apple Intelligence (iOS 18+) training inclusion.
# ──────────────────────────────────────────────────────────────

# Applebot — Siri, Spotlight, Apple search
User-agent: Applebot
Allow: /
Allow: /products/*.json
Allow: /collections/*.json
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders
Disallow: /search

# Applebot-Extended — Apple Intelligence training token
User-agent: Applebot-Extended
Allow: /

# ──────────────────────────────────────────────────────────────
# META — 2-bot framework (training + fetcher)
# ──────────────────────────────────────────────────────────────

# Meta-ExternalAgent — Llama training crawler
User-agent: Meta-ExternalAgent
Allow: /
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders
Disallow: /search

# Meta-ExternalFetcher — on-demand Meta AI fetches
User-agent: Meta-ExternalFetcher
Allow: /
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders

# ──────────────────────────────────────────────────────────────
# AMAZON — Alexa + Amazon search AI
# ──────────────────────────────────────────────────────────────
User-agent: Amazonbot
Allow: /
Allow: /products/*.json
Allow: /collections/*.json
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders
Disallow: /search

# ──────────────────────────────────────────────────────────────
# MISTRAL AI — user-triggered retrieval
# ──────────────────────────────────────────────────────────────
User-agent: MistralAI-User
Allow: /
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders

# ──────────────────────────────────────────────────────────────
# DUCKDUCKGO AI ASSISTANT
# Powers DuckDuckGo's AI Assist feature.
# ──────────────────────────────────────────────────────────────
User-agent: DuckAssistBot
Allow: /
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders
Disallow: /search

# ──────────────────────────────────────────────────────────────
# COHERE — LLM training
# ──────────────────────────────────────────────────────────────
User-agent: cohere-ai
Allow: /
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders
Disallow: /search

# ──────────────────────────────────────────────────────────────
# COMMON CRAWL — open dataset feeding many open-source LLMs
# Allowing means DDS appears in derivative training corpora.
# ──────────────────────────────────────────────────────────────
User-agent: CCBot
Allow: /
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders
Disallow: /search

# ──────────────────────────────────────────────────────────────
# ALLEN INSTITUTE FOR AI — academic open research
# Low volume but academic citations carry weight.
# ──────────────────────────────────────────────────────────────
User-agent: AI2Bot
Allow: /
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders
Disallow: /search

User-agent: AI2Bot-Dolma
Allow: /
Disallow: /admin
Disallow: /account*
Disallow: /cart*
Disallow: /checkout
Disallow: /checkouts/
Disallow: /orders
Disallow: /search

# ════════════════════════════════════════════════════════════════
# SECTION 3 — AGGRESSIVE / LOW-VALUE SCRAPER BLOCK LIST
# ════════════════════════════════════════════════════════════════
# These bots provide ZERO visibility return for DDS while consuming
# bandwidth and crawl budget. Block list expanded May 2026.
#
# IMPORTANT: Bytespider has documented robots.txt non-compliance.
# robots.txt block is first line of defense. For genuine
# blocking, server/Cloudflare rules are required (see DDS notes).
# ════════════════════════════════════════════════════════════════

# ─── ByteDance scrapers (TikTok / Doubao) ─────────────────────
User-agent: Bytespider
Disallow: /

User-agent: TikTokSpider
Disallow: /

# ─── SEO data resellers (sell data to DDS competitors) ────────
# AhrefsBot is intentionally NOT repeated here. It is throttled
# (Crawl-delay: 10) and path-restricted by its own group earlier in
# this file. A second AhrefsBot group holding only a Crawl-delay adds
# nothing for parsers that merge groups, and would drop every
# AhrefsBot Disallow for parsers that use only the last matching group.
User-agent: SemrushBot
Disallow: /

User-agent: SemrushBot-OCOB
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: DataForSeoBot
Disallow: /

User-agent: PetalBot
Disallow: /

# ─── Image / dataset scrapers ─────────────────────────────────
User-agent: ImagesiftBot
Disallow: /

User-agent: img2dataset
Disallow: /

# ─── Generic content scrapers feeding sold datasets ───────────
User-agent: Diffbot
Disallow: /

User-agent: Omgili
Disallow: /

User-agent: Omgilibot
Disallow: /

User-agent: webzio-extended
Disallow: /

User-agent: Timpibot
Disallow: /

User-agent: PanguBot
Disallow: /

User-agent: Kangaroo Bot
Disallow: /

User-agent: ICC-Crawler
Disallow: /

# ─── Brand monitoring scrapers (sold to competitors) ──────────
User-agent: AwarioBot
Disallow: /

User-agent: AwarioSmartBot
Disallow: /

User-agent: AwarioRssBot
Disallow: /

# ─── Plagiarism / academic scrapers ───────────────────────────
User-agent: TurnitinBot
Disallow: /

# ─── Generic / unknown harvesting ─────────────────────────────
User-agent: Sentibot
Disallow: /

User-agent: VelenPublicWebCrawler
Disallow: /

User-agent: Youbot
Disallow: /

# ════════════════════════════════════════════════════════════════
# SECTION 4 — SITEMAP DECLARATIONS
# ════════════════════════════════════════════════════════════════
# Shopify auto-injects the primary sitemap via group.sitemap above.
# This section is reserved for any future custom sitemaps (e.g.
# image sitemap, video sitemap) that Shopify doesn't auto-generate.
# As of May 2026 — Shopify's auto-sitemap covers products, pages,
# collections, blogs. No custom sitemaps required.
# ════════════════════════════════════════════════════════════════

# ════════════════════════════════════════════════════════════════
# END robots.txt.liquid v3.0.0
# ════════════════════════════════════════════════════════════════