# Small robots.txt
# More information about this file can be found at
# http://www.robotstxt.org/
#
# In case your drupal site is in a subdirectory of your web root (e.g. /drupal)
# add the name of this directory before the / (slash) below
# example: Disallow: /drupal/aggregator
#
# to stop a polite robot indexing an example dir
# add a line like: user-agent: polite-bot
# and: Disallow: /example-dir/
#
# Formatting notes for maintainers:
# - Every directive must sit on its own line. A '#' starts a comment that runs
#   to the end of the line, so a directive placed after a '#' is ignored.
# - Do not insert blank lines inside the group below; some parsers treat a
#   blank line as the end of the record. Use a lone '#' as a visual separator.
# - The '*' and '$' pattern characters and the Crawl-Delay field are not
#   honoured by every crawler; check the documentation of the crawlers you
#   care about.

# Paths (clean URLs)
User-agent: *
Crawl-Delay: 10
Disallow: /aggregator
Disallow: /tracker
Disallow: /comment/reply
Disallow: /node/add
Disallow: /user
Disallow: /files
Disallow: /search
Disallow: /book/print
#
# Paths (no clean URLs)
# User-agent: *
# Crawl-Delay: 10
# Disallow: /?q=aggregator
# Disallow: /?q=tracker
# Disallow: /?q=comment/reply
# Disallow: /?q=node/add
# Disallow: /?q=user
# Disallow: /?q=files
# Disallow: /?q=search
# Disallow: /?q=book/print
#
# following rules taken from http://tips.webdesign10.com/robots-txt-and-drupal
# They continue the "User-agent: *" group above.
#
# NOTE: Disallow rules match by path prefix, so "Disallow: /user",
# "Disallow: /search" and "Disallow: /tracker" above already cover everything
# matched by "/user$", "/search$" and "/tracker?" below. If you want the
# narrower behaviour described in the comments that follow, remove the
# corresponding broader rule from the list above.
#
# The URL http://example.com/node is a duplicate of http://example.com/.
Disallow: /node$
#
# This will disallow the user form at http://example.com/user. If you would
# like to block all user pages, remove the trailing dollar sign ($) from this
# rule and all user pages will be blocked.
Disallow: /user$
#
# This takes care of problems with the Forum Module, the Views Module and
# other modules that sort tables by column headers.
Disallow: /*sort=
#
# This will block your search form at http://example.com/search. That URL does
# a 302 redirect to http://example.com/search/node which is already blocked by
# the default robots.txt file.
Disallow: /search$
#
# Drupal creates RSS feeds for many types of content in the format
# http://example.com/taxonomy/term/25/0/feed. If you don't block those RSS
# feeds, Google will put them in the Supplemental Results (even if they don't
# label the Supplemental Results in the SERPs anymore). The RSS feeds are
# duplicate content because they are the same text content except marked up
# with RSS/XML instead of X/HTML.
# This rule will block all the RSS feeds on the site except for the main RSS
# feed which is located at http://example.com/rss.xml by default.
Disallow: /*/feed$
#
# This will block all of the URLs created by the Tracker Module which are in
# the format http://example.com/user/5/track.
Disallow: /*/track$
#
# The Tracker Module creates a paginated list of all the nodes on your site,
# beginning with the most recent. I believe that it's best for search engines
# to spider your content by approaching it in keyword-themed areas of the site
# (as they do through taxonomy or well-constructed Views). The tracker module
# organizes your content chronologically, not by keyword as taxonomy or Views
# do. The Tracker Module can also create thousands of extra pages on the site
# like this example from Drupal.org: http://drupal.org/tracker?page=6379.
# My recommendation is to leave http://example.com/tracker exposed to search
# engines, while blocking the paginated tracker pages like
# http://example.com/tracker?page=50. Leaving just the first page of /tracker
# exposed to search engines allows search engines to rapidly find and index
# your latest content as it is posted.
Disallow: /tracker?