# robots.txt
# V0.9a - 11-08-07 EA - First thorough SEF core file
# V1.0  - 11-13-07 EA - GWM Tools URL delete and comprehensive Blocking
# V1.2  - 12-07-07 EA - Changes from analysing gb log entries
# V2.0  - 12-18-07 EA - Bots
# V2.1  - 02-21-09 EA - Block Basket
#
# Layout rules for this file:
#   * one directive per line ('#' comments out the rest of a line)
#   * no blank lines inside a group; some parsers treat a blank line as
#     the end of the record, so '#' lines are used as separators instead

#####
# REFUSE
#####
# Every agent listed below shares the single "Disallow: /" that closes the
# group, i.e. these crawlers are asked to stay off the whole site.
User-agent: atSpider
User-agent: b2w/0.1
User-agent: BecomeBot
User-agent: CheeseBot
User-agent: CherryPicker
User-agent: CopyRightCheck
User-agent: Copernic
User-agent: Crescent
User-agent: DSurf
User-agent: dumbot
User-agent: EliteSys Entry
User-agent: EmailCollector
User-agent: EmailSiphon
User-agent: EmailWolf
User-agent: Enterprise_Search/1.0
User-agent: Enterprise_Search
User-agent: es
User-agent: ExtractorPro
User-agent: Flaming AttackBot
User-agent: FreeFind
User-agent: grub
User-agent: grub-client
User-agent: Hatena Antenna
User-agent: Jetbot
User-agent: Jetbot/1.0
User-agent: larbin
User-agent: Mail Sweeper
User-agent: munky
User-agent: naver
User-agent: NetMechanic
User-agent: Nutch
User-agent: OmniExplorer_Bot
User-agent: Oracle Ultra Search
User-agent: PerMan
User-agent: ProWebWalker
User-agent: psbot
User-agent: Python-urllib
User-agent: python-urllib/1.16
User-agent: Radiation Retriever 1.1
User-agent: Roverbot
User-agent: searchpreview
User-agent: SiteSnagger
User-agent: sootle
User-agent: Stanford
User-agent: URL_Spider_Pro
User-agent: WebBandit
User-agent: WebEmailExtrac
User-agent: WebVac
User-agent: WebZip
User-agent: xGet
User-agent: wGet
User-agent: WebWalk
User-agent: webvac
User-agent: WebReaper
User-agent: WebMirror
User-agent: WebFetcher
User-agent: WebCopy
User-agent: webcopier
User-agent: WebCatcher
User-agent: WebBandit
User-agent: w3mir
User-agent: vobsub
User-agent: Templeton
User-agent: ssearcher100
User-agent: SpiderBot
User-agent: Shai'Hulud
User-agent: PBWF
User-agent: LightningDownload
User-agent: KDD Exploror
User-agent: Jeeves
User-agent: Internet Explore
User-agent: InfoSpiders
User-agent: httrack
User-agent: HavIndex
User-agent: GetUrl
User-agent: GetBot
User-agent: ESIRover
User-agent: Download Wonder
User-agent: Collage
User-agent: LNSpiderguy
User-agent: Alexibot
User-agent: Teleport
User-agent: TeleportPro
User-agent: Stanford Comp Sci
User-agent: MIIxpc
User-agent: Telesoft
User-agent: Website Quester
User-agent: moget/2.1
User-agent: WebZip/4.0
User-agent: WebStripper
User-agent: WebSauger
User-agent: WebCopier
User-agent: NetAnts
User-agent: Mister PiX
User-agent: WebAuto
User-agent: TheNomad
User-agent: WWW-Collector-E
User-agent: RMA
User-agent: libWeb/clsHTTP
User-agent: asterias
User-agent: httplib
User-agent: turingos
User-agent: spanner
User-agent: InfoNaviRobot
User-agent: Harvest/1.5
User-agent: Bullseye/1.0
User-agent: Crescent Internet ToolPak HTTP OLE Control v.1.0
User-agent: CherryPickerSE/1.0
User-agent: CherryPickerElite/1.0
User-agent: WebBandit/3.50
User-agent: NICErsPRO
User-agent: Microsoft URL Control - 5.01.4511
User-agent: DittoSpyder
User-agent: Foobot
User-agent: SpankBot
User-agent: BotALot
User-agent: lwp-trivial/1.34
User-agent: lwp-trivial
User-agent: BunnySlippers
User-agent: Microsoft URL Control - 6.00.8169
User-agent: URLy Warning
User-agent: Wget/1.6
User-agent: Wget/1.5.3
User-agent: Wget
User-agent: LinkWalker
User-agent: cosmos
User-agent: moget
User-agent: hloader
User-agent: URL Control
User-agent: Zeus Link Scout
User-agent: Zeus 32297 Webster Pro V2.9 Win32
User-agent: Webster Pro
User-agent: EroCrawler
User-agent: LinkScan/8.1a Unix
User-agent: Keyword Density/0.9
User-agent: Kenjin Spider
User-agent: Iron33/1.0.2
User-agent: Bookmark search tool
User-agent: GetRight/4.2
User-agent: FairAd Client
User-agent: Gaisbot
User-agent: humanlinks
User-agent: LinkextractorPro
User-agent: Offline Explorer
User-agent: Mata Hari
User-agent: LexiBot
User-agent: Web Image Collector
User-agent: The Intraformant
User-agent: True_Robot/1.0
User-agent: True_Robot
User-agent: BlowFish/1.0
User-agent: JennyBot
User-agent: MIIxpc/4.2
User-agent: BuiltBotTough
User-agent: ProPowerBot/2.14
User-agent: BackDoorBot/1.0
User-agent: toCrawl/UrlDispatcher
User-agent: WebEnhancer
User-agent: suzuran
User-agent: VCI WebViewer VCI WebViewer Win32
User-agent: VCI
User-agent: Szukacz/1.4
User-agent: QueryN Metasearch
User-agent: Openfind data gathere
User-agent: Openfind
User-agent: Xenu's Link Sleuth 1.1c
User-agent: Xenu's
User-agent: Zeus
User-agent: RepoMonkey Bait & Tackle/v1.01
User-agent: RepoMonkey
User-agent: Microsoft URL Control
User-agent: Openbot
Disallow: /

##########################
# ALL THE USUAL STUFF
##########################
# Rules for every crawler that does not match a more specific group.
User-agent: *
Disallow: /administrator/
Disallow: /cache/
Disallow: /components/
Disallow: /editor/
Disallow: /help/
Disallow: /includes/
Disallow: /language/
Disallow: /mambots/
Disallow: /media/
Disallow: /modules/
Disallow: /templates/
Disallow: /installation/
# any URL ending in .jpg ('$' anchors the match at the end of the URL)
Disallow: /*.jpg$
######
# Block Basket
######
Disallow: /basket/*
##################
# Block Affiliate Deep Links 12/04/07
##################
Disallow: /aff/*
Disallow: /ffa/*
#################
# No leak to Social Bookmarking 12/15/07
#################
Disallow: /social/*
#####################
# SITE-SPECIFIC
#####################
Disallow: /prot*
Disallow: /LO*
Disallow: /xsitemap.html
Disallow: /scotland_tourism.php
Disallow: /scotland-tourism.php
##############################
# NON-RELEVANT INDEX BLOCK
##############################
#Disallow: /scotland-travel.php
Disallow: /search-accommodation
Disallow: /terms-and-conditions.php
Disallow: /budgetscot-accommodation-about-the-company.php
Disallow: /contact.php
Disallow: /about-the-company.php
##################################
# OUR '404' DUPLICATE NAV BLOCK
##################################
# Disallow our '404' page from indexing, as it duplicates the nav page.
# This should clear the Google cache of out-of-date material as well as
# avoid duplication.
Disallow: /scotland.php
####################################
# LEGACY SEARCH AND LIST AND MODULE
####################################
Disallow: /*advsearchr*
Disallow: /*list-acc*
Disallow: /*hotpr*
Disallow: /index.php
Disallow: /index2.php
####################################
# ATOMIC PROPERTY LEVEL
####################################
# block the old form access to property pages
# old form is /scotland/accommodation/TYPE/name/nos...
# new form is /scotland/accommodation/TYPE/-gh-name-.php
#Disallow: /scotland/accommodation/guest-house/name/desc/*
#Disallow: /scotland/accommodation/guest-house/name/asc.php
# generic block of non-SEF'ed stuff
Disallow: /scotland/accommodation/guest-house/name/*
Disallow: /scotland/accommodation/bed-&-breakfast/name/*
Disallow: /scotland/accommodation/bed-%26-breakfast/name/*
Disallow: /scotland/accommodation/self-catering/name/*
# NOTE(review): "%11" decodes to a control character, not a hyphen ("%2D");
# this rule probably never matches a real URL - confirm the intended encoding.
Disallow: /scotland/accommodation/self%11catering/name/*
Disallow: /scotland/accommodation/budget-hotel/name/*
# 12-18-07 block hotel listings on side tier
Disallow: /scotland/accommodation/budget-hotel/name/asc.php
# any "_" names from olde
Disallow: /scotland/accommodation/guest_house/*
Disallow: /scotland/accommodation/budget_hotel/*
Disallow: /scotland/accommodation/bed_&_breakfast/*
Disallow: /scotland/accommodation/bed_%26_breakfast/*
#################################################
# Old/New Ed Page for sp.nl PR redirect 01-12-08
#################################################
Disallow: /scotland/accommodation/edinburgh_all.php
###################################
# TIER LISTING PAGE LEVEL
###################################
#
# ***WE BLOCK ALL OLD LISTING PAGES IRRESPECTIVE OF CURRENT STATUS*** 071107
# in a few weeks after the 301's are in effect
# Disallow old shows from listings and any new non-conformant non-new-SEF'ed -
# this blocks all old (and new non-redone) listing tiers
#Disallow: /scotland/show-accommodation/*
# our original all-Edinburgh pages, as we can't redirect them - others we can
# redirect as appropriate so as not to lose PR
Disallow: /*26793*
# block islands as they are enumerated in tiers elsewhere
Disallow: /*26794*
# block the php extended original divided listing pages so they don't dup
# (071107 redirected); also so Edin (1 wk legacy) aren't followed
#Disallow: /scotland/accommodation/edinburgh-bed-breakfast.php
#Disallow: /scotland/accommodation/edinburgh-guest-house.php
#Disallow: /scotland/accommodation/edinburgh-self-catering.php
# other areas - although some handled OK
# Really old legacy stuff
Disallow: /scot/accom/*
Disallow: /scotland/show-accommodation/55719/*
Disallow: /scotland/show-accommodation/54257/*
Disallow: /scotland/show-accommodation/60630/*
Disallow: /scotland/show-accommodation/26766/*
# disallow england new
Disallow: /*66438*
#
# Misc
#
Disallow: /scotland/accommodation/budget-hotel/white-heather-hotel.php
########################################
# NAVIGATION LEVEL SIDE-BAR (TIER+TYPES)
########################################
#
# these are the side menu items that are now non-indexed on each separate page (700++)
# can be reached via / url's from the navigation page (or ed-x-x.php)
# block access to these
# also blocks within Listing pages of Tier categories
Disallow: /scotland/accommodation/edinburgh.php
# Disallow all sections within the Edinburgh tier
Disallow: /scotland/accommodation/edinburgh/page1.php
Disallow: /scotland/accommodation/edinburgh/page2.php
Disallow: /scotland/accommodation/edinburgh/page3.php
Disallow: /scotland/accommodation/edinburgh/page4.php
Disallow: /scotland/accommodation/edinburgh/page5.php
Disallow: /scotland/accommodation/edinburgh/page6.php
Disallow: /scotland/accommodation/edinburgh/page7.php
Disallow: /scotland/accommodation/edinburgh/page8.php
Disallow: /scotland/accommodation/edinburgh/page9.php
Disallow: /scotland/accommodation/edinburgh/page10.php
Disallow: /scotland/accommodation/edinburgh/page11.php
# all other side tiers
Disallow: /scotland/accommodation/glasgow.php
Disallow: /scotland/accommodation/aberdeen.php
Disallow: /scotland/accommodation/perth.php
Disallow: /scotland/accommodation/dundee.php
Disallow: /scotland/accommodation/stirling.php
Disallow: /scotland/accommodation/st-andrews.php
Disallow: /scotland/accommodation/islands.php
# the generic listing routes
Disallow: /scotland/accommodation/guest-house.php
Disallow: /scotland/accommodation/bed-&-breakfast.php
Disallow: /scotland/accommodation/self-catering.php
# NOTE(review): "%11" is a control character, not a hyphen - confirm the
# intended encoding for this rule as well.
Disallow: /scotland/accommodation/self%11catering.php
# not yet hotels due to edin fall-thru
# hotels 12-18-07
Disallow: /scotland/accommodation/budget-hotel.php
# and block the old search (done above)
# booking.com link block 24-10-08
# (the field separator ':' was missing here, so parsers ignored the rule)
Disallow: /aff/booking/acc1
# ad tracking
Disallow: /spnl
Disallow: /track*.php
# temporary disallow
Disallow: /scotland/tourism/books.php
###
# other
###
# block links
Disallow: /other-sites.php

########################
# Google can get any image
########################
# Googlebot-Image gets its own group, so none of the "User-agent: *" rules
# above (including the .jpg block) apply to it; it may fetch anything.
User-agent: Googlebot-Image
Disallow:
Allow: /*