#
# robots.txt for artcarpstudio.com
# Copyright (c) 2007-2009 James F. Carpenter
# Email comments to: queller (at domain above)
# Last Updated: Sunday, 15 February 2009
#
# ---------------------------------------------------------------
#
# Straightforward policy: No bots allowed, apart from crawlers
# for public search indexes. Running unauthorized bots will
# likely garner an IP ban. Agent spoofing is also treated as
# bot activity. Please behave accordingly.
#
# If you want your robot allowed, or think it has been wrongly
# excluded, contact queller (see note above) - however, please
# realize that merely so doing is, in itself, no guarantee for
# inclusion. Applicability and comportment count.
#
# Thank you for your interest and cooperation.
#
# ---------------------------------------------------------------
#
# Maintenance notes:
#   * One directive per line; groups are separated by blank lines.
#   * Within a group, an "Allow" exception is listed BEFORE the
#     broader "Disallow" it carves a hole in. Parsers that stop at
#     the first matching rule then honor the exception, and parsers
#     that pick the longest matching rule are unaffected.
#   * More specific agent tokens (e.g. googlebot-image) are listed
#     before the shorter tokens they contain (e.g. googlebot).
#
# ---------------------------------------------------------------

# Our lo-cal, no-carb, g-zipped sitemap:
#
Sitemap: http://artcarpstudio.com/sitemap.xml.gz

# crawlers for adverts, blogs, images, multimedia,
# RSS feeds, etc. that are inapplicable
#
User-agent: mediapartners-google
Disallow: /

User-agent: googlebot-mobile
Disallow: /

User-agent: googlebot-image
Disallow: /

User-agent: adsbot-google
Disallow: /

User-agent: msnbot-products
Disallow: /

User-agent: msnbot-newsblogs
Disallow: /

User-agent: msnbot-media
Disallow: /

User-agent: msnbot-academic
Disallow: /

User-agent: yahoo-seeker
Disallow: /

User-agent: yahoo-newscrawler
Disallow: /

User-agent: yahoo-mmcrawler
Disallow: /

User-agent: yahoo-mmaudvid
Disallow: /

User-agent: yahooseeker
Disallow: /

User-agent: yahoo-blogs
Disallow: /

User-agent: exabot-images
Disallow: /

# rules for Googlebot,
# MSN-Live, Yahoo-Slurp
#
# These crawlers understand "*" wildcards, so file types are
# excluded by pattern instead of per-directory.
#
# NOTE(review): under longest-match evaluation (RFC 9309) the
# directory-level Allow rules below can outrank the shorter
# wildcard Disallow rules (e.g. "Allow: /deadbooks/" vs.
# "Disallow: /*.jpg" for /deadbooks/img/x.jpg). Confirm whether
# that is intended before removing or keeping those Allow lines.
#
User-agent: googlebot
User-agent: msnbot
User-agent: slurp
User-agent: deepbot
User-agent: freshbot
User-agent: googledeepbot
User-agent: googlefreshbot
User-agent: yahoo-slurp
Disallow: /*.cgi
Disallow: /*.css
Disallow: /*.gif
Disallow: /*.jpeg
Disallow: /*.jpg
Disallow: /*.js
Disallow: /*.pl
Disallow: /*.png
Disallow: /*index.php
Allow: /admin/legal.htm
Allow: /admin/map.htm
Disallow: /admin/
Disallow: /cgi-bin/
Allow: /coming/soon.htm
Disallow: /coming/
Allow: /deadbooks/
Allow: /deadbooks/agdl/
Allow: /deadbooks/aoth/
Allow: /deadbooks/wol/
Allow: /illustration/
Allow: /illustration/bw/
Allow: /illustration/color/
Allow: /illustration/ox/
Disallow: /img/
Allow: /instruments/
Allow: /instruments/goodhorsey/
Allow: /instruments/hurdygurdy/
Allow: /instruments/oud/
Allow: /links/links.htm
Disallow: /links/
Allow: /odds/
Allow: /odds/ends/
Disallow: /script/
Allow: /sculpture/
Allow: /sculpture/building/
Allow: /sculpture/dream/
Allow: /sculpture/eugene/
Allow: /sculpture/other/
Allow: /sculpture/planting/
Disallow: /templates/
Disallow: /text/
Allow: /who/me.htm
Disallow: /who/
Allow: /workshop/
Allow: /workshop/bench/

# rules for Cuil (Twiceler),
# Ask (Teoma), Gigablast (Gigabot)
#
# No wildcards here: every index.php and img/ directory is
# spelled out explicitly.
#
User-agent: twiceler
User-agent: teoma
User-agent: gigabot
Allow: /admin/legal.htm
Allow: /admin/map.htm
Disallow: /admin/
Disallow: /cgi-bin/
Allow: /coming/soon.htm
Disallow: /coming/
Disallow: /deadbooks/index.php
Disallow: /deadbooks/img/
Disallow: /deadbooks/agdl/index.php
Disallow: /deadbooks/agdl/img/
Disallow: /deadbooks/aoth/index.php
Disallow: /deadbooks/aoth/img/
Disallow: /deadbooks/wol/index.php
Disallow: /deadbooks/wol/img/
Disallow: /illustration/index.php
Disallow: /illustration/img/
Disallow: /illustration/bw/index.php
Disallow: /illustration/bw/img/
Disallow: /illustration/color/index.php
Disallow: /illustration/color/img/
Disallow: /illustration/ox/index.php
Disallow: /illustration/ox/img/
Disallow: /img/
Disallow: /instruments/goodhorsey/index.php
Disallow: /instruments/goodhorsey/img/
Disallow: /instruments/hurdygurdy/index.php
Disallow: /instruments/hurdygurdy/img/
Disallow: /instruments/index.php
Disallow: /instruments/img/
Disallow: /instruments/oud/index.php
Disallow: /instruments/oud/img/
Allow: /links/links.htm
Disallow: /links/
Disallow: /odds/index.php
Disallow: /odds/img/
Disallow: /odds/ends/index.php
Disallow: /odds/ends/img/
Disallow: /script/
Disallow: /sculpture/index.php
Disallow: /sculpture/img/
Disallow: /sculpture/building/index.php
Disallow: /sculpture/building/img/
Disallow: /sculpture/dream/index.php
Disallow: /sculpture/dream/img/
Disallow: /sculpture/eugene/index.php
Disallow: /sculpture/eugene/img/
Disallow: /sculpture/other/index.php
Disallow: /sculpture/other/img/
Disallow: /sculpture/planting/index.php
Disallow: /sculpture/planting/img/
Disallow: /templates/
Disallow: /text/
Allow: /who/me.htm
Disallow: /who/
Disallow: /workshop/index.php
Disallow: /workshop/img/
Disallow: /workshop/bench/index.php
Disallow: /workshop/bench/img/
Disallow: /sitemap.xml
Disallow: /Sitemap.xml

# W3C Link Checker
User-agent: W3C-checklink
User-agent: W3C_Validator
Allow: /admin/legal.htm
Allow: /admin/map.htm
Disallow: /admin/
Disallow: /cgi-bin/
Disallow: /templates/
Disallow: /text/

# Exclusionary rules for cases of inapplicability,
# poor comportment, and general bandwidth waste that
# have merited, at minimum, provisional suspensions.
# (Need our "Complaints Department"? See above.)
#
User-agent: Asterias # SingingFish
Disallow: /

User-agent: Atomz # business search
Disallow: /

User-agent: bdfetch # brand protection searcher
Disallow: /

User-agent: BecomeBot # shopping-related indexer
Disallow: /

User-agent: Bloglines-Images # graphics crawler
Disallow: /

User-agent: CazoodleBot # apartment listings "research"
Disallow: /

User-agent: CheeseBot # (eh? curds?)
Disallow: /

User-agent: cogentbot # research; purposes inapposite
Disallow: /

User-agent: DariusBot # disorderly conduct
Disallow: /

User-agent: Diffbot # RSS and news feed crawler
Disallow: /

User-agent: DOC # polite enough, but not public
Disallow: /

User-agent: DotBot # stated purposes inapposite
Disallow: /

User-agent: duggmirror # no thanks
Disallow: /

User-agent: Galbot # contextual tagging robot
Disallow: /

User-agent: grub # poor comportment
Disallow: /

User-agent: grub-client # poor comportment
Disallow: /

User-agent: Horde of Xerxes # "Repulse the witless horde!"
Disallow: /

User-agent: hotsy-totsy-botsy # funny name, bad behavior
Disallow: /

User-agent: ia_archiver # Alexa / Internet Archive (Wayback Machine)
Disallow: /

User-agent: ichiro # multimedia search (http://bsearch.goo.ne.jp)
Disallow: /

User-agent: k2spider # disorderly conduct
Disallow: /

User-agent: librabot # Chinese bot falsely claiming MSN affiliation
Disallow: /

User-agent: lnspiderguy # LexisNexis business solutions
Disallow: /

User-agent: MuscatFerret # Euroferret robot
Disallow: /

User-agent: netmechanic # enterprise searcher
Disallow: /

User-agent: NetResearchServer # tries to access non-existing URLs
Disallow: /

User-agent: OodleBot # classifieds service
Disallow: /

User-agent: PropSmartCrawler # real estate listings searcher
Disallow: /

User-agent: PSbot # PicSearch snagger
Disallow: /

User-agent: Roverbot # targeted email gatherer
Disallow: /

User-agent: SapphireWebCrawler # topic-partitioned indexing research
Disallow: /

User-agent: Scan4Mail # mail extraction
Disallow: /

User-agent: Semper.Pertinax # persistent misbehavior
Disallow: /

User-agent: sitecheck.internetseer.com # troublesome
Disallow: /

User-agent: TurnItInBot # educational info; plagiarism searches
Disallow: /

User-agent: UbiCrawler # polite enough, but results not public
Disallow: /

User-agent: UltraSeek # enterprise searches
Disallow: /

User-agent: wget # wild recursive traversals; wasteful
Disallow: /

User-agent: Xenu # predatory behavior
Disallow: /

User-agent: Yanga WorldSearch Bot
Disallow: /

User-agent: Zao # polite enough, but results not public
Disallow: /

User-agent: Zealbot # extreme zealousness
Disallow: /

User-agent: zombies # (literal or not)
Disallow: /*brains
Disallow: /

User-agent: ZyBORG # brutish, predatory behavior
Disallow: /

#
# generic rules - i.e., for everyone else
# (a tad restrictive, but not a straitjacket)
#
User-agent: *
Disallow: /admin/index.php
Disallow: /admin/lynx.htm
Disallow: /admin/error/
Disallow: /admin/img/
Disallow: /admin/probitio/
Disallow: /cgi-bin/
Disallow: /coming/index.php
Disallow: /coming/img/
Disallow: /deadbooks/index.php
Disallow: /deadbooks/img/
Disallow: /deadbooks/agdl/index.php
Disallow: /deadbooks/agdl/img/
Disallow: /deadbooks/aoth/index.php
Disallow: /deadbooks/aoth/img/
Disallow: /deadbooks/wol/index.php
Disallow: /deadbooks/wol/img/
Disallow: /illustration/index.php
Disallow: /illustration/img/
Disallow: /illustration/bw/index.php
Disallow: /illustration/bw/img/
Disallow: /illustration/color/index.php
Disallow: /illustration/color/img/
Disallow: /illustration/ox/index.php
Disallow: /illustration/ox/img/
Disallow: /img/
Disallow: /instruments/goodhorsey/index.php
Disallow: /instruments/goodhorsey/img/
Disallow: /instruments/hurdygurdy/index.php
Disallow: /instruments/hurdygurdy/img/
Disallow: /instruments/index.php
Disallow: /instruments/img/
Disallow: /instruments/oud/index.php
Disallow: /instruments/oud/img/
Disallow: /links/index.php
Disallow: /links/img/
Disallow: /odds/index.php
Disallow: /odds/img/
Disallow: /odds/ends/index.php
Disallow: /odds/ends/img/
Disallow: /script/
Disallow: /sculpture/index.php
Disallow: /sculpture/img/
Disallow: /sculpture/building/index.php
Disallow: /sculpture/building/img/
Disallow: /sculpture/dream/index.php
Disallow: /sculpture/dream/img/
Disallow: /sculpture/eugene/index.php
Disallow: /sculpture/eugene/img/
Disallow: /sculpture/other/index.php
Disallow: /sculpture/other/img/
Disallow: /sculpture/planting/index.php
Disallow: /sculpture/planting/img/
Disallow: /templates/
Disallow: /text/
Disallow: /who/index.php
Disallow: /who/img/
Disallow: /workshop/index.php
Disallow: /workshop/img/
Disallow: /workshop/bench/index.php
Disallow: /workshop/bench/img/

# alternate universal rule
# (smackdown ... held in reserve)
#
# User-agent: *
# Disallow: /
#

## EOF ##