# Enable mod_rewrite so the RewriteCond/RewriteRule directives in this file take effect.
RewriteEngine On
# Base URL path used for per-directory rewrites; "/" means this file governs the site root.
RewriteBase /
### Prevent .htaccess, .htpasswd and other sensitive files from being viewed by web clients ###
# The extension group is anchored with "$" immediately after the closing
# parenthesis. A space before "$" would require the file name to end in a
# literal space, so none of these files would ever be protected.
<FilesMatch "\.(htaccess|htpasswd|ini|phps|fla|psd|log|sh)$">
    # Allow,Deny with no Allow line: every client is refused.
    Order Allow,Deny
    Deny from all
</FilesMatch>
### Prevent directory browsing ###
# Turn off automatically generated directory listings.
Options -Indexes
################################################## #############################
### Block major search engines by IP ###
# Each RewriteCond matches one crawler address or address range. The conditions
# are chained with [OR], so a request coming from ANY of them is answered with
# 403 Forbidden; all other clients fall through untouched.
#
# Note on the patterns: a bracket expression such as [0-255] matches a single
# character, not a number range, so numeric ranges are spelled out explicitly.

### Googlebot/2.1, Googlebot-Image/1.0 and Mediapartners-Google/2.1 ###
# from 66.249.64.0 to 66.249.95.255 (third octet 64-95)
RewriteCond %{REMOTE_ADDR} ^66\.249\.(6[4-9]|[78][0-9]|9[0-5])\.[0-9]+$ [OR]
### Google Feedfetcher ###
RewriteCond %{REMOTE_ADDR} ^72\.14\.199\.7$ [OR]
RewriteCond %{REMOTE_ADDR} ^72\.14\.193\.68$ [OR]
RewriteCond %{REMOTE_ADDR} ^209\.85\.238\.9$ [OR]
RewriteCond %{REMOTE_ADDR} ^209\.85\.238\.17$ [OR]
### Google-Sitemaps/1.0 from 74.125.0.0 to 74.125.255.255 ###
RewriteCond %{REMOTE_ADDR} ^74\.125\.[0-9]+\.[0-9]+$ [OR]
### Slurp (Yahoo/Iktomi) from 74.6.0.0 to 74.6.255.255 ###
RewriteCond %{REMOTE_ADDR} ^74\.6\.[0-9]+\.[0-9]+$ [OR]
RewriteCond %{REMOTE_ADDR} ^67\.195\.[0-9]+\.[0-9]+$ [OR]
### Msnbot/1.1 from 65.52.0.0 to 65.55.255.255 (second octet 52-55) ###
RewriteCond %{REMOTE_ADDR} ^65\.5[2-5]\.[0-9]+\.[0-9]+$
# [F] returns 403 Forbidden and implies [L].
RewriteRule ^ - [F]
################################################## #############################
### Block Spam Bots ####
### Deny Fake Bots ###
# Each directive below sets the environment variable bad_bot when the request's
# User-Agent header matches the given regular expression. The variable name must
# be spelled identically everywhere, because the access rule that refuses these
# clients tests env=bad_bot.
# BrowserMatch is case-sensitive; SetEnvIfNoCase matches case-insensitively.
BrowserMatch "^Java/?[1-9_\.]*" bad_bot
BrowserMatch "^MJ12bot/?[1-9_\.]*" bad_bot
SetEnvIfNoCase User-Agent "^8484 Boston Project/?[1-9_\.]*" bad_bot
SetEnvIfNoCase User-Agent "charlotte/" bad_bot
# Parentheses and dots are escaped so the pattern matches this exact agent
# string; unescaped parentheses would form a regex group and never match the
# literal "(" and ")" in the header.
SetEnvIfNoCase User-Agent "curl/7\.15\.5 \(i686-redhat-linux-gnu\) libcurl/7\.15\.5 OpenSSL/0\.9\.8b zlib/1\.2\.3 libidn/0\.6\.5" bad_bot
SetEnvIfNoCase User-Agent "^Heritrix/" bad_bot
SetEnvIfNoCase User-Agent "ia_archiver" bad_bot
SetEnvIfNoCase User-Agent "larbin/" bad_bot
SetEnvIfNoCase User-Agent "libwww-perl" bad_bot
SetEnvIfNoCase User-Agent "^libcurl-agent/" bad_bot
SetEnvIfNoCase User-Agent "IRC-Bbot" bad_bot
SetEnvIfNoCase User-Agent "ISC Systems iRc Search 2.1" bad_bot
SetEnvIfNoCase User-Agent "^Jakarta\ Commons-HttpClient/" bad_bot
SetEnvIfNoCase User-Agent "^Java/" bad_bot
SetEnvIfNoCase User-Agent "^Microsoft\ URL\ Control.*$" bad_bot
SetEnvIfNoCase User-Agent "^MJ12bot/" bad_bot
SetEnvIfNoCase User-Agent "MJ12bot/v1.0.8" bad_bot
SetEnvIfNoCase User-Agent "^Missigua Locator" bad_bot
SetEnvIfNoCase User-Agent "^Mozilla/4\.0\ .*Win\ 9x\ 4\.90.*$" bad_bot
SetEnvIfNoCase User-Agent "Nutch" bad_bot
SetEnvIfNoCase User-Agent "^PEAR HTTP_Request class" bad_bot
SetEnvIfNoCase User-Agent "phpversion" bad_bot
SetEnvIfNoCase User-Agent "^psycheclone" bad_bot
SetEnvIfNoCase User-Agent "TencentTraveler" bad_bot
SetEnvIfNoCase User-Agent "^Web Downloader" bad_bot
SetEnvIfNoCase User-Agent "^Wells Search II" bad_bot
SetEnvIfNoCase User-Agent "^WEP Search 00" bad_bot
# Refuse every request flagged with the bad_bot environment variable.
# The rule is deliberately NOT wrapped in <Limit GET POST>: inside <Limit> it
# would apply only to the listed methods, leaving PUT, DELETE, OPTIONS and the
# rest open to the very clients this rule is meant to stop.
# Allow,Deny: Allow is evaluated first, then Deny; a client matching both is denied.
Order Allow,Deny
Allow from all
Deny from env=bad_bot
# Known Bad Bots
# Return 403 Forbidden when the User-Agent contains any of the names below
# (case-insensitive, matched anywhere in the header unless anchored with ^ / $).
# The list is split into several RewriteCond lines chained with [OR]; every
# condition must sit on one physical line, and a literal space inside a pattern
# must be written as "\ " or it is parsed as the end of the argument.
# Glob-style "*" has been written as ".*", and exact duplicates were removed.
# NOTE(review): several entries are very generic (email, Link, Proxy, Fetch,
# Website, Ping, Memo, Mirror, ...) and may also match legitimate clients.
RewriteCond %{HTTP_USER_AGENT} ADSARobot|ah-ha|almaden|aktuelles|Anarchie|amzn_assoc|Arachmo|ASPSeek|ASSORT|ATHENS|Atomz|attach|attache|autoemailspider [NC,OR]
RewriteCond %{HTTP_USER_AGENT} BackWeb|Bandit|BatchFTP|bdfetch|Bbot|BecomeBot|big.brother|Bitacle|BlackWidow|bmclient|boitho.com-dc|Boston\ Project|bot/1.0|BravoBrian\ SpiderEngine\ MarcoPolo|Bot\ mailto:craftbot@yahoo.com|Buddy|Bullseye|bumblebee [NC,OR]
RewriteCond %{HTTP_USER_AGENT} capture|CherryPicker|ChinaClaw|CICC|clipping|Clushbot|Collector|Copier|Crescent|Crescent\ Internet\ ToolPak|Custo|cyberalert|Deweb|diagem|Digger|Digimarc|DIIbot|DISCo|DISCo\ Pump|DISCoFinder|Download\ Demon|Download\ Wonder|Downloader|Drip|DSurf15a|DTS.Agent [NC,OR]
RewriteCond %{HTTP_USER_AGENT} EasyDL|eCatch|ecollector|efp@gmx\.net|Email\ Extractor|EirGrabber|email|EmailCollector|EmailSiphon|EmailWolf|Express\ WebPictures|ExtractorPro|EyeNetIE|FavOrg|fastlwspider|Favorites\ Sweeper|Fetch|FEZhead|FileHound|FlashGet\ WebWasher|FlickBot|fluffy|FrontPage [NC,OR]
RewriteCond %{HTTP_USER_AGENT} GalaxyBot|Generic|Getleft|GetRight|GetSmart|GetWeb!|GetWebPage|gigabaz|Girafabot|Go!Zilla|Go-Ahead-Got-It|GornKer|gotit|Grabber|GrabNet|Grafula|Green\ Research|grub-client|Harvest|heritrix|hhjhj@yahoo|hloader|HMView|HomePageSearch|http\ generic|HTTrack|httpdown [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ia_archiver|IBM_Planetwide|Image\ Stripper|Image\ Sucker|imagefetch|IncyWincy|Indy.*Library|informant|Ingelin|InterGET|Internet\ Ninja|InternetLinkagent|InternetSeer\.com|Iria|Irvine|JBH.*agent|JetCar|JOC|JOC\ Web\ Spider|JustView|kalooga|KWebGet [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Lachesis|larbin|Leacher|LeechFTP|LexiBot|lftp|likse|Link|Link.*Sleuth|LINKS\ ARoMATIZED|LinkWalker|LWP|lwp-trivial|Mag-Net|Magnet|Mac\ Finder|Mass\ Downloader|MCspider|MJ12bot/v1\.0\.8|Memo|Microsoft.URL|MIDown\ tool|Mirror|Missigua\ Locator|Mister\ PiX|MMMtoCrawl\/UrlDispatcherLLL|monit [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Mozilla$|Mozilla.*Indy|Mozilla.*NEWT|Mozilla.*MSIECrawler|MS\ FrontPage|MSFrontPage|MSIECrawler|MSProxy|MSR-ISRCCrawler|multithreaddb|my-heritrix-crawler [NC,OR]
RewriteCond %{HTTP_USER_AGENT} nationaldirectory|Navroad|NearSite|NetAnts|NetCarta|NetMechanic|netprospector|NetResearchServer|NetSpider|Net\ Vampire|NetZIP|NetZip\ Downloader|NetZippy|NEWT|NICErsPRO|Ninja|NPBot|NicheBot|noxtrumbot|Octopus|Offline\ Explorer|Offline\ Navigator|OmniExplorer|OpaL|Openfind|OpenTextSiteCrawler|OrangeBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} PageGrabber|Papa\ Foto|PackRat|pavuk|pcBrowser|PersonaPilot|Ping|PingALink|Pingdom|Pockey|POE-Component-Client-HTTP|Powermarks|Proxy|psbot|PSurf|psycheclone|puf|Pump|PushSite|QRVA|RealDownload|Reaper|Recorder|ReGet|replacer|RepoMonkey|Robozilla|Rover|RPT-HTTPClient|Rsync [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Scooter|SearchExpress|searchhippo|searchterms\.it|Second\ Street\ Research|Seeker|Shai|Siphon|sitecheck|sitecheck.internetseer.com|SiteSnagger|SlySearch|SmartDownload|snagger|Snake|SpaceBison|Spegla|SpiderBot|sproose|SqWorm|Stripper|Sucker|SuperBot|SuperHTTP|Surfbot|SurfWalker|Szukacz [NC,OR]
RewriteCond %{HTTP_USER_AGENT} tAkeOut|tarspider|Teleport\ Pro|Templeton|TencentTraveler|TrueRobot|TV33_Mercator|UIowaCrawler|UtilMind|URLSpiderPro|URL_Spider_Pro|Vacuum|vagabondo|vayala|visibilitygap|VoidEYE|vspider [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Web\ Downloader|w3mir|Web\ Data\ Extractor|Web\ Image\ Collector|Web\ Sucker|Wweb|WebAuto|WebBandit|web\.by\.mail|Webclipping|webcollage|webcollector|WebCopier|webcraft@bea|webdevil|webdownloader|Webdup|WebEMailExtrac|WebFetch|WebGo\ IS|WebHook|Webinator|WebLeacher|WEBMASTERS|WebMiner|WebMirror|webmole|WebReaper|WebSauger [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Website|Website\ eXtractor|Website\ Quester|WebSnake|Webster|WebStripper|websucker|webvac|webwalk|webweasel|WebWhacker|WebZIP|Wget|Whacker|whizbang|WhosTalking|Widow|WinHTTP|WISEbot|WWWOFFLE|x-Tractor|^Xaldon\ WebSpider|WUMPUS|XGET|Yeti|zermelo|Zeus.*Webster|Zeus [NC]
# [F] returns 403 Forbidden and implies [L].
RewriteRule ^ - [F]
# Bots whose User-Agent starts with "Web" followed by one of the listed suffixes.
# The pattern must not contain unescaped spaces: RewriteCond would read the text
# after a space as a separate argument.
RewriteCond %{HTTP_USER_AGENT} ^web(zip|emaile|enhancer|fetch|go.?is|auto|bandit|clip|copier|master|reaper|sauger|site.?quester|whack) [NC,OR]
# Keywords matched anywhere in the User-Agent. An unanchored pattern already
# matches at any position, so no leading "^.*" / trailing ".*$" is needed.
RewriteCond %{HTTP_USER_AGENT} (craftbot|download|extract|stripper|sucker|ninja|clshttp|webspider|leacher|collector|grabber|webpictures) [NC]
# [F] returns 403 Forbidden and implies [L].
RewriteRule ^ - [F]
# Forbid the request when either condition holds (both case-insensitive):
#   - the query string contains "=http:", i.e. a URL passed as a parameter
#     value (presumably aimed at remote-file-inclusion probes -- confirm);
#   - the User-Agent contains "libwww".
# The query pattern keeps its leading "^.*": a CondPattern that begins with "="
# would be treated as a string-equality test instead of a regular expression.
RewriteCond %{QUERY_STRING} ^.*=http: [NC,OR]
RewriteCond %{HTTP_USER_AGENT} libwww [NC]
# [F] answers 403 Forbidden and already implies [L].
RewriteRule ^ - [F]
### Refuse browser prefetch requests ###
# Mark prefetches: set no_access=yes when the X-moz request header contains "prefetch".
SetEnvIfNoCase X-moz prefetch no_access=yes
# Mark requests that carry a non-empty X-Forwarded-For header with proxy=yes.
# (proxy is not referenced by any rule in the visible part of this file.)
SetEnvIfNoCase X-Forwarded-For .+ proxy=yes
### Answer marked prefetch requests with 403 Forbidden ###
RewriteCond %{ENV:no_access} yes
# [F] already implies [L].
RewriteRule ^ - [F]
################################################## #############################
### Block Major Search Engines by bot name ###
# If your server supports the directives below, add them too. They make sure
# that Google, Yahoo/Slurp, MSN, Gigabot and Ask Jeeves/Teoma are refused even
# when they connect from an IP address that is not covered by the IP-based rules.
# Each line sets no_bot when the User-Agent contains the given text
# (case-insensitive).
SetEnvIfNoCase User-Agent "Googlebot" no_bot
SetEnvIfNoCase User-Agent "Googlebot/2.1" no_bot
SetEnvIfNoCase User-Agent "Yahoo" no_bot
SetEnvIfNoCase User-Agent "Slurp" no_bot
SetEnvIfNoCase User-Agent "Msnbot" no_bot
SetEnvIfNoCase User-Agent "Gigabot" no_bot
SetEnvIfNoCase User-Agent "Ask Jeeves" no_bot
SetEnvIfNoCase User-Agent "Ask Jeeves/Teoma" no_bot
# The rule is placed directly at this level rather than inside
# <FilesMatch "(.*)">. That wrapper matches every file, and Order/Allow/Deny
# used in a nested section replace the enclosing section's access rules instead
# of merging with them, so it would silently cancel the "Deny from env=bad_bot"
# rule declared earlier in this file.
Order Allow,Deny
Allow from all
Deny from env=no_bot