#
# ..;coxkOOOOOOkxoc;'.
# .:d0NWMMMMMMMMMMMMMMWN0xc'
# .:kXMMMMMMMMMMMMMMMMMMMMMMMXl.
# .c0WMMMMMMMMMMMMMMMMMMMMMMMXd'
# ,OWMMMMMMMMMMMMMMMMMMMMMMMXo' ..
# cXMMMMMMXo::::::::::::::col. .lKXl.
# lNMMMMMMM0' .lKWMMNo
# :XMMMMMMMM0' .l0WMMMMMNc
# .OMMMMMMMMM0' .ccccccc;. ,KMMMMMMMMO.
# :NMMMMMMMMM0' oWMMMMMMWKc. oWMMMMMMMN:
# lWMMMMMMMMM0' oWMMMMMMMMX: ,KMMMMMMMMo
# oMMMMMMMMMM0' oWMMMMMMMMNc ,KMMMMMMMMd
# cNMMMMMMMMM0' oWMMMMMMMNd. lWMMMMMMMWl
# '0MMMMMMMMWk. ,oooooooc' ,0MMMMMMMMK,
# oWMMMMMMXo. ,0MMMMMMMMWo
# .xWMMMXd' ,dXMMMMMMMMWk.
# .xWNx' .',''''''',,;coONMMMMMMMMMWk.
# .:, .l0WWWWWWWWWWWMMMMMMMMMMMMMNd.
# .lKWMMMMMMMMMMMMMMMMMMMMMMMWk;
# .lKWMMMMMMMMMMMMMMMMMMMMMMMNk;.
# .ckXWMMMMMMMMMMMMMMMMMMWXkl'
# .;ldO0XNWWWWWWNXKOxl;.
# ..'',,,,''..
#
#
# NOTE: Allow was historically a non-standard robots.txt directive (it is now
# part of RFC 9309). It is honored by Google bots.
# See https://developers.google.com/search/reference/robots_txt#allow
#
# Formatting rules for this file:
#   - exactly one directive or comment per line; a directive that shares a
#     line with a leading '#' is a comment and is ignored by crawlers
#   - no blank lines inside a group (some legacy parsers end the group there)

# Default rules for every crawler that has no dedicated group below.
User-agent: *
Disallow: /admin/
Disallow: /trending/tweet/
Disallow: /newsletter/
Disallow: /healthcheck/
Disallow: /subpage/
Disallow: /ckeditor/
Disallow: /feeds/search/
Disallow: /jobs/edit/
Disallow: /wishfeedback/
Disallow: /api/
Disallow: /static/images/
# no deep queries to search
Disallow: /search/*
Allow: /search/$
# don't index our dynamic images
Disallow: /user_media/
Allow: /google_news_sitemap.xml
# hide old-school trend report
Disallow: /news/*/541893/

#
# Rules for specific crawlers below. Note these replace and override the '*' rules above.
#

# Allow Twitter to see all links
User-agent: Twitterbot
Disallow:

# Allow Googlebot-News to see header images and favicons, BUT make it follow all the directives from our * group
# See below link for why we have to repeat these directives
# https://developers.google.com/search/reference/robots_txt#order-of-precedence-for-user-agents
User-agent: Googlebot-News
Disallow: /admin/
Disallow: /trending/tweet/
Disallow: /newsletter/
Disallow: /healthcheck/
Disallow: /subpage/
Disallow: /ckeditor/
Disallow: /feeds/search/
Disallow: /jobs/edit/
Disallow: /wishfeedback/
Disallow: /api/
Disallow: /static/images/
# no deep queries to search
Disallow: /search/*
Allow: /search/$
Allow: /google_news_sitemap.xml
# hide old-school trend report
Disallow: /news/*/541893/
# Allow Google News to see header images and favicons
Allow: /user_media
Allow: /static/images/favicons

# Don't let Google Images crawler see anything at all
User-agent: Googlebot-Image
Disallow: /

# Don't let PetalBot crawl at all
User-agent: PetalBot
Disallow: /

# Allow the Facebook crawler user-agent to see all.
# The token is the bare product name: a version suffix such as "/1.1" is not a
# valid robots.txt product token and strict parsers would not match it.
User-agent: facebookexternalhit
Disallow:

# Allow swiftbot custom search to see all, but with a delay
User-agent: Swiftbot
Disallow:
Crawl-delay: 2

User-agent: AhrefsBot
# We want this bot to crawl way slower http://ahrefs.com/robot/
Crawl-delay: 10
# And be more aggressive on what not to allow
Disallow: /admin/
Disallow: /trending/tweet/
Disallow: /newsletter/
Disallow: /healthcheck/
Disallow: /subpage/
Disallow: /ckeditor/
Disallow: /feeds/search/
Disallow: /jobs/edit/
Disallow: /wishfeedback/
Disallow: /api/
Disallow: /static/images/
Disallow: /search/
Disallow: /user_media/