From 371ea77be702f409e9307bbf084d51e6d4cd2542 Mon Sep 17 00:00:00 2001
From: Patrick McCann
Date: Tue, 1 Oct 2024 12:00:58 -0400
Subject: [PATCH] Create known_crawler_lists

---
 known_crawler_lists | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 known_crawler_lists

diff --git a/known_crawler_lists b/known_crawler_lists
new file mode 100644
index 0000000..6a56b71
--- /dev/null
+++ b/known_crawler_lists
@@ -0,0 +1,5 @@
+https://github.com/privacy-tech-lab/gpc-web-crawler/blob/main/selenium-optmeowt-crawler/full-crawl-set.csv
+https://github.com/InteractiveAdvertisingBureau/adstxtcrawler/blob/master/adstxt_domains_2018-02-13.txt
+https://github.com/kaustubhd93/adstxt-crawler/tree/master/archives
+https://github.com/zer0h/top-1000000-domains/blob/master/top-10000-domains
+https://github.com/zer0h/top-1000000-domains/blob/master/top-100000-domains