-
Notifications
You must be signed in to change notification settings - Fork 2
/
index.html
81 lines (81 loc) · 4.42 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
<html>
<head>
<title>Search Engines Bookmarklet</title>
<link rel="icon" type="image/png" href="favicon.png" />
<style>
body {
font-family: monospace;
font-size: 1.4em;
margin: auto;
width: 66%;
padding-top: 3%;
text-align: center;
}
a, a:visited {
color: black;
}
small {
line-height: 50px;
font-size: 0.5em;
}
a img:hover {
box-shadow: 1px 1px 10px 2px #656565;
transition: box-shadow 0.4s;
}
</style>
</head>
<body>
<h1>SearchEnginesBookmarklet<br/><small>(by <a target="_blank" href="https://medialab.sciencespo.fr/">médialab Sciences Po</a>)</small></h1>
<p>Here is a very simple tool to help easily extract as tabular data (CSV) the results of web search engines such as:<br/>Google (web, images & Scholar), DuckDuckGo (web & images), Bing, Baidu or Qwant.</p>
<p>Built as a <a target="_blank" href="https://en.wikipedia.org/wiki/Bookmarklet">javascript bookmarklet</a> (intended for Firefox & Chrome only), it can be easily installed within a browser's bookmarks bar as a simple button to click in order to scrape results data without risking any ban as it still requires to load the search results page in the browser one by one.</p>
<br/>
<h3>How to install?</h3>
<p>Drag & drop the image below into your bookmarks bar<br/>(you may first need to enable its display from your browser's menu):</p>
<p><a id="bookmark" href=""><img src="images/duckduckgo-google-bing-baidu-256.png" alt="🔍 SearchEnginesBookmarklet"/></a></p>
<br/>
<h3>How does it work?</h3>
<p><img src="images/bookmarklets.png" alt="screenshots"/ style="width: 140%; margin-left: -20%;"></p>
<br/>
<p>
<a target="_blank" href="https://medialab.sciencespo.fr/en/tools">Discover more médialab tools</a> —
<a target="_blank" href="https://github.com/medialab/SearchEnginesBookmarklet">Source code</a>
<p>
</body>
<script>
document.addEventListener("DOMContentLoaded", function() {
var buildBookmarklet = function(hrefId, scriptName) {
var res = "javascript:(function(){" +
"if (window.SearchEnginesBookmarklet !== undefined && document.getElementById('BMoverlay')) return;" +
"window.SearchEnginesBookmarklet = '1.0';" +
"var userAgent = navigator.userAgent," +
"href = window.location.href," +
"bod = document.getElementsByTagName('body')[0]," +
"CSP = 'CSP protection detected!\\n\\nthis website forbids your browser to load the scripts required for SearchEnginesBookmarklet!'," +
"popup = '\\n\\nTry to enforce it using a browser extension such as the one that will open in a new tab (first allow it to pop-up). Then activate the extension, refresh the page and retry the bookmarklet.'," +
"script = document.createElement('script');" +
"script.type = 'text/javascript';" +
"script.src = '" + window.location.href.replace(/^https?:/, '').replace(/\/?(index.html)?$/, '/') + scriptName + ".js';" +
"if(~href.search(/:\\/\\/([^.]+\\.)?bing\\.[^/]+\\//)){" +
"HTMLBodyElement.prototype.appendChild = Node.prototype.appendChild;" +
"}" +
"bod || (bod = document.createElement('body'), document.documentElement.appendChild(bod));" +
"bod.appendChild(script);" +
"script.onerror = function(e){" +
"if (userAgent.indexOf('Chrome') > -1) {" +
"window.alert(CSP + popup);" +
"window.open('https://chromewebstore.google.com/detail/disable-content-security/ieelmcmcagommplceebfedjlakkhpden');" +
"} else if (userAgent.indexOf('Firefox') > -1) {" +
"window.alert(CSP + popup + '\\n\\n(you should use the \"Disable CSP\" option of the extension)');" +
"window.open('https://addons.mozilla.org/en-US/firefox/addon/mheadercontrol/');" +
"} else {" +
"window.alert(CSP + '\\n\\nThis bookmarklet is intended to work only within Firefox or Chrome/Chromium.\\n\\nPlease use one of these browsers and retry.');" +
"}" +
"console.log(e);" +
"}" +
"})()";
document.getElementById(hrefId).href = res.replace(/\s+/g, '%20');
};
buildBookmarklet("bookmark", "SearchEnginesBookmarklet");
});
</script>
</html>