Skip to content

Commit

Permalink
Moved to node-solr-client, seems much better, cleaner api and less bu…
Browse files Browse the repository at this point in the history
…gs..
  • Loading branch information
AvnerCohen committed Nov 6, 2012
1 parent e2971ed commit d5b5a3c
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 20 deletions.
16 changes: 8 additions & 8 deletions app.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
var http = require('http');
var director = require('director');
var log = require("./log").log;
var solr = require('solr');
var solr = require('solr-client');
var union = require('union');
var ecstatic = require('ecstatic');
var url = require('url');
Expand All @@ -10,23 +10,22 @@ var client = solr.createClient();


function solrSearch(searchterm) {
//add critera
var query = 'title_t:# OR summary_t:# OR body_t:#'.replace(/#/g, searchterm);

//add page to start from
var parts = url.parse(this.req.url, true);
var page = parts.query['page'];
query+= "&start=" + page;

var that = this;
client.query(query, function(err, response) {
var query = client.createQuery().q({
title_t: searchterm
}).start(page).rows(10);
client.search(query, function(err, obj) {
if (err) throw err;
var responseObj = JSON.parse(response);

that.res.writeHead(200, {
'Content-Type': 'text/html'
});
that.res.end(response);

that.res.end(JSON.stringify(obj));
});
}

Expand All @@ -40,6 +39,7 @@ var router = new director.http.Router({

var server = union.createServer({
before: [

function(req, res) {
var found = router.dispatch(req, res);
if (!found) {
Expand Down
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
"dependencies": {
"ecstatic": "~0.1.6",
"node.io": "~0.4.12",
"solr": "~0.2.2",
"cheerio": "~0.10.1",
"director": "~1.1.6",
"union": "~0.3.4",
"async": "~0.1.22"
"async": "~0.1.22",
"solr-client": "~0.2.3"
},
"engines": {
"node": "0.8.x",
Expand Down
5 changes: 3 additions & 2 deletions public/js/birds-search.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
var birdsSearch = {
GAP_TO_KICK_SEARCH: 200,
CURRENT_PAGE: 0
CURRENT_PAGE: 0,
ROW_SIZE : 10
};

function changeSearchValue() {
Expand All @@ -20,7 +21,7 @@ function checkTimer() {

function invokeSearch() {
var val = $("#searchterm").val().trim();
var uri = "./search/" + val + "?page="+ birdsSearch.CURRENT_PAGE;
var uri = "./search/" + val + "?page="+ (birdsSearch.CURRENT_PAGE * birdsSearch.ROW_SIZE);
$.getJSON(uri, function(res) {
var $list = $("#results");
$list.empty();
Expand Down
2 changes: 1 addition & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ The flow of setting up the project is as follows:

node scrape

This will start a process to scrape the list of birds recorded in israel from: http://www.israbirding.com/checklist/
This step is not mandatory, since the repo also includes the scraped pages from some point in world history. This will start a process to scrape the list of birds recorded in Israel from: http://www.israbirding.com/checklist/

After that, with a minor tweak to the bird names, it will scrape the relevant bird pages from Wikipedia. The process takes 2-4 minutes. It is really not optimized or parallelized as it should be.

Expand Down
14 changes: 7 additions & 7 deletions solr-index.js
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
var solr = require('solr');
var solr = require('solr-client');
var fs = require('fs');
var cheerio = require('cheerio');
var log = require("./log").log;



var client = solr.createClient();
var query = "*:*";

client.del(null, query, function(err, response) {

client.deleteByQuery("*:*", function(err, response) {
if (err) throw err;
console.log('Deleted all docs matching query "' + query + '"');
console.log('Deleted all docs matching query');
client.commit();
reIndex();
});
Expand All @@ -23,7 +23,7 @@ function reIndex() {
//Extract: keywords
//extract body
var BASE_PATH = "./birds-kb/";
var BASE_LINK = "http://en.wikipedia.org/wiki/"
var BASE_LINK = "http://en.wikipedia.org/wiki/";
var files = fs.readdirSync(BASE_PATH);
var counter = 0;
//Loop through files and extract important content
Expand All @@ -37,15 +37,15 @@ function reIndex() {
continue;
}

var doc = {
var articale = {
id: counter++,
title_t: title,
link_t: BASE_LINK + files[i].replace("_data.txt", ""),
body_t: doc("div#bodyContent").text(),
summary_t: doc("div#mw-content-text p").first().html()
};

client.add(doc, done);
client.add(articale, done);
}

function done() {
Expand Down

0 comments on commit d5b5a3c

Please sign in to comment.