Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add new CentralEuropeanStreetNameClassifier #88

Merged
merged 2 commits into from
Apr 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions classifier/CentralEuropeanStreetNameClassifier.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
const _ = require('lodash')
const SectionClassifier = require('./super/SectionClassifier')
const StreetClassification = require('../classification/StreetClassification')

/**
* Classifier which attempts to classify street names with no suffix or prefix
* when accompanied by a housenumber in the same section.
*
* see: https://github.com/pelias/parser/issues/83
*/

class CentralEuropeanStreetNameClassifier extends SectionClassifier {
  /**
   * Classify the first child of a two-child section as a street when the
   * last child already carries a HouseNumberClassification and the first
   * child has no public classification of its own.
   *
   * @param {object} section - section span; must expose `graph.length('child')`
   *   and `graph.findAll('child')`, whose items expose `classifications` and
   *   `classify()`.
   * @returns {void} the matching child is classified in place.
   */
  each (section) {
    // there must be exactly two children in this section
    // note: we may wish to relax/expand on this later
    if (section.graph.length('child') !== 2) { return }

    // get first and last child
    const children = section.graph.findAll('child')
    const first = _.first(children)
    const last = _.last(children)

    // section must end with a HouseNumberClassification
    // note: the check goes through Object.prototype so that a classifications
    // map without a prototype (or with a key named 'hasOwnProperty') cannot
    // break or spoof it
    if (!Object.prototype.hasOwnProperty.call(last.classifications, 'HouseNumberClassification')) { return }

    // other elements cannot contain any public classifications
    if (_.some(first.classifications, (c) => c.public)) { return }

    // optionally check parent phrases too?
    // if (_.some(first.graph.findAll('parent'), (p) => {
    //   if (p.norm !== first.norm) { return false }
    //   return _.some(p.classifications, (c) => c.public)
    // })) { return }

    // assume the first token is a street name
    // (0.5 is presumably a confidence score - confirm against StreetClassification)
    first.classify(new StreetClassification(0.5))
  }
}

module.exports = CentralEuropeanStreetNameClassifier
52 changes: 52 additions & 0 deletions classifier/CentralEuropeanStreetNameClassifier.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
const _ = require('lodash')
const CentralEuropeanStreetNameClassifier = require('./CentralEuropeanStreetNameClassifier')
const HouseNumberClassification = require('../classification/HouseNumberClassification')
const StreetClassification = require('../classification/StreetClassification')
const Span = require('../tokenization/Span')
const classifier = new CentralEuropeanStreetNameClassifier()

module.exports.tests = {}
module.exports.tests.classify = (test) => {
  // build a section span whose two children are a bare token followed by a
  // token pre-classified as a house number
  const buildSection = (body, name, number) => new Span(body).setChildren([
    new Span(name),
    new Span(number).classify(new HouseNumberClassification(1.0))
  ])

  const sections = [
    buildSection('Foo 1', 'Foo', '1'),
    buildSection('Bar 2137', 'Bar', '2137')
  ]

  for (const section of sections) {
    test(`classify: ${section.body}`, (t) => {
      // run classifier
      classifier.each(section, null, 1)

      // look up the children after classification
      const kids = section.graph.findAll('child')
      const head = _.first(kids)
      const tail = _.last(kids)

      // first child should now be classified as a street
      t.deepEqual(head.classifications, {
        StreetClassification: new StreetClassification(0.5)
      })

      // last child was unchanged
      t.deepEqual(tail.classifications, {
        HouseNumberClassification: new HouseNumberClassification(1)
      })

      t.end()
    })
  }
}

module.exports.all = (tape, common) => {
  // wrap tape so that every test name is prefixed with the classifier name
  const test = (name, testFunction) => tape(`CentralEuropeanStreetNameClassifier: ${name}`, testFunction)

  // register every suite attached to module.exports.tests
  for (const suiteName in module.exports.tests) {
    module.exports.tests[suiteName](test, common)
  }
}
6 changes: 5 additions & 1 deletion parser/AddressParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ const ChainClassifier = require('../classifier/ChainClassifier')
const PlaceClassifier = require('../classifier/PlaceClassifier')
const IntersectionClassifier = require('../classifier/IntersectionClassifier')
// const MultiStreetClassifier = require('../classifier/MultiStreetClassifier')
const CentralEuropeanStreetNameClassifier = require('../classifier/CentralEuropeanStreetNameClassifier')
const CompositeClassifier = require('../classifier/CompositeClassifier')
const WhosOnFirstClassifier = require('../classifier/WhosOnFirstClassifier')
// const AdjacencyClassifier = require('../classifier/AdjacencyClassifier')
Expand Down Expand Up @@ -70,7 +71,10 @@ class AddressParser extends Parser {
new CompositeClassifier(require('../classifier/scheme/street_name')),
new CompositeClassifier(require('../classifier/scheme/street')),
new CompositeClassifier(require('../classifier/scheme/place')),
new CompositeClassifier(require('../classifier/scheme/intersection'))
new CompositeClassifier(require('../classifier/scheme/intersection')),

// additional classifiers which act on unclassified tokens
new CentralEuropeanStreetNameClassifier()
],
// solvers
[
Expand Down
26 changes: 26 additions & 0 deletions test/address.cze.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
const testcase = (test, common) => {
  const check = common.assert(test)

  // [input, expected solution] pairs, checked in order
  const fixtures = [
    ['Korunní 810, Praha', [
      { street: 'Korunní' }, { housenumber: '810' },
      { locality: 'Praha' }
    ]],
    ['Kájovská 68, Český Krumlov', [
      { street: 'Kájovská' }, { housenumber: '68' },
      { locality: 'Český Krumlov' }
    ]],
    ['Beethovenova 641/9, Brno', [
      { street: 'Beethovenova' }, { housenumber: '641/9' },
      { locality: 'Brno' }
    ]]
  ]

  for (const [input, expected] of fixtures) {
    check(input, expected)
  }
}

/**
 * Register the Czech address test cases with tape.
 *
 * @param {Function} tape - test registration function, called as tape(name, testFunction)
 * @param {object} common - shared test helpers, passed through to testcase()
 */
module.exports.all = (tape, common) => {
  // prefix every test name with the suite label
  // (label uses the same three-letter form as the sibling address suites)
  function test (name, testFunction) {
    return tape(`address CZE: ${name}`, testFunction)
  }

  testcase(test, common)
}
5 changes: 5 additions & 0 deletions test/address.deu.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ const testcase = (test, common) => {
{ locality: 'Munich' }, { country: 'Germany' }
])

assert('Esplanade 17, Berlin', [
{ street: 'Esplanade' }, { housenumber: '17' },
{ locality: 'Berlin' }
])

assert('Königsallee Düsseldorf', [
{ street: 'Königsallee' },
{ locality: 'Düsseldorf' }
Expand Down
4 changes: 4 additions & 0 deletions test/address.fra.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@ const testcase = (test, common) => {
assert(`10 Boulevard Saint-Germains Paris`, [
{ housenumber: '10' }, { street: `Boulevard Saint-Germains` }, { locality: 'Paris' }
])

assert(`Paris 75000, France`, [
{ locality: 'Paris' }, { postcode: '75000' }, { country: 'France' }
])
}

module.exports.all = (tape, common) => {
Expand Down
16 changes: 16 additions & 0 deletions test/address.hrv.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
const testcase = (test, common) => {
  const check = common.assert(test)

  // [input, expected solution] pairs, checked in order
  const fixtures = [
    ['Zadarska 17, Pula', [
      { street: 'Zadarska' }, { housenumber: '17' },
      { locality: 'Pula' }
    ]]
  ]

  for (const [input, expected] of fixtures) {
    check(input, expected)
  }
}

module.exports.all = (tape, common) => {
  // wrap tape so that every test name is prefixed with the suite label
  const test = (name, testFunction) => tape(`address HRV: ${name}`, testFunction)

  testcase(test, common)
}
16 changes: 16 additions & 0 deletions test/address.pol.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
const testcase = (test, common) => {
  const check = common.assert(test)

  // [input, expected solution] pairs, checked in order
  const fixtures = [
    ['Szewska 6, Kraków', [
      { street: 'Szewska' }, { housenumber: '6' },
      { locality: 'Kraków' }
    ]]
  ]

  for (const [input, expected] of fixtures) {
    check(input, expected)
  }
}

module.exports.all = (tape, common) => {
  // wrap tape so that every test name is prefixed with the suite label
  const test = (name, testFunction) => tape(`address POL: ${name}`, testFunction)

  testcase(test, common)
}
16 changes: 16 additions & 0 deletions test/address.svk.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
const testcase = (test, common) => {
  const check = common.assert(test)

  // [input, expected solution] pairs, checked in order
  const fixtures = [
    ['Divadelná 41/3, Trnava', [
      { street: 'Divadelná' }, { housenumber: '41/3' },
      { locality: 'Trnava' }
    ]]
  ]

  for (const [input, expected] of fixtures) {
    check(input, expected)
  }
}

module.exports.all = (tape, common) => {
  // wrap tape so that every test name is prefixed with the suite label
  const test = (name, testFunction) => tape(`address SVK: ${name}`, testFunction)

  testcase(test, common)
}