Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add new StreetNumericClassification #176

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
10 changes: 10 additions & 0 deletions classification/StreetNumericClassification.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
const Classification = require('./Classification')

/**
 * Classification attached to tokens recognised as part of a street name
 * that starts with a number. Instances carry the label `street_numeric`.
 */
class StreetNumericClassification extends Classification {
  /**
   * @param {number} confidence - passed through to the Classification constructor
   * @param {Object} [meta] - passed through to the Classification constructor
   */
  constructor (confidence, meta) {
    super(confidence, meta)

    // label identifying this classification type
    this.label = 'street_numeric'
  }
}

module.exports = StreetNumericClassification
29 changes: 29 additions & 0 deletions classifier/StreetNumericClassifier.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
const PhraseClassifier = require('./super/PhraseClassifier')
const StreetNumericClassification = require('../classification/StreetNumericClassification')
const libpostal = require('../resources/libpostal/libpostal')


// Languages which use street names that start with numbers
// (eg. Polish '11 Listopada'). The 'numeric_streets.txt' dictionary
// is requested for every language listed here.
const languages = ['pl']

/**
 * Phrase classifier which marks spans found in the 'numeric_streets.txt'
 * dictionaries with a StreetNumericClassification.
 */
class StreetNumericClassifier extends PhraseClassifier {
  /**
   * Create the lookup index and populate it from the libpostal
   * 'numeric_streets.txt' dictionaries of the configured languages,
   * requesting lowercased entries via the `lowercase` option.
   */
  setup () {
    this.index = {}
    libpostal.load(this.index, languages, 'numeric_streets.txt', {
      lowercase: true
    })
  }

  /**
   * Classify a single span.
   *
   * @param {Span} span - span to inspect; classified in place on a match
   */
  each (span) {
    // skip spans which contain numerals
    if (span.contains.numerals) { return }

    // use an inverted index for full token matching as it's O(1).
    // hasOwnProperty is called via Object.prototype so that no key stored
    // in the index can ever shadow the method being invoked.
    if (Object.prototype.hasOwnProperty.call(this.index, span.norm)) {
      span.classify(new StreetNumericClassification(1))
    }
  }
}

module.exports = StreetNumericClassifier
49 changes: 49 additions & 0 deletions classifier/StreetNumericClassifier.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
const StreetNumericClassifier = require('./StreetNumericClassifier')
const StreetNumericClassification = require('../classification/StreetNumericClassification')
const Span = require('../tokenization/Span')
const classifier = new StreetNumericClassifier()

module.exports.tests = {}

// Test helper: wraps the text in a Span, passes it through the classifier
// and returns that same Span so its classifications can be inspected.
function classify (body) {
  const span = new Span(body)
  classifier.each(span, null, 1)
  return span
}

// A span flagged as containing numerals must be left unclassified.
module.exports.tests.contains_numerals = (test) => {
  test('contains numerals: honours contains.numerals boolean', (t) => {
    const span = new Span('example')
    span.contains.numerals = true

    classifier.each(span, null, 1)

    t.deepEqual(span.classifications, {})
    t.end()
  })
}

// Tokens used by Polish street names which start with a number
// (months, military unit terms) must receive a StreetNumericClassification.
module.exports.tests.polish_numeric_street = (test) => {
  const valid = [
    'listopada', 'maja', 'czerwca',
    'pułku', 'strzelców', 'piechoty'
  ]

  valid.forEach(token => {
    test(`polish numeric street: ${token}`, (t) => {
      const span = classify(token)

      // the classifier assigns a fixed confidence of 1 to every match;
      // there is no length-based weighting to account for here
      t.deepEqual(span.classifications, {
        StreetNumericClassification: new StreetNumericClassification(1)
      })
      t.end()
    })
  })
}

// Entry point for the test runner: registers every test group exported
// above, prefixing each test name with the classifier name.
module.exports.all = (tape, common) => {
  const test = (name, testFunction) => {
    return tape(`StreetNumericClassifier: ${name}`, testFunction)
  }

  for (const testCase in module.exports.tests) {
    module.exports.tests[testCase](test, common)
  }
}
84 changes: 84 additions & 0 deletions classifier/scheme/street.js
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,90 @@ module.exports = [
}
]
},
{
// 11 Listopada
confidence: 0.81,
Class: StreetClassification,
scheme: [
{
is: ['NumericClassification'],
not: ['StreetClassification', 'IntersectionClassification']
},
{
is: ['StreetNumericClassification'],
not: ['StreetClassification', 'StreetPrefixClassification']
}
]
},
{
// 11 Pułku Szwoleżerów
confidence: 0.79,
Class: StreetClassification,
scheme: [
{
is: ['NumericClassification'],
not: ['StreetClassification', 'IntersectionClassification']
},
{
is: ['StreetNumericClassification'],
not: ['StreetClassification', 'StreetPrefixClassification']
},
{
is: ['StreetNumericClassification', 'AlphaClassification'],
not: ['StreetClassification', 'StreetPrefixClassification']
}
]
},
{
// 11 Pułku Strzelców Podhalańskich
confidence: 0.79,
Class: StreetClassification,
scheme: [
{
is: ['NumericClassification'],
not: ['StreetClassification', 'IntersectionClassification']
},
{
is: ['StreetNumericClassification'],
not: ['StreetClassification', 'StreetPrefixClassification']
},
{
is: ['StreetNumericClassification', 'AlphaClassification'],
not: ['StreetClassification', 'StreetPrefixClassification']
},
{
is: ['StreetNumericClassification', 'AlphaClassification'],
not: ['StreetClassification', 'StreetPrefixClassification']
}
]
},
{
// 4 Pułku Piechoty Wojska Polskiego
confidence: 0.79,
Class: StreetClassification,
scheme: [
{
is: ['NumericClassification'],
not: ['StreetClassification', 'IntersectionClassification']
},
{
is: ['StreetNumericClassification'],
not: ['StreetClassification', 'StreetPrefixClassification']
},
{
is: ['StreetNumericClassification', 'AlphaClassification'],
not: ['StreetClassification', 'StreetPrefixClassification']
},
{
is: ['StreetNumericClassification', 'AlphaClassification'],
not: ['StreetClassification', 'StreetPrefixClassification']
},
{
is: ['StreetNumericClassification', 'AlphaClassification'],
not: ['StreetClassification', 'StreetPrefixClassification']
}
]
},
{
// Boulevard du Général Charles De Gaulle
confidence: 0.81,
Expand Down
2 changes: 2 additions & 0 deletions parser/AddressParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ const AlphaNumericClassifier = require('../classifier/AlphaNumericClassifier')
const TokenPositionClassifier = require('../classifier/TokenPositionClassifier')
const HouseNumberClassifier = require('../classifier/HouseNumberClassifier')
const PostcodeClassifier = require('../classifier/PostcodeClassifier')
const StreetNumericClassifier = require('../classifier/StreetNumericClassifier')
const StreetPrefixClassifier = require('../classifier/StreetPrefixClassifier')
const StreetSuffixClassifier = require('../classifier/StreetSuffixClassifier')
const StreetProperNameClassifier = require('../classifier/StreetProperNameClassifier')
Expand Down Expand Up @@ -52,6 +53,7 @@ class AddressParser extends Parser {
new PostcodeClassifier(),
new StreetPrefixClassifier(),
new StreetSuffixClassifier(),
new StreetNumericClassifier(),
new StreetProperNameClassifier(),
new RoadTypeClassifier(),
new ToponymClassifier(),
Expand Down
65 changes: 65 additions & 0 deletions resources/pelias/dictionaries/libpostal/pl/numeric_streets.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
stycznia
lutego
marca
kwietnia
maja
czerwca
lipca
sierpnia
września
października
listopada
grudnia
pułku
brygady
batalionu
poznańskiego
kresowej
kompanii
morskiego
armii
dywizji
dywizjonu
wołyńskiej
korpusu
PLM
KDL
KDD
PP
strzelców
piechoty
eskadry
artylerii
zaodrzańskiego
lekkiej
szwoleżerów
drezdeńskiego
wspaniałych
źródeł
lotnictwa
lotniczego
lotniczej
wojska
polskiego
stawów
straconych
morskiego
kamienic
roku
elbląskiego
kaszubskiego
warszawskiego
sudeckiej
wileńskiej
praskiego
maja/łódzka
parkingowa
lecia
stufen
maja/piłsudskiego
zaodrzańskiego
południka
zakrętów
górnośląskiego
poznańskiego
kamienic
16 changes: 16 additions & 0 deletions test/address.pol.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,22 @@ const testcase = (test, common) => {
{ street: 'Żorska' }, { housenumber: '11' },
{ postcode: '47-400' }
])

assert('11 listopada 2/4', [
{ street: '11 listopada' }, { housenumber: '2/4' }
])

assert('1 Pułku Strzelców Bytomskich 2-4', [
{ street: '1 Pułku Strzelców Bytomskich' }, { housenumber: '2-4' }
])

assert('3 Warszawskiego Pułku Strzelców Polskich 11', [
{ street: '3 Warszawskiego Pułku Strzelców Polskich' }, { housenumber: '11' }
])

assert('1 Pułku Szwoleżerów 178', [
{ street: '1 Pułku Szwoleżerów' }, { housenumber: '178' }
])
}

module.exports.all = (tape, common) => {
Expand Down