Skip to content

Commit

Permalink
closes #12; normalizes the different data sets for
Browse files Browse the repository at this point in the history
proper matches till pinpoint level.
  • Loading branch information
mbasa committed Jun 10, 2024
1 parent 73cd2da commit bf9db77
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 49 deletions.
42 changes: 21 additions & 21 deletions data-patches/isj/patches/address_s.csv
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
todofuken,shikuchoson,lat,lon,code,geog
北海道,札幌市,43.061972,141.354374,01100,SRID=4326;POINT(141.354374 43.061972)
宮城県,仙台市,38.268008,140.869617,04100,SRID=4326;POINT(140.869617 38.268008)
埼玉県,さいたま市,35.861515,139.645502,11100,SRID=4326;POINT(139.645502 35.861515)
千葉県,千葉市,35.607331,140.10638,12100,SRID=4326;POINT(140.10638 35.607331)
神奈川県,横浜市,35.444035,139.637954,14100,SRID=4326;POINT(139.637954 35.444035)
神奈川県,川崎市,35.530806,139.703012,14130,SRID=4326;POINT(139.703012 35.530806)
神奈川県,相模原市,35.571376,139.373268,14150,SRID=4326;POINT(139.373268 35.571376)
新潟県,新潟市,37.916128,139.036402,15100,SRID=4326;POINT(139.036402 37.916128)
静岡県,静岡市,34.975473,138.382388,22100,SRID=4326;POINT(138.382388 34.975473)
静岡県,浜松市,34.710865,137.726117,22130,SRID=4326;POINT(137.726117 34.710865)
愛知県,名古屋市,35.181433,136.906421,23100,SRID=4326;POINT(136.906421 35.181433)
京都府,京都市,35.011574,135.768181,26100,SRID=4326;POINT(135.768181 35.011574)
大阪府,大阪市,34.693891,135.502046,27100,SRID=4326;POINT(135.502046 34.693891)
大阪府,堺市,34.573354,135.48302,27140,SRID=4326;POINT(135.48302 34.573354)
兵庫県,神戸市,34.689495,135.195728,28100,SRID=4326;POINT(135.195728 34.689495)
岡山県,岡山市,34.655107,133.919566,33100,SRID=4326;POINT(133.919566 34.655107)
広島県,広島市,34.385253,132.455337,34100,SRID=4326;POINT(132.455337 34.385253)
福岡県,北九州市,33.883408,130.875183,40100,SRID=4326;POINT(130.875183 33.883408)
福岡県,福岡市,33.590313,130.401735,40130,SRID=4326;POINT(130.401735 33.590313)
熊本県,熊本市,32.803078,130.707897,43100,SRID=4326;POINT(130.707897 32.803078)
todofuken,shikuchoson,tr_shikuchoson,lat,lon,code,geog
北海道,札幌市,,43.061972,141.354374,01100,SRID=4326;POINT(141.354374 43.061972)
宮城県,仙台市,,38.268008,140.869617,04100,SRID=4326;POINT(140.869617 38.268008)
埼玉県,さいたま市,,35.861515,139.645502,11100,SRID=4326;POINT(139.645502 35.861515)
千葉県,千葉市,,35.607331,140.10638,12100,SRID=4326;POINT(140.10638 35.607331)
神奈川県,横浜市,,35.444035,139.637954,14100,SRID=4326;POINT(139.637954 35.444035)
神奈川県,川崎市,,35.530806,139.703012,14130,SRID=4326;POINT(139.703012 35.530806)
神奈川県,相模原市,,35.571376,139.373268,14150,SRID=4326;POINT(139.373268 35.571376)
新潟県,新潟市,,37.916128,139.036402,15100,SRID=4326;POINT(139.036402 37.916128)
静岡県,静岡市,,34.975473,138.382388,22100,SRID=4326;POINT(138.382388 34.975473)
静岡県,浜松市,,34.710865,137.726117,22130,SRID=4326;POINT(137.726117 34.710865)
愛知県,名古屋市,,35.181433,136.906421,23100,SRID=4326;POINT(136.906421 35.181433)
京都府,京都市,,35.011574,135.768181,26100,SRID=4326;POINT(135.768181 35.011574)
大阪府,大阪市,,34.693891,135.502046,27100,SRID=4326;POINT(135.502046 34.693891)
大阪府,堺市,,34.573354,135.48302,27140,SRID=4326;POINT(135.48302 34.573354)
兵庫県,神戸市,,34.689495,135.195728,28100,SRID=4326;POINT(135.195728 34.689495)
岡山県,岡山市,,34.655107,133.919566,33100,SRID=4326;POINT(133.919566 34.655107)
広島県,広島市,,34.385253,132.455337,34100,SRID=4326;POINT(132.455337 34.385253)
福岡県,北九州市,,33.883408,130.875183,40100,SRID=4326;POINT(130.875183 33.883408)
福岡県,福岡市,,33.590313,130.401735,40130,SRID=4326;POINT(130.401735 33.590313)
熊本県,熊本市,,32.803078,130.707897,43100,SRID=4326;POINT(130.707897 32.803078)
10 changes: 10 additions & 0 deletions sql/abr/convertABRTables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,13 @@ where a.lg_code = b.lg_code and
a.rsdt_id = b.rsdt_id and
COALESCE(a.rsdt2_id,'') = COALESCE(b.rsdt2_id,'') and
substr(a.lg_code,1,2) = substr(c.lg_code,1,2);

--
-- Normalize oaza and shikuchoson data in address_g table.
-- Both derived columns are filled by a single statement so that every row
-- is rewritten once instead of twice. The update is intentionally
-- unfiltered: all rows of address_g are normalized.
--
update pggeocoder.address_g
   set tr_ooaza       = normalizeAddr(ooaza),
       tr_shikuchoson = normalizeAddr(shikuchoson);
4 changes: 4 additions & 0 deletions sql/createTables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ create table pggeocoder.address_t (
create table pggeocoder.address_s (
todofuken varchar(60),
shikuchoson varchar(60),
tr_shikuchoson varchar(60),
lat float,
lon float,
code varchar(5),
Expand All @@ -31,6 +32,7 @@ create table pggeocoder.address_s (
create table pggeocoder.address_o (
todofuken varchar(60),
shikuchoson varchar(60),
tr_shikuchoson varchar(60),
ooaza varchar(60),
tr_ooaza varchar(60),
lat float,
Expand All @@ -52,7 +54,9 @@ create table pggeocoder.address_c (
create table pggeocoder.address_g (
todofuken varchar(60),
shikuchoson varchar(60),
tr_shikuchoson varchar(60),
ooaza varchar(60),
tr_ooaza varchar(60),
chiban varchar(60),
go varchar(60),
lat float,
Expand Down
10 changes: 10 additions & 0 deletions sql/isj/convertISJDatas.sql
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,13 @@ insert into pggeocoder.address_t (todofuken, lat, lon, code, geog)
-- Normalize oaza and shikuchoson data in address_o table.
-- Both derived columns are filled by a single statement so that every row
-- is rewritten once instead of twice. The update is intentionally
-- unfiltered: all rows of address_o are normalized.
--
update pggeocoder.address_o
   set tr_ooaza       = normalizeAddr(ooaza),
       tr_shikuchoson = normalizeAddr(shikuchoson);

--
-- Normalize shikuchoson data in address_s table (all rows, intentionally
-- unfiltered)
--
update pggeocoder.address_s set tr_shikuchoson = normalizeAddr(shikuchoson);
7 changes: 5 additions & 2 deletions sql/maintTables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ create index address_t2 on pggeocoder.address_t(code);
create index address_s1 on pggeocoder.address_s(todofuken);
create index address_s2 on pggeocoder.address_s(shikuchoson);
create index address_s3 on pggeocoder.address_s(code);
-- Index on the normalized shikuchoson name.
-- NOTE(review): the geocoder's address_s lookups compare the input with
-- "LIKE tr_shikuchoson||'%'" (column on the pattern side), which a plain
-- btree index cannot serve -- confirm this index is actually used.
create index address_s4 on pggeocoder.address_s(tr_shikuchoson);

--
-- creating index for address_o
Expand All @@ -28,7 +29,7 @@ create index address_o1 on pggeocoder.address_o(todofuken);
create index address_o2 on pggeocoder.address_o(shikuchoson);
create index address_o3 on pggeocoder.address_o(ooaza);
create index address_o4 on pggeocoder.address_o(code);
-- normalized shikuchoson; the geocoder filters address_o on it by equality
create index address_o5 on pggeocoder.address_o(tr_shikuchoson);

--
-- creating index for address
--
Expand All @@ -44,7 +45,9 @@ create index address_g1 on pggeocoder.address_g(shikuchoson);
create index address_g2 on pggeocoder.address_g(ooaza);
create index address_g3 on pggeocoder.address_g(chiban);
create index address_g4 on pggeocoder.address_g(go);
create index address_g5 on pggeocoder.address_g using gist( geog );
create index address_g5 on pggeocoder.address_g(tr_ooaza);
create index address_g6 on pggeocoder.address_g(tr_shikuchoson);
create index address_g7 on pggeocoder.address_g using gist( geog );

--
-- for Reverse Geocoding
Expand Down
57 changes: 31 additions & 26 deletions sql/pgGeocoder.sql
Original file line number Diff line number Diff line change
Expand Up @@ -316,17 +316,18 @@ BEGIN

address := replace(paddress,' ','');
address := replace(address,' ','');
address := normalizeAddr( address );

IF r_todofuken <> '' THEN
tmpstr := split_part(address,r_todofuken,2);
SELECT INTO rec * FROM pggeocoder.address_s WHERE
todofuken = r_todofuken AND
tmpstr LIKE shikuchoson||'%'
ORDER BY length(shikuchoson) DESC;
tmpstr LIKE tr_shikuchoson||'%'
ORDER BY length(tr_shikuchoson) DESC;
ELSE
SELECT INTO rec * FROM pggeocoder.address_s WHERE
address LIKE shikuchoson||'%'
ORDER BY length(shikuchoson) DESC;
address LIKE tr_shikuchoson||'%'
ORDER BY length(tr_shikuchoson) DESC;
END IF;

--
Expand All @@ -337,10 +338,10 @@ BEGIN
IF r_todofuken <> '' THEN
SELECT INTO rec * FROM pggeocoder.address_s WHERE
todofuken = r_todofuken AND
address LIKE '%'||substr(shikuchoson,strpos(shikuchoson,'')+1)||'%';
address LIKE '%'||substr(tr_shikuchoson,strpos(tr_shikuchoson,'')+1)||'%';
ELSE
SELECT INTO rec * FROM pggeocoder.address_s WHERE
address LIKE substr(shikuchoson,strpos(shikuchoson,'')+1)||'%';
address LIKE substr(tr_shikuchoson,strpos(tr_shikuchoson,'')+1)||'%';
END IF;
END IF;

Expand Down Expand Up @@ -371,6 +372,7 @@ DECLARE
r_todofuken ALIAS FOR $2;
r_shikuchoson ALIAS FOR $3;
address varchar;
t_shikuchoson varchar;
tmpstr varchar;
tmpaddr varchar;
rec RECORD;
Expand All @@ -387,15 +389,19 @@ BEGIN
address := replace(paddress,' ','');
address := replace(address,' ','');

tmpstr := split_part(address,r_shikuchoson,2);

IF tmpstr = '' THEN
tmpstr := split_part(address,
substr(r_shikuchoson,strpos(r_shikuchoson,'')+1),2);
t_shikuchoson := normalizeAddr( r_shikuchoson );

tmpstr := split_part(normalizeAddr(address),t_shikuchoson,2);

IF tmpstr = '' THEN
tmpstr := split_part(normalizeAddr(address),
substr(t_shikuchoson,strpos(t_shikuchoson,'')+1),2);
END IF;

tmpstr := tmpstr || '-'; -- to match addresses like 杉並区清水1
tmpaddr := normalizeAddr( tmpstr );
--tmpstr := tmpstr || '-';
--tmpaddr := normalizeAddr( tmpstr );

tmpaddr := tmpstr || '-'; -- to match addresses like 杉並区清水1

--
-- Trying to parse Kyoto Addresses which contains Directions
Expand All @@ -408,7 +414,7 @@ BEGIN

SELECT INTO rec *,length(tr_ooaza) AS length FROM pggeocoder.address_o WHERE
todofuken = r_todofuken AND
shikuchoson = r_shikuchoson AND
tr_shikuchoson = t_shikuchoson AND
strpos(tmpaddr,tr_ooaza) > 1 ORDER BY length DESC LIMIT 1;

IF FOUND THEN
Expand All @@ -431,7 +437,7 @@ BEGIN

SELECT INTO rec *,length(tr_ooaza) AS length FROM pggeocoder.address_o WHERE
todofuken = r_todofuken AND
shikuchoson = r_shikuchoson AND
tr_shikuchoson = t_shikuchoson AND
strpos(tmpaddr,tr_ooaza) = 1 ORDER BY length DESC LIMIT 1;

IF FOUND THEN
Expand All @@ -451,7 +457,7 @@ BEGIN
--
SELECT INTO rec *,length(tr_ooaza) AS length FROM pggeocoder.address_o WHERE
todofuken = r_todofuken AND
shikuchoson = r_shikuchoson AND
tr_shikuchoson = t_shikuchoson AND
strpos(''||tmpaddr,tr_ooaza) = 1 ORDER BY length DESC LIMIT 1;

IF FOUND THEN
Expand All @@ -472,10 +478,9 @@ BEGIN
-- the 'Order By length' slows down the operation a bit
-- but produces more accurate matches.
--

SELECT INTO rec *,length(tr_ooaza) AS length FROM pggeocoder.address_o WHERE
todofuken = r_todofuken AND
shikuchoson = r_shikuchoson AND
tr_shikuchoson = t_shikuchoson AND
strpos(tmpaddr,tr_ooaza) = 1 ORDER BY length DESC LIMIT 1;

IF FOUND THEN
Expand Down Expand Up @@ -649,10 +654,10 @@ BEGIN
END IF;

tmpstr1 := 'SELECT * FROM pggeocoder.address_g WHERE ' ||
'shikuchoson = ' || quote_literal(r_shikuchoson) || ' AND ' ||
'ooaza = ' || quote_literal(r_ooaza) || ' AND ' ||
'chiban = ' || quote_literal(r_chiban) || ' AND ' ||
'go = ' || quote_literal(tmpstr3);
'tr_shikuchoson = ' || quote_literal(normalizeAddr(r_shikuchoson)) || ' AND ' ||
'tr_ooaza = ' || quote_literal(normalizeAddr(r_ooaza)) || ' AND ' ||
'chiban = ' || quote_literal(r_chiban) || ' AND ' ||
'go = ' || quote_literal(tmpstr3);

--RAISE NOTICE 'tmpstr1 %',tmpstr1;

Expand All @@ -662,11 +667,11 @@ BEGIN
output.code := 1;
output.x := rec.lon;
output.y := rec.lat;
output.address := rec.todofuken||rec.shikuchoson||
rec.ooaza||rec.chiban||'-'||tmpstr3;
output.address := rec.todofuken||r_shikuchoson||
r_ooaza||rec.chiban||'-'||tmpstr3;
output.todofuken := rec.todofuken;
output.shikuchoson:= rec.shikuchoson;
output.ooaza := rec.ooaza;
output.shikuchoson:= r_shikuchoson;
output.ooaza := r_ooaza;
output.chiban := rec.chiban;
output.go := tmpstr3;
END IF;
Expand Down

0 comments on commit bf9db77

Please sign in to comment.