Skip to content

Commit

Permalink
Merge pull request #106 from technologiestiftung/staging
Browse files Browse the repository at this point in the history
fix: age calculation respecting pflanzjahr=0 and preprocess trees dat…
  • Loading branch information
Jaszkowic authored Oct 17, 2023
2 parents f539ddb + ccc253b commit 6cfed52
Show file tree
Hide file tree
Showing 5 changed files with 232 additions and 124 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/test-harvest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ env:
MAPBOXTILESET: "xyz"
MAPBOXLAYERNAME: "abc"
LOGGING: INFO
SKIP_MAPBOX: "True"
# DATABASE_URL: postgresql://fangorn:ent@localhost:5432/trees?schema=public

jobs:
Expand Down Expand Up @@ -60,7 +61,7 @@ jobs:
id: api-start
run: cd api && supabase start | grep -w "service_role key" | cut -d ":" -f 2 | xargs | tr -d '\n' | awk '{print "service_role_key="$1}' >> "$GITHUB_OUTPUT" && cd ..
- name: run the harvester
run: docker run --env PG_SERVER='0.0.0.0' --env PG_DB --env PG_PORT --env PG_USER --env PG_PASS --env SUPABASE_URL --env SUPABASE_SERVICE_ROLE_KEY='${{ steps.api-start.outputs.service_role_key }}' --env SUPABASE_BUCKET_NAME --env MAPBOXTOKEN --env MAPBOXUSERNAME --env LOGGING --env OUTPUT --network host technologiestiftung/giessdenkiez-de-dwd-harvester:test
run: docker run --env PG_SERVER='0.0.0.0' --env SKIP_MAPBOX --env PG_DB --env PG_PORT --env PG_USER --env PG_PASS --env SUPABASE_URL --env SUPABASE_SERVICE_ROLE_KEY='${{ steps.api-start.outputs.service_role_key }}' --env SUPABASE_BUCKET_NAME --env MAPBOXTOKEN --env MAPBOXUSERNAME --env LOGGING --env OUTPUT --network host technologiestiftung/giessdenkiez-de-dwd-harvester:test
- name: stop the api
run: cd api && supabase stop && cd ..
release:
Expand Down
5 changes: 4 additions & 1 deletion harvester/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,14 @@ RUN cd /app/ && python -m pip install --no-warn-script-location --prefix=/instal
FROM base as app
COPY --from=builder /install /usr/local

RUN apt-get update && apt-get -y install git && apt-get -y install make
RUN git clone https://github.com/mapbox/tippecanoe.git && cd tippecanoe && make -j && make install

# COPY harvester.py /app/
# COPY prepare.py /app/
# COPY grid/ grid/
# COPY assets/ /app/assets
COPY . /app/


CMD ["python", "/app/harvester.py"]
CMD python /app/harvester.py && python /app/mapbox-tree-update.py
121 changes: 0 additions & 121 deletions harvester/harvester.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,6 @@
from dotenv import load_dotenv
import logging
import os
import sys
import math
import boto3
import requests
import json


# setting up logging
logging.basicConfig()
Expand Down Expand Up @@ -305,37 +299,6 @@
features_light.append(feature_template.format(
cell[1], cell[0], sum(clean[cellindex])))

def check_file_exists_in_supabase_storage(file_name):
url = f'{SUPABASE_URL}/storage/v1/object/info/public/{SUPABASE_BUCKET_NAME}/{file_name}'
response = requests.get(url)
return response.status_code == 200

def upload_file_to_supabase_storage(file_path, file_name):
try:
file = open(file_path, 'rb')
file_url = f'{SUPABASE_URL}/storage/v1/object/{SUPABASE_BUCKET_NAME}/{file_name}'
r = requests.put if check_file_exists_in_supabase_storage(file_name) else requests.post
response = r(
file_url,
files={'file': file},
headers={
'Authorization': f'Bearer {SUPABASE_SERVICE_ROLE_KEY}',
'ContentType': 'application/geo+json',
'AcceptEncoding': 'gzip, deflate, br'
},
)

if response.status_code == 200:
logging.info("✅ Uploaded {} to supabase storage".format(file_name))
else:
logging.warning(response.status_code)
logging.warning(response.content)
logging.warning("❌ Could not upload {} to supabase storage".format(file_name))

except Exception as error:
logging.warning(error)
logging.warning("❌ Could not upload {} supabase storage".format(file_name))

def finishGeojson(feature_list, file_name):
geojson = '{{"type":"FeatureCollection","properties":{{"start":"{}","end":"{}"}},"features":[{}]}}'.format(
startdate, enddate, ",".join(feature_list))
Expand All @@ -345,93 +308,9 @@ def finishGeojson(feature_list, file_name):
text_file.close()
n = None

upload_file_to_supabase_storage(path + file_name, file_name)

finishGeojson(features, "weather.geojson")
finishGeojson(features_light, "weather_light.geojson")

# create a CSV with all trees (id, lat, lng, radolan_sum)
with conn.cursor() as cur:
# WARNING: The db is still mislabeled lat <> lng
cur.execute("SELECT trees.id, trees.lat, trees.lng, trees.radolan_sum, (date_part('year', CURRENT_DATE) - trees.pflanzjahr) as age FROM trees WHERE ST_CONTAINS(ST_SetSRID (( SELECT ST_EXTENT (geometry) FROM radolan_geometry), 4326), trees.geom)")
trees = cur.fetchall()
trees_head = "id,lng,lat,radolan_sum,age"
trees_csv = trees_head
pLimit = math.ceil(len(trees) / 4)
pCount = 0
pfCount = 1
singleCSV = trees_head
singleCSVs = []
for tree in trees:
newLine = "\n"
newLine += "{},{},{},{}".format(tree[0], tree[1], tree[2], tree[3])
if tree[4] is None:
newLine += ","
else:
newLine += ",{}".format(int(tree[4]))
singleCSV += newLine
trees_csv += newLine
pCount += 1
if pCount >= pLimit:
text_file = open(path + "trees-p{}.csv".format(pfCount), "w")
singleCSVs.append(singleCSV)
n = text_file.write(singleCSV)
text_file.close()
n = None
pfCount += 1
pCount = 0
singleCSV = trees_head

text_file = open(path + "trees-p{}.csv".format(pfCount), "w")
singleCSVs.append(singleCSV)
n = text_file.write(singleCSV)
text_file.close()
n = None

text_file = open(path + "trees.csv", "w")
n = text_file.write(trees_csv)
text_file.close()
n = None

upload_file_to_supabase_storage(path + "trees.csv", "trees.csv")

for i in range(4):
upload_file_to_supabase_storage(path + "trees-p{}.csv".format(i + 1), "trees-p{}.csv".format(i + 1))

# send the updated csv to mapbox

# get upload credentials
try:
url = "https://api.mapbox.com/uploads/v1/{}/credentials?access_token={}".format(
os.getenv("MAPBOXUSERNAME"), os.getenv("MAPBOXTOKEN"))
response = requests.post(url)
s3_credentials = json.loads(response.content)

# upload latest data

s3mapbox = boto3.client('s3', aws_access_key_id=s3_credentials["accessKeyId"],
aws_secret_access_key=s3_credentials["secretAccessKey"], aws_session_token=s3_credentials["sessionToken"])
s3mapbox.upload_file(path + "trees.csv",
s3_credentials["bucket"], s3_credentials["key"])

# tell mapbox that new data has arrived

url = "https://api.mapbox.com/uploads/v1/{}?access_token={}".format(
os.getenv("MAPBOXUSERNAME"), os.getenv("MAPBOXTOKEN"))
payload = '{{"url":"http://{}.s3.amazonaws.com/{}","tileset":"{}.{}","name":"{}"}}'.format(
s3_credentials["bucket"], s3_credentials["key"], os.getenv("MAPBOXUSERNAME"), os.getenv("MAPBOXTILESET"), os.getenv("MAPBOXLAYERNAME"))
headers = {'content-type': 'application/json',
'Accept-Charset': 'UTF-8', 'Cache-Control': 'no-cache'}
response = requests.post(url, data=payload, headers=headers)
# wohooo!
logging.info("✅ Map updated to timespan: {} to {}".format(
startdate, enddate))
except:
logging.warning(
"could not upload tree data to mapbox for vector tiles")
trees_csv = None
csv_data = None

# remove all temporary files
shutil.rmtree(path)

Expand Down
Loading

0 comments on commit 6cfed52

Please sign in to comment.