-
Notifications
You must be signed in to change notification settings - Fork 0
/
mongodb_downloader.py
57 lines (49 loc) · 2.08 KB
/
mongodb_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import pandas as pd
import pymongo
import pymongo.database
import pymongo.collection
import pymongo.results
import pymongo.server_api
from json import loads as json_loads
from pathlib import Path
from helpful_funcs import read_json_file_to_dict
from tabulate import tabulate
from typing import List
def connect_to_mongo_database(creds_json_file_path:Path=None) -> pymongo.database.Database:
    """
    Build a MongoDB client from a stored connection string and hand back the
    `statsbomb` database, whose collections can then be queried.

    ## Parameters
    creds_json_file_path: Path
        Location of the json credentials file; its 'connection_string' entry
        is what gets passed to the client. When left as None, the file
        'creds/mongodb_creds.json' under the current working directory is used.
        (the credentials are kept out of the public repo on purpose)

    ## Returns
    pymongo.database.Database
        Handle to the `statsbomb` database
    """
    # no explicit path given -> fall back to the default location under the cwd
    creds_path = creds_json_file_path
    if creds_path is None:
        creds_path = Path.cwd().joinpath('creds', 'mongodb_creds.json')

    mongo_uri = read_json_file_to_dict(creds_path)['connection_string']

    # the client is created against version '1' of the server API
    api_version = pymongo.server_api.ServerApi('1')
    mongo_client = pymongo.MongoClient(mongo_uri, server_api=api_version)
    return mongo_client['statsbomb']
def download_collection_to_list_of_dicts(collection:pymongo.collection.Collection) -> List[dict]:
    """
    Fetch every document of a mongodb collection and return them as a plain
    list of dictionaries (e.g. as input for building a pandas dataframe).

    ## Parameters
    collection: pymongo.collection.Collection
        Collection whose documents should be fetched

    ## Returns
    List[dict]
        One dictionary per document, with the '_id' field left out
    """
    match_all_documents = {}
    # projection that excludes the '_id' field from every returned document
    without_object_id = {'_id': False}
    return list(collection.find(match_all_documents, without_object_id))
if __name__ == '__main__':
    # Manual smoke test: download the `competitions` collection and report
    # the type of its first document.
    db = connect_to_mongo_database()
    competitions_collection = db.competitions
    docs_list = download_collection_to_list_of_dicts(competitions_collection)
    # An empty collection yields an empty list; indexing [0] would then raise
    # IndexError, so only inspect the first document when there is one.
    if docs_list:
        print(type(docs_list[0]))
    else:
        print('competitions collection returned no documents')