bithumb_listings.py
import random
import re
import time
from datetime import datetime, timedelta

import requests
from bs4 import BeautifulSoup

def parse_korean_date(date_string):
    """Parse a Korean date string such as "2024년 5월 3일" into a datetime."""
    date_parts = re.findall(r'\d+', date_string)
    return datetime(int(date_parts[0]), int(date_parts[1]), int(date_parts[2]))
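
# A minimal sanity check (a sketch: "2024년 5월 3일" is an assumed input format;
# the parser only needs three digit groups in year, month, day order):
#
#     >>> parse_korean_date("2024년 5월 3일")
#     datetime.datetime(2024, 5, 3, 0, 0)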


def get_random_user_agent():
    user_agents = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36"
    ]
    return random.choice(user_agents)


def get_headers():
    # Browser-like headers, with a rotating User-Agent, to reduce the chance
    # of being served a block page.
    return {
        "User-Agent": get_random_user_agent(),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Cache-Control": "max-age=0",
    }
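
# Optional variant (a sketch, not wired into the code below): a requests.Session
# reuses TCP connections across the paginated fetches and can carry the same
# rotating headers:
#
#     session = requests.Session()
#     session.headers.update(get_headers())
#     response = session.get("https://feed.bithumb.com/notice", params={"page": 1})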


def fetch_bithumb_listings():
    url = "https://feed.bithumb.com/notice"
    one_year_ago = datetime.now() - timedelta(days=365)
    listing_notices = []
    page = 1
    old_notice_count = 0
    max_old_notices = 5  # Stop after encountering 5 consecutive old notices
    max_pages = 10  # Scrape a maximum of 10 pages

    while page <= max_pages:
        headers = get_headers()
        response = requests.get(f"{url}?page={page}", headers=headers)
        soup = BeautifulSoup(response.content, 'html.parser')
        notices = soup.select('li[class^="ContentList_notice-list"]')

        print(f"--- Page {page} ---")
        print(f"Found {len(notices)} notices")
        if not notices:
            print("No more notices found, stopping scraping")
            break

        for i, notice in enumerate(notices, 1):
            print(f"\nNotice {i}:")
            print(notice.prettify())
            time.sleep(random.uniform(1, 3))  # Polite random delay between notices

            title = notice.select_one('strong[class^="ContentList_title"]').text.strip()
            date_string = notice.select_one('span[class^="ContentList_date"]').text.strip()
            notice_date = parse_korean_date(date_string)

            # Count consecutive notices older than a year; bail out once we
            # have seen max_old_notices of them in a row.
            if notice_date < one_year_ago:
                old_notice_count += 1
                if old_notice_count >= max_old_notices:
                    return listing_notices
                continue
            else:
                old_notice_count = 0  # Reset counter

            # Keep only listing-related notices (상장 = listing, 추가 = addition, 신규 = new).
            if re.search(r'(상장|추가|신규)', title):
                link = "https://feed.bithumb.com" + notice.find('a')['href']
                detail_response = requests.get(link, headers=get_headers())
                detail_soup = BeautifulSoup(detail_response.content, 'html.parser')
                content_div = detail_soup.find('div', class_='notice_content')
                content = content_div.text if content_div else ""

                # The ticker usually appears in square brackets in the title.
                coin_match = re.search(r'\[(.*?)\]', title)
                coin = coin_match.group(1) if coin_match else "Unknown"

                # Extract deposit (입금) / withdrawal (출금) / trading (거래) start
                # times, e.g. "입금 : 2024년 5월 3일 15:00".
                deposit_time = re.search(r'입금\s*:\s*([\d]{4}년\s*[\d]{1,2}월\s*[\d]{1,2}일\s*[\d]{2}:[\d]{2})', content)
                withdrawal_time = re.search(r'출금\s*:\s*([\d]{4}년\s*[\d]{1,2}월\s*[\d]{1,2}일\s*[\d]{2}:[\d]{2})', content)
                trading_time = re.search(r'거래\s*:\s*([\d]{4}년\s*[\d]{1,2}월\s*[\d]{1,2}일\s*[\d]{2}:[\d]{2})', content)

                listing_notices.append({
                    'title': title,
                    'link': link,
                    'date': date_string,
                    'coin': coin,
                    'deposit_time': deposit_time.group(1) if deposit_time else "Not specified",
                    'withdrawal_time': withdrawal_time.group(1) if withdrawal_time else "Not specified",
                    'trading_time': trading_time.group(1) if trading_time else "Not specified"
                })
                print(f"Processed notice: {title}")

        page += 1

    return listing_notices
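
# Shape of one returned record (values below are illustrative and hypothetical,
# not real data):
#
#     {
#         'title': '[EX] 예시코인(EX) 원화 마켓 추가',
#         'link': 'https://feed.bithumb.com/notice/12345',
#         'date': '2024년 5월 3일',
#         'coin': 'EX',
#         'deposit_time': '2024년 5월 3일 15:00',
#         'withdrawal_time': 'Not specified',
#         'trading_time': '2024년 5월 3일 18:00',
#     }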


def save_to_markdown(listings):
    today = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    filename = f"bithumb_listings_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(f"# Bithumb Listing Announcements (Generated: {today})\n\n")
        for listing in listings:
            f.write(f"## {listing['title']}\n\n")
            f.write(f"- Publication Date: {listing['date']}\n")
            f.write(f"- Coin: {listing['coin']}\n")
            f.write(f"- Deposit Opening Time: {listing['deposit_time']}\n")
            f.write(f"- Withdrawal Opening Time: {listing['withdrawal_time']}\n")
            f.write(f"- Trading Start Time: {listing['trading_time']}\n")
            f.write(f"- Link: [{listing['link']}]({listing['link']})\n\n")
        f.write(f"\n\n*This document was automatically generated on {today}*")
    print(f"Results saved to {filename}")
print("Starting to scrape Bithumb announcements...")
listings = fetch_bithumb_listings()
print(f"Scraped {len(listings)} relevant announcements")
save_to_markdown(listings)
print("Scraping completed")