# pip3 install lxml pymysql httplib2 wget pandas requests html5lib
以下是程式碼
download_history.py: 下載證交所上市/上櫃歷史交易資料的主程式。這裡使用 thread 是因為測試發現 Python 把記憶體吃光時會導致 NAS 直接關機
#!/bin/python3
import gstock
import datetime as dt
from datetime import timedelta
import time
import threading
def downloadJob(downloadDate):
    """Download the TWSE and OTC daily quote files for one date.

    Each source is attempted independently: if one download raises, the
    traceback is printed and the other source is still tried.  The
    6-second pause always runs, even after a failure, so a run of failing
    dates cannot turn into back-to-back requests.

    Args:
        downloadDate: the ``datetime.date`` whose data should be fetched.
    """
    import traceback  # local import: only needed on the failure path

    try:
        for fetch in (gstock.downloadTWSE_CSV, gstock.downloadOTC_CSV):
            try:
                fetch(downloadDate)
            except Exception:
                # Best-effort batch job: report the failure and carry on
                # with the remaining source instead of killing the worker.
                traceback.print_exc()
    finally:
        # Pause between dates (presumably to throttle requests to the
        # exchange servers -- confirm the required interval).
        time.sleep(6)
#--- main() ---
# Walk every calendar day from startDate up to and including today.
# Each day's job runs in its own thread that is joined immediately, so the
# downloads stay strictly sequential (one date at a time).
startDate = dt.date(2004, 2, 12)
stopDate = dt.date.today()
for dayOffset in range((stopDate - startDate).days + 1):
    downloadDate = startDate + timedelta(days=dayOffset)
    worker = threading.Thread(target=downloadJob, args=(downloadDate,))
    worker.start()
    worker.join()
|
download_today.py :下載上市/上櫃/期貨/選擇權的每日排程
#!/bin/python3
"""download_today.py: daily scheduled download of today's market files.

Fetches the TWSE CSV, the OTC CSV, the futures ZIP and the options ZIP for
the current date through the ``gstock`` helper module.
"""
import datetime as dt
import sys
import threading
import time
import traceback
from datetime import timedelta

import gstock


def downloadJob(downloadDate):
    """Download all four daily files for ``downloadDate``.

    Every source is attempted even if an earlier one fails; the traceback of
    each failure is printed so the scheduler log still shows what went wrong.

    Args:
        downloadDate: the ``datetime.date`` whose data should be fetched.

    Returns:
        A list with the names of the ``gstock`` functions that raised
        (empty when everything succeeded).
    """
    sources = (
        gstock.downloadTWSE_CSV,
        gstock.downloadOTC_CSV,
        gstock.downloadFuture_ZIP,
        gstock.downloadOP_ZIP,
    )
    failed = []
    for fetch in sources:
        try:
            fetch(downloadDate)
        except Exception:
            # One missing report must not prevent the remaining downloads.
            traceback.print_exc()
            failed.append(fetch.__name__)
    time.sleep(6)
    return failed


# To re-fetch the previous day instead, subtract timedelta(days=1) here.
downloadDate = dt.date.today()
failedSources = downloadJob(downloadDate)
if failedSources:
    # Keep a non-zero exit status so the scheduler can detect a failed run.
    sys.exit(1)
gstock.py: 下載程式庫
"""gstock.py: download helpers for TWSE, OTC and TAIFEX daily market files.

All output files and the download log are written below ``rootPath``.
"""
import datetime as dt
import time
from datetime import timedelta
from urllib.parse import urlencode

import pandas as pd
import requests
import wget

# common variables
rootPath = '/volume1/homes/ghosty/taifex/'
#rootPath = '~/taifex/'

# Seconds to wait on the TWSE server before giving up; without a timeout a
# stalled connection would block the job forever.
_HTTP_TIMEOUT = 60


def date_otc(date):
    """Format ``date`` as ``Y/MM/DD`` with the year reduced by 1911.

    This is the date form placed in the ``d=`` parameter of the OTC query
    (presumably the ROC calendar year -- confirm against the site).
    """
    return '{}/{:02}/{:02}'.format(date.year - 1911, date.month, date.day)


def date_twse(date):
    """Format ``date`` as ``YYYYMMDD`` for the TWSE ``date`` form field."""
    return '{}{:02}{:02}'.format(date.year, date.month, date.day)


def date_taifex(date):
    """Format ``date`` as ``YYYY_MM_DD`` as used in TAIFEX ZIP file names."""
    return '{}_{:02}_{:02}'.format(date.year, date.month, date.day)


def date_file(date):
    """Format ``date`` as ``YYYY_MM_DD`` for local output file names."""
    return '{}_{:02}_{:02}'.format(date.year, date.month, date.day)


def writelog(msg):
    """Append ``msg`` with a timestamp to ``download.log`` under ``rootPath``."""
    stamp = dt.datetime.today().strftime("%Y/%m/%d %H:%M:%S")
    # The context manager closes the log even if the write fails.
    with open(rootPath + "download.log", "a", encoding="utf-8") as fp:
        fp.write(stamp + " " + msg + "\n")


# --------------------------------
# TWSE & OTC download functions
# --------------------------------
def downloadTWSE_CSV(date):
    """Fetch the TWSE MI_INDEX CSV for ``date`` and save it under ``TWSE/``.

    An empty response body is logged as "No data" and no file is written.
    A non-success HTTP status is logged and nothing is saved, so an error
    page is never stored as if it were CSV data.

    Raises:
        requests.exceptions.RequestException: on connection errors or when
            the server does not answer within ``_HTTP_TIMEOUT`` seconds.
    """
    url = "https://www.twse.com.tw/exchangeReport/MI_INDEX"
    values = {'response': 'csv',
              'date': date_twse(date),
              'type': 'ALL'}
    agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0'
    headers = {'Content-type': 'application/x-www-form-urlencoded',
               'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
               'User-Agent': agent}
    response = requests.post(url, headers=headers, data=values,
                             timeout=_HTTP_TIMEOUT)
    if not response.ok:
        writelog('TWSE ' + date_file(date) + ' HTTP ' + str(response.status_code))
        return

    body = response.text
    if not body:
        writelog('TWSE ' + date_file(date) + ' No data')
        return

    outfile = rootPath + 'TWSE/TWSE_' + date_file(date) + '.CSV'
    # Write the re-encoded text in binary mode (the content is Chinese) and
    # in one piece, so the original line breaks are kept intact.
    with open(outfile, "wb") as fp:
        fp.write(body.encode())
    writelog('Write ' + outfile)


def downloadOTC_CSV(date):
    """Fetch the OTC quote table for ``date`` and save it under ``OTC/``.

    The first HTML table on the page is used.  If the page has no table, or
    the table has two rows or fewer, "No data" is logged and no file is
    written.
    """
    url = ('http://www.tpex.org.tw/web/stock/aftertrading/otc_quotes_no1430/'
           'stk_wn1430_print.php?l=zh-tw&d=' + date_otc(date) + '&se=EW')
    try:
        table = pd.read_html(url)[0]
    except ValueError:
        # read_html raises ValueError when the page contains no table.
        writelog('OTC ' + date_file(date) + ' No data')
        return

    if len(table) <= 2:
        writelog('OTC ' + date_file(date) + ' No data')
        return

    # Drop the last row before saving (presumably a footer/summary row --
    # confirm against the page layout).
    table = table.iloc[:-1]
    outfile = rootPath + 'OTC/OTC_' + date_file(date) + '.CSV'
    table.to_csv(outfile, index=False)
    writelog('Write ' + outfile)


def _download_zip(url, outfile):
    """Download ``url`` to ``outfile`` with wget and log the written path."""
    # Log the path wget reports back: it may differ from ``outfile`` when a
    # file with that name already exists.
    saved = wget.download(url, out=outfile)
    writelog('Write ' + saved)


def downloadFuture_ZIP(date):
    """Download the TAIFEX daily futures ZIP for ``date`` into ``future_rpt/``."""
    stamp = date_taifex(date)
    url = ("https://www.taifex.com.tw/file/taifex/Dailydownload/Dailydownload/Daily_"
           + stamp + ".zip")
    _download_zip(url, rootPath + 'future_rpt/Daily_' + stamp + '.zip')


def downloadOP_ZIP(date):
    """Download the TAIFEX daily options ZIP for ``date`` into ``option_rpt/``."""
    stamp = date_taifex(date)
    url = ("https://www.taifex.com.tw/file/taifex/Dailydownload/OptionsDailydownload/OptionsDaily_"
           + stamp + ".zip")
    _download_zip(url, rootPath + 'option_rpt/OptionsDaily_' + stamp + '.zip')
沒有留言:
張貼留言