2020年3月5日 星期四

使用 Synology 下載證交所交易資料

在前一篇安裝 pip3 後, 即可 ssh 進入來執行 python 程式. 本篇使用 python 來下載證交所資料, 先安裝 package 供未來使用:

# pip3 install lxml pymysql httplib2 wget pandas requests html5lib

以下是程式碼


download_history.py: 下載證交所上市/上櫃歷史交易資料的主程式, 這裡使用 thread 是因為測試發現 python 把記憶體吃光時會導致 NAS 直接關機


#!/bin/python3

import gstock
import datetime as dt
from datetime import timedelta
import time
import threading

def downloadJob(downloadDate):
    """Fetch the TWSE and OTC files for one date, then pause.

    downloadDate is handed unchanged to gstock.downloadTWSE_CSV and then
    to gstock.downloadOTC_CSV.  The function sleeps for 6 seconds after
    both calls return (presumably to avoid hammering the servers).
    """
    for fetch in (gstock.downloadTWSE_CSV, gstock.downloadOTC_CSV):
        fetch(downloadDate)
    time.sleep(6)
       
#--- main() ---
# Walk every calendar day from startDate through today (inclusive) and run
# downloadJob for it in its own thread; each thread is joined before the
# next one starts, so the downloads remain strictly sequential.
startDate = dt.date(2004, 2, 12)
stopDate = dt.date.today()
totalDays = (stopDate - startDate).days + 1
for dayOffset in range(totalDays):
    downloadDate = startDate + timedelta(days=dayOffset)
    worker = threading.Thread(target=downloadJob, args=(downloadDate,))
    worker.start()
    worker.join()


download_today.py: 下載上市/上櫃/期貨/選擇權資料的每日排程程式


#!/bin/python3

import gstock
import datetime as dt
import time
from datetime import timedelta
import threading

def downloadJob(downloadDate):
    """Run all four gstock downloads for one date, then pause.

    The TWSE CSV, OTC CSV, futures ZIP and options ZIP helpers are called
    in that order with downloadDate, followed by a 6 second sleep.
    """
    fetchers = (
        gstock.downloadTWSE_CSV,
        gstock.downloadOTC_CSV,
        gstock.downloadFuture_ZIP,
        gstock.downloadOP_ZIP,
    )
    for fetch in fetchers:
        fetch(downloadDate)
    time.sleep(6)

# Download the files for today's date.  To fetch the previous day instead,
# subtract timedelta(days=1) from targetDate.
targetDate = dt.date.today()
downloadJob(targetDate)

gstock.py: 下載程式庫


import datetime as dt
import time
from datetime import timedelta
import pandas as pd
import requests
from urllib.parse import urlencode
import wget

# Common variables shared by the download helpers below.

# Root directory: download.log and the TWSE/, OTC/, future_rpt/ and
# option_rpt/ output paths are all built by prefixing this string.
rootPath = '/volume1/homes/ghosty/taifex/'
# Alternative location (disabled):
#rootPath = '~/taifex/'

def date_otc(date):
    """Format a date as the slash-separated string used in the OTC URL.

    The year is reduced by 1911 and the month and day are zero-padded to
    two digits, e.g. 2020-03-05 becomes '109/03/05'.
    """
    return '{}/{:02}/{:02}'.format(date.year - 1911, date.month, date.day)

def date_twse(date):
    """Format a date as the compact string sent to TWSE.

    Year, zero-padded month and zero-padded day are concatenated with no
    separator, e.g. 2020-03-05 becomes '20200305'.
    """
    return '{y}{m:02}{d:02}'.format(y=date.year, m=date.month, d=date.day)
  
def date_taifex(date):
    """Format a date as the underscore-separated stamp used in TAIFEX file names.

    Month and day are zero-padded to two digits, e.g. 2020-03-05 becomes
    '2020_03_05'.
    """
    return '%d_%02d_%02d' % (date.year, date.month, date.day)
  
def date_file(date):
    """Format a date as the stamp embedded in local output file names.

    Produces 'YYYY_MM_DD' with zero-padded month and day, e.g. 2020-03-05
    becomes '2020_03_05'.
    """
    return '{0.year}_{0.month:02}_{0.day:02}'.format(date)
  
def writelog(msg):
    """Append one timestamped line to download.log under rootPath.

    Args:
        msg: Text to record.  It is written after a "YYYY/MM/DD HH:MM:SS"
            timestamp and a single space, and followed by a newline.
    """
    logfile = rootPath+"download.log"
    now = dt.datetime.today().strftime("%Y/%m/%d %H:%M:%S")
    # The with-block closes the file even when write() raises, and the
    # explicit encoding keeps non-ASCII messages from depending on the
    # process locale.
    with open(logfile, "a", encoding="utf-8") as fp:
        fp.write(now+" "+msg+"\n")
  
# --------------------------------
# TWSE & OTC download functions  
# --------------------------------   
def downloadTWSE_CSV(date):
    """Download the TWSE daily quote CSV for one date into rootPath/TWSE/.

    Posts a form request (response=csv, type=ALL, date from date_twse) to
    the TWSE MI_INDEX endpoint.  A non-empty response body is saved as
    TWSE/TWSE_<date_file(date)>.CSV and the write is logged; an empty body
    only produces a "No data" log entry.

    Args:
        date: Object exposing year, month and day (e.g. datetime.date).

    Raises:
        requests.exceptions.RequestException: if the connection fails or
            the server does not respond within the timeout.
    """
    url="https://www.twse.com.tw/exchangeReport/MI_INDEX"
    values = {'response' : 'csv', 'date' : date_twse(date), 'type' : 'ALL' }
    agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0'
    headers = {'Content-type': 'application/x-www-form-urlencoded',
           'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
           'User-Agent': agent}
    # Without a timeout requests can wait indefinitely on a stalled server,
    # which would hang the scheduled job.
    response = requests.post(url, headers = headers, data = values, timeout = 60)
    text = response.text  # read the decoded body once and reuse it
    if not text:
        writelog('TWSE '+date_file(date)+' No data')
        return

    outfile = rootPath+'TWSE/TWSE_'+date_file(date)+'.CSV'
    # The Chinese text is written as bytes (UTF-8, the str.encode() default).
    # NOTE(review): every '\n' is removed from the body before writing, exactly
    # as the original split-and-write loop did, so the file format stays the
    # same for existing consumers -- confirm whether dropping them is intended.
    payload = text.replace('\n', '').encode()
    # The with-block guarantees the handle is closed even if the write fails.
    with open(outfile, "wb") as fp:
        fp.write(payload)
    writelog('Write '+outfile)
  
def downloadOTC_CSV(date):
    """Download the OTC daily quote table for one date into rootPath/OTC/.

    The first HTML table on the TPEx print page for the date is parsed with
    pandas.  When it has more than two rows, every row except the last is
    saved as OTC/OTC_<date_file(date)>.CSV (without the index column) and
    the write is logged; otherwise only a "No data" entry is logged.
    """
    url='http://www.tpex.org.tw/web/stock/aftertrading/otc_quotes_no1430/stk_wn1430_print.php?l=zh-tw&d='+date_otc(date)+'&se=EW'
    quotes = pd.read_html(url)[0]
    rowCount = len(quotes)
    if rowCount <= 2:
        writelog('OTC '+date_file(date)+' No data')
        return

    outfile = rootPath+'OTC/OTC_'+date_file(date)+'.CSV'
    # Drop the final row (presumably a footer/summary line -- verify).
    quotes.iloc[:rowCount - 1].to_csv(outfile, index=False)
    writelog('Write '+outfile)
      
def downloadFuture_ZIP(date):
    """Download the TAIFEX futures daily ZIP for one date into rootPath/future_rpt/.

    The remote and local file names are both Daily_<date_taifex(date)>.zip.
    A log entry is written after wget.download returns.
    """
    stamp = date_taifex(date)
    outfile = rootPath+'future_rpt/Daily_'+stamp+'.zip'
    url = "https://www.taifex.com.tw/file/taifex/Dailydownload/Dailydownload/Daily_" + stamp + ".zip"
    wget.download(url, out=outfile)
    writelog('Write '+outfile)
              
def downloadOP_ZIP(date):
    """Download the TAIFEX options daily ZIP for one date into rootPath/option_rpt/.

    The remote and local file names are both
    OptionsDaily_<date_taifex(date)>.zip.  A log entry is written after
    wget.download returns.
    """
    stamp = date_taifex(date)
    outfile = rootPath+'option_rpt/OptionsDaily_'+stamp+'.zip'
    url = "https://www.taifex.com.tw/file/taifex/Dailydownload/OptionsDailydownload/OptionsDaily_" + stamp + ".zip"
    wget.download(url, out=outfile)
    writelog('Write '+outfile)

沒有留言:

張貼留言