2017年3月12日 星期日

[python] 從台灣yahoo下載公司的基本資料



這個程式從每日成交資訊讀取股票代號,然後從 Yahoo 下載公司基本資料,並存成 CSV 檔。例如 1101 台泥:https://tw.stock.yahoo.com/d/s/company_1101.html
附註:ETF、權證、特別股等沒有公司基本資料可下載。


# -*- coding: utf-8 -*-
"""
Created on Sat Mar 11 17:11:09 2017

@author: ghosty
"""
import csv
import ast
import httplib2
from urllib.parse import urlencode
from bs4 import BeautifulSoup
import pandas as pd
import datetime
from datetime import timedelta
                 
# Header row of the output CSV. The order of these 25 column titles must
# match the order of the values in the list that getProfile() returns.
# NOTE(review): '上市()時間' has empty parentheses and 'Y1053季盈餘' reads
# oddly; characters may have been lost when this file was published - confirm.
ProfileTitle = [
    # identification
    '股票代碼', '股票名稱', '產業類別',
    # dividend columns
    '104年現金股利', '104年股票股利', '104年盈餘配股', '104年公積配股',
    # dates
    '成立時間', '上市()時間',
    # people, capital and revenue mix
    '董事長', '總經理', '發言人', '股本', '營收比重',
    # ratios
    '營業毛利率', '營業利益率', '稅前淨利率', '資產報酬率', '股東權益報酬率', '每股淨值',
    # quarterly earnings followed by their computed sum
    'Y105Q3盈餘', 'Y105Q2盈餘', 'Y105Q1盈餘', 'Y104Q4盈餘',
    'Y1053季盈餘',
]
                
def _cellText(rows, row, col):
    """Return the raw text of the ``col``-th ``td`` inside the ``row``-th ``tr``."""
    return rows[row].select('td')[col].text


def _rocToGregorian(dateText):
    """Rewrite a 'year/month/day' string with 1911 added to the year.

    The +1911 offset implies the page shows Republic-of-China (Minguo)
    calendar years; month and day are passed through unchanged.
    """
    parts = dateText.strip().split("/")
    return str(int(parts[0]) + 1911) + '/' + parts[1] + '/' + parts[2]


def _removeAnnotation(text, annotation):
    """Strip whitespace and drop ``annotation`` when it is a literal prefix/suffix.

    ``str.strip(chars)`` treats ``chars`` as a *set of characters*, so
    ``strip('(2015)')`` would also eat leading/trailing digits that belong to
    the value itself; this helper only removes the exact substring.
    """
    text = text.strip()
    if annotation and text.startswith(annotation):
        text = text[len(annotation):]
    if annotation and text.endswith(annotation):
        text = text[:-len(annotation)]
    return text.strip()


def getProfile(stockID,stockName):
    """Download and parse the Yahoo Taiwan company-profile page of one stock.

    Args:
        stockID: stock code as a string; it is concatenated into the URL.
        stockName: stock name; copied into the result unchanged.

    Returns:
        On success, a list of 25 values in the same order as ``ProfileTitle``.
        On any failure (network error, missing table, unexpected cell
        content), the 3-element list ``[stockID, stockName, 'access fail']``.
    """
    url = 'https://tw.stock.yahoo.com/d/s/company_'+stockID+'.html'
    # A mobile Firefox User-Agent is sent with every request.
    headers = {'Content-type': 'application/x-www-form-urlencoded',
           'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
           'User-Agent':'Mozilla/5.0 (Android; Mobile; rv:40.0) Gecko/40.0 Firefox/40.0'} #android phone
    try:
        # The request is inside the try so that one unreachable page yields
        # an 'access fail' row instead of aborting the caller's whole run.
        conn = httplib2.Http(cache=None)
        _resp, doc = conn.request(url, method='GET', body=None, headers=headers)
        soup = BeautifulSoup(doc, 'html.parser')

        # Each table is located through an anchor text node: three .parent
        # hops lead from the text node to the element whose <tr> rows are read.
        # NOTE(review): the first anchor is a single space, which looks like
        # it lost characters when the script was published - confirm.
        rows1 = soup.findAll(text=' ')[0].parent.parent.parent.select('tr')
        rows2 = soup.findAll(text='營業毛利率')[0].parent.parent.parent.select('tr')

        # Cell positions below are hard-coded to the page layout.
        # NOTE(review): the original called .strip("") on the dividend,
        # capital, quarterly-earnings and net-value cells; stripping an empty
        # character set is a no-op, so those calls are dropped here.
        # Presumably a unit character was lost from the literal - confirm.
        category = _cellText(rows1, 1, 1).strip()
        Y104cashshare = _cellText(rows1, 1, 3)
        Y104stockshare = _cellText(rows1, 2, 3)
        Y104earnshare = _cellText(rows1, 3, 3)
        Y104remainshare = _cellText(rows1, 4, 3)
        setupDate = _rocToGregorian(_cellText(rows1, 2, 1))
        onboardDate = _rocToGregorian(_cellText(rows1, 3, 1))
        chairman = _cellText(rows1, 4, 1).strip()
        manager = _cellText(rows1, 5, 1).strip()
        speaker = _cellText(rows1, 6, 1).strip()
        capital = _cellText(rows1, 7, 1)
        product = _removeAnnotation(_cellText(rows1, 10, 1), '(2015)')

        grossprofit = _cellText(rows2, 1, 1).strip()
        netprofit = _cellText(rows2, 2, 1).strip()
        taxprofit = _cellText(rows2, 3, 1).strip()
        rate = _cellText(rows2, 4, 1).strip()
        earn = _cellText(rows2, 5, 1).strip()
        Y105Q3 = _cellText(rows2, 1, 3).strip()
        Y105Q2 = _cellText(rows2, 2, 3).strip()
        Y105Q1 = _cellText(rows2, 3, 3).strip()
        Y104Q4 = _cellText(rows2, 4, 3).strip()
        # strip() here removes any of the label's characters from both ends;
        # none of them is a digit, so a numeric value is left intact.
        netvalue = _cellText(rows2, 5, 2).strip("每股淨值:").strip()

        # Sum of the three Y105 quarters; a cell that is not a plain numeric
        # literal raises here and the row is reported as 'access fail'.
        yearEarn = ast.literal_eval(Y105Q3) + ast.literal_eval(Y105Q2) + ast.literal_eval(Y105Q1)

        result = [stockID, stockName, category,
                  Y104cashshare, Y104stockshare, Y104earnshare, Y104remainshare,
                  setupDate, onboardDate,
                  chairman, manager, speaker, capital, product,
                  grossprofit, netprofit, taxprofit, rate, earn, netvalue,
                  Y105Q3, Y105Q2, Y105Q1, Y104Q4,
                  "{:0.2f}".format(yearEarn),
                 ]
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still stop the run.
        result = [stockID, stockName, 'access fail']

    return result
   
# main: read stock codes/names from TwStockList.csv, fetch each profile with
# getProfile(), write all rows to TwStockListProfile.csv and report timing.
startTime = datetime.datetime.now()

listProfile=[ProfileTitle]
# newline='' is what the csv module expects for files handed to csv.reader;
# the with-block guarantees the file is closed even if a fetch raises.
with open('TwStockList.csv', newline='') as csvfile:
    next(csvfile, None) #skip header line
    stockList = csv.reader(csvfile, delimiter=',')
    for row in stockList:
        # Blank lines parse as [] and short rows lack a name column; skip
        # them instead of failing on row[1].
        if len(row) < 2:
            continue
        result = getProfile(row[0],row[1])
        print(result)
        listProfile.append(result)
        #break  #test once

#save result
# The file is opened in default text mode, so the '\n' terminator written by
# the csv writer is translated to the platform line ending.
with open("TwStockListProfile.csv","w") as f:
    w = csv.writer(f, lineterminator='\n')
    w.writerows(listProfile)

#performance calculation
stopTime =  datetime.datetime.now()
elapsedTime = stopTime - startTime
print('start time=',startTime)
print('stop  time=',stopTime)
print('elapsed =',elapsedTime)



沒有留言:

張貼留言