這個程式從每日成交資訊讀取股票代號，然後從 Yahoo 下載公司基本資料，存成 CSV 檔，例如 1101 台泥：https://tw.stock.yahoo.com/d/s/company_1101.html
附註：ETF、權證、特別股等，沒有公司基本資料可下載。
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 11 17:11:09 2017
@author: ghosty
"""
import csv
import ast
import httplib2
from urllib.parse import urlencode
from bs4 import BeautifulSoup
import pandas as pd
import datetime
from datetime import timedelta
# Header row of the output CSV. The order must match the list that
# getProfile() returns on success (25 columns).
ProfileTitle = [
    # identity
    '股票代碼', '股票名稱', '產業類別',
    # dividends (ROC year 104)
    '104年現金股利', '104年股票股利', '104年盈餘配股', '104年公積配股',
    # key dates
    '成立時間', '上市(櫃)時間',
    # management, capital and revenue breakdown
    '董事長', '總經理', '發言人', '股本', '營收比重',
    # profitability ratios
    '營業毛利率', '營業利益率', '稅前淨利率', '資產報酬率', '股東權益報酬率', '每股淨值',
    # quarterly earnings per share
    'Y105Q3盈餘', 'Y105Q2盈餘', 'Y105Q1盈餘', 'Y104Q4盈餘',
    # sum of the first three quarters of ROC year 105
    'Y105前3季盈餘',
]
def _roc_to_ad(roc_date):
    """Convert a 'YYY/MM/DD' Republic-of-China date string to 'YYYY/MM/DD' (AD).

    Raises ValueError when the text has fewer than three '/'-separated
    parts or the year is not an integer.
    """
    year, month, day = roc_date.split("/")[:3]
    return "{}/{}/{}".format(int(year) + 1911, month, day)


def _strip_unit(text, unit):
    """Return text without surrounding whitespace and without the unit suffix."""
    return text.strip().strip(unit).strip()


def _strip_affix(text, affix):
    """Remove one exact leading and/or trailing occurrence of affix.

    Unlike str.strip(affix), which treats its argument as a *set of
    characters*, this never eats digits that merely happen to appear in
    the affix (e.g. a value starting with '2' when the affix is '(2015年)').
    """
    if affix and text.startswith(affix):
        text = text[len(affix):]
    if affix and text.endswith(affix):
        text = text[:-len(affix)]
    return text.strip()


def _cell_text(rows, row, col):
    """Return the whitespace-stripped text of cell (row, col) in a list of <tr>."""
    return rows[row].select('td')[col].text.strip()


def getProfile(stockID, stockName):
    """Download and parse the Yahoo Taiwan company-profile page of one stock.

    Args:
        stockID: stock code as a string, e.g. '1101'; it is inserted into
            the page URL.
        stockName: stock name; it is only copied into the result.

    Returns:
        On success, a list of 25 values in the same order as ProfileTitle.
        When the expected tables or cells are missing or a value cannot be
        parsed (the file header notes that ETFs, warrants and preferred
        shares have no profile page), the 3-item list
        [stockID, stockName, 'access fail'].

    Errors raised by the HTTP request itself are not caught here and
    propagate to the caller, as before.
    """
    url = 'https://tw.stock.yahoo.com/d/s/company_' + stockID + '.html'
    conn = httplib2.Http(cache=None)
    headers = {
        'Content-type': 'application/x-www-form-urlencoded',
        'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        # Android phone user agent (the original author also tried
        # Windows and Linux desktop Firefox user agents).
        'User-Agent':
            'Mozilla/5.0 (Android; Mobile; rv:40.0) Gecko/40.0 Firefox/40.0',
    }
    _resp, doc = conn.request(url, method='GET', body=None, headers=headers)
    soup = BeautifulSoup(doc, 'html.parser')

    try:
        # Locate each table through a label it contains, then walk up three
        # levels to the enclosing element; query its rows only once.
        # NOTE(review): row/column positions below assume the page layout
        # the original author scraped -- confirm against the live page.
        rows1 = soup.findAll(text='基 本 資 料')[0].parent.parent.parent.select('tr')
        rows2 = soup.findAll(text='營業毛利率')[0].parent.parent.parent.select('tr')

        category = _cell_text(rows1, 1, 1)
        Y104cashshare = _strip_unit(_cell_text(rows1, 1, 3), "元")
        Y104stockshare = _strip_unit(_cell_text(rows1, 2, 3), "元")
        Y104earnshare = _strip_unit(_cell_text(rows1, 3, 3), "元")
        Y104remainshare = _strip_unit(_cell_text(rows1, 4, 3), "元")

        # Dates are published in the ROC calendar; store them as AD.
        setupDate = _roc_to_ad(_cell_text(rows1, 2, 1))
        onboardDate = _roc_to_ad(_cell_text(rows1, 3, 1))

        chairman = _cell_text(rows1, 4, 1)
        manager = _cell_text(rows1, 5, 1)
        speaker = _cell_text(rows1, 6, 1)
        capital = _strip_unit(_cell_text(rows1, 7, 1), "億")
        # Drop the literal year marker only; never individual digits.
        product = _strip_affix(_cell_text(rows1, 10, 1), '(2015年)')

        grossprofit = _cell_text(rows2, 1, 1)
        netprofit = _cell_text(rows2, 2, 1)
        taxprofit = _cell_text(rows2, 3, 1)
        rate = _cell_text(rows2, 4, 1)
        earn = _cell_text(rows2, 5, 1)
        # The label characters are not digits, so a character-set strip is
        # safe here once the surrounding whitespace is gone.
        netvalue = _strip_unit(_cell_text(rows2, 5, 2).strip("每股淨值:"), "元")

        Y105Q3 = _strip_unit(_cell_text(rows2, 1, 3), "元")
        Y105Q2 = _strip_unit(_cell_text(rows2, 2, 3), "元")
        Y105Q1 = _strip_unit(_cell_text(rows2, 3, 3), "元")
        Y104Q4 = _strip_unit(_cell_text(rows2, 4, 3), "元")

        # Scraped text is plain decimal; float() is the right parser and
        # raises ValueError on anything else.
        yearEarn = float(Y105Q3) + float(Y105Q2) + float(Y105Q1)
    except (IndexError, AttributeError, ValueError, TypeError):
        # Missing table/row/cell or unparsable number: report a failure row
        # instead of aborting the whole run. KeyboardInterrupt and
        # SystemExit are deliberately not swallowed.
        return [stockID, stockName, 'access fail']

    return [
        stockID, stockName, category,
        Y104cashshare, Y104stockshare, Y104earnshare, Y104remainshare,
        setupDate, onboardDate,
        chairman, manager, speaker, capital, product,
        grossprofit, netprofit, taxprofit, rate, earn, netvalue,
        Y105Q3, Y105Q2, Y105Q1, Y104Q4,
        "{:0.2f}".format(yearEarn),
    ]
#main
# Read stock codes and names from TwStockList.csv, fetch each profile, and
# write all rows (header first) to TwStockListProfile.csv.
startTime = datetime.datetime.now()

listProfile = [ProfileTitle]
# The context manager closes the input file even if a download raises.
with open('TwStockList.csv', newline='\n') as csvfile:
    next(csvfile, None)  # skip header line
    stockList = csv.reader(csvfile, delimiter=',')
    for row in stockList:
        if len(row) < 2:
            # Blank or malformed line: no stock code/name pair to look up.
            continue
        result = getProfile(row[0], row[1])
        print(result)
        listProfile.append(result)

#save result
with open("TwStockListProfile.csv", "w") as f:
    w = csv.writer(f, lineterminator='\n')
    w.writerows(listProfile)

#performance calculation
stopTime = datetime.datetime.now()
elapsedTime = stopTime - startTime
print('start time=', startTime)
print('stop time=', stopTime)
print('elapsed =', elapsedTime)
|
沒有留言:
張貼留言