>>> import pandas as pd
>>> file_path = './qna.csv'
>>> df = pd.read_csv(file_path, encoding='ms949')
>>> print(df)
ITEM_CD ... CONTENTS
0 62938619 ... 88사이즈 밖에 없는데 99사이즈도 입어도 되나요 많이 넉넉하게 나왔나요 선물 할...
1 62491750 ... 다음방송은 언제인가요?
2 62006040 ... 44사이즈도 만들어 주세요\n바지는 정말~~~~예쁘고 입고 싶은 스탈인데 55사이...
pandas.read_excel(file)
>>> df = pd.read_excel('./django.xlsx', header=1)
>>> print(df)
Django 프로젝트 간트차트 Unnamed: 1 Unnamed: 2 ... Unnamed: 72 Unnamed: 73 Unnamed: 74
0 작성자 : 정다혜 NaN NaN ... NaN NaN NaN
1 작성일 : 2019년 11월 22일 (금) NaN NaN ... NaN NaN NaN
2 NaN NaN NaN ... NaN NaN NaN
3 NaN 작업 이름 기간 ... NaN 2019년 11월 24일 NaN
4 NaN NaN NaN ... 23.0 24 25.0
5 1 사전공부 21 일 ... NaN NaN NaN
6 2 Django + python 공부 11 일 ... NaN NaN NaN
7 3 Vue.js + javascirpt 공부 6 일 ... NaN NaN NaN
>>> df = pd.read_excel('./django.xlsx')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/jeongdaye/.pyenv/versions/pandas/lib/python3.7/site-packages/pandas/io/excel/_base.py", line 304, in read_excel
io = ExcelFile(io, engine=engine)
File "/Users/jeongdaye/.pyenv/versions/pandas/lib/python3.7/site-packages/pandas/io/excel/_base.py", line 824, in __init__
self._reader = self._engines[engine](self._io)
File "/Users/jeongdaye/.pyenv/versions/pandas/lib/python3.7/site-packages/pandas/io/excel/_xlrd.py", line 20, in __init__
import_optional_dependency("xlrd", extra=err_msg)
File "/Users/jeongdaye/.pyenv/versions/pandas/lib/python3.7/site-packages/pandas/compat/_optional.py", line 92, in import_optional_dependency
raise ImportError(msg) from None
ImportError: Missing optional dependency 'xlrd'. Install xlrd >= 1.0.0 for Excel support Use pip or conda to install xlrd.
$ pip install xlrd
pandas.read_json(file)
>>> df = pd.read_json('./itemInfo.json')
>>> print(df)
code serialID resCode resMsg result description status errorCode message
benefitInfo 1 20200429114333 {'noInterestMonth': 10, 'isFreeDelivery': True} NaN 200 NaN
cateInfo 1 20200429114333 {'llargeCategoryId': 'G00007', 'llargeCategory... NaN 200 NaN
channelCode 1 20200429114333 50001001 NaN 200 NaN
detailInfo 1 20200429114333 {'itemType': 'CJMALL', 'slPrc': 188000, 'clpSl... NaN 200 NaN
exceptionCode 1 20200429114333 00 NaN 200 NaN
exceptionMsg 1 20200429114333 정상 NaN 200 NaN
imagesInfo 1 20200429114333 {'timeStamp': 1567555709718, 'itemImages': ['/... NaN 200 NaN
itemButton 1 20200429114333 {'giftDeliv': False, 'type': 'BUY', 'cart': Fa... NaN 200 NaN
marketingBanners 1 20200429114333 [] NaN 200 NaN
mobilePlus 1 20200429114333 0 NaN 200 NaN
shareEventInfo 1 20200429114333 None NaN 200 NaN
videoInfo 1 20200429114333 {'internetLive': None, 'broadcastItemType': 'E... NaN 200 NaN
pandas.read_html(url or htmlfile)
>>> url = './corona.html'
>>> tables = pd.read_html(url)
>>> print(len(tables))
4
>>> for i in range(len(tables)):
... print("tables[%s]" % i)
... print(tables[i])
... print("\n")
...
tables[0]
지역 확진환자수 비율
0 대구 6856 63.5%
1 경북 1366 12.6%
2 경기 680 6.3%
3 서울 637 5.9%
4 검역 458 4.2%
5 충남 143 1.3%
6 부산 138 1.2%
7 경남 117 1.0%
8 인천 95 0.8%
9 강원 53 0.4%
tables[1]
지역 확진환자수 비율
0 세종 46 0.4%
1 충북 45 0.4%
2 울산 43 0.4%
3 대전 40 0.3%
4 광주 30 0.2%
5 전북 18 0.1%
6 전남 15 0.1%
7 제주 13 0.1%
tables[2]
국가 총확진자 실질확진자 사망률(수)
0 미국 113011529918 888912 5.9%(66,224)
1 스페인 2165821366 74234 11.6%(25,100)
2 이탈리아 2093281900 100704 13.7%(28,710)
3 영국 1822604806 154225 15.4%(28,131)
4 독일 164967890 29155 4.1%(6,812)
5 프랑스 130979794 92496 18.9%(24,760)
6 터키 1243751983 62780 2.7%(3,336)
7 러시아 1240549623 107819 1.0%(1,222)
tables[3]
국가 총확진자 실질확진자 사망률(수)
0 브라질 965594970 49402 7.0%(6,750)
1 이란 96448802 12942 6.4%(6,156)
2 중국 828772 557 5.6%(4,633)
3 캐나다 567143057 30428 6.3%(3,566)
4 벨기에 49517485 29541 15.7%(7,765)
5 페루 425342075 28900 2.8%(1,200)
6 네덜란드 40236445 35249 12.4%(4,987)
7 인도 35776733 27557 3.4%(1,223)
>>> df = tables[1]
>>> df
지역 확진환자수 비율
0 세종 46 0.4%
1 충북 45 0.4%
2 울산 43 0.4%
3 대전 40 0.3%
4 광주 30 0.2%
5 전북 18 0.1%
6 전남 15 0.1%
7 제주 13 0.1%
>>> df.set_index(['지역'], inplace=True)
>>> df
확진환자수 비율
지역
세종 46 0.4%
충북 45 0.4%
울산 43 0.4%
대전 40 0.3%
광주 30 0.2%
전북 18 0.1%
전남 15 0.1%
제주 13 0.1%
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/jeongdaye/.pyenv/versions/pandas/lib/python3.7/site-packages/pandas/io/html.py", line 1100, in read_html
displayed_only=displayed_only,
File "/Users/jeongdaye/.pyenv/versions/pandas/lib/python3.7/site-packages/pandas/io/html.py", line 891, in _parse
parser = _parser_dispatch(flav)
File "/Users/jeongdaye/.pyenv/versions/pandas/lib/python3.7/site-packages/pandas/io/html.py", line 848, in _parser_dispatch
raise ImportError("lxml not found, please install it")
ImportError: lxml not found, please install it
$ pip install lxml
$ pip install beautifulsoup4
from bs4 import BeautifulSoup
import requests
import re
import pandas as pd
# Scrape the Wikipedia list of American ETFs and build a dict that maps
# each ticker to [market, fund name].
url = "https://en.wikipedia.org/wiki/List_of_American_exchange-traded_funds"

# NOTE: verify=False turns off TLS certificate verification, and urllib3
# emits an InsecureRequestWarning for it. It is only a workaround for a local
# CERTIFICATE_VERIFY_FAILED error; prefer fixing the local CA bundle.
resp = requests.get(url, verify=False)
soup = BeautifulSoup(resp.text, 'lxml')
rows = soup.select('div > ul > li')

# Patterns are compiled once, as raw strings so the backslashes reach the
# regex engine untouched. They presumably target list items shaped like
# "<fund name> (NYSE Arca|<ticker>)" -- confirm against the live page markup.
name_pattern = re.compile(r'^(.*) \(NYSE')
market_pattern = re.compile(r'\((.*)\|')
ticker_pattern = re.compile(r'NYSE Arca\|(.*)\)')

etfs = {}
for row in rows:
    # Parse the row being visited. Reading a fixed row such as rows[10]
    # here would give every ticker the same fund name.
    text = row.text
    etf_name = name_pattern.findall(text)
    etf_market = market_pattern.findall(text)
    etf_ticker = ticker_pattern.findall(text)
    # Keep only rows where all three parts were found, so the [0] lookups
    # below cannot raise IndexError on unrelated list items.
    if etf_name and etf_market and etf_ticker:
        etfs[etf_ticker[0]] = [etf_market[0], etf_name[0]]
print(etfs)
{'ITOT': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'IWV': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'SCHB': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'FNDB': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'VT': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'VTI': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'VXUS': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'VTHR': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'DIA': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'RSP': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'IOO': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'IVV': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'SPY': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'VOO': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'IWM': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'OEF': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'CVY': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'RPG': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'RPV': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'IWB': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'PKW': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'PRF': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'SPLV': ['NYSE Arca', 'iShares Core S&P Total US Stock Mkt'], 'SCHX': ['NYSE Arca', 'iShares Core S&P Total U ...']}
# Turn the ticker -> [market, name] dict into a frame: one column per
# ticker, with row 0 holding the market and row 1 the fund name.
df = pd.DataFrame(data=etfs)
print(df)
ITOT IWV SCHB FNDB ... ICB RRF USDU WDTI
0 NYSE Arca NYSE Arca NYSE Arca NYSE Arca ... NYSE Arca NYSE Arca NYSE Arca NYSE Arca
1 iShares Core S&P Total US Stock Mkt iShares Core S&P Total US Stock Mkt iShares Core S&P Total US Stock Mkt iShares Core S&P Total US Stock Mkt ... iShares Core S&P Total US Stock Mkt iShares Core S&P Total US Stock Mkt iShares Core S&P Total US Stock Mkt iShares Core S&P Total US Stock Mkt
requests.exceptions.SSLError: HTTPSConnectionPool(host='en.wikipedia.org', port=443): Max retries exceeded with url: /wiki/List_of_American_exchange-traded_funds (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1051)')))
>>> resp = requests.get(url, verify=False)
/Users/jeongdaye/.pyenv/versions/pandas/lib/python3.7/site-packages/urllib3/connectionpool.py:986: InsecureRequestWarning: Unverified HTTPS request is being made to host 'en.wikipedia.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
InsecureRequestWarning,