※ 서울 열린데이터 광장 홈페이지(https://data.seoul.go.kr/)를 통해 실습했습니다
✅ 서울 열린데이터 광장 - API 발급 및 데이터 수집
👉 통합검색 - 부동산 전월세가
👉 Open API
👉 인증키 신청
👉 인증키 발급
👉 발급된 인증키를 복사 후 api에 활용할 수 있다
👉 샘플 URL 예시 / 요청인자 / 출력값 확인
👉 Open API 호출 후 나오는 출력값을 확인할 수 있다.
👉 List_total_count가 1,000이 넘을 경우, Open API는 1회에 1,000건을 넘을 수 없으므로 분리해서 호출 (반복문 필요!!)
👉 원하는 조건에 맞춰 샘플 테스트
👉 양식에 맞춰서 입력해야 원하는 정보가 나옴
👉 사용하고자 하는 언어에 맞게 가이드를 다운로드 받는다
👉 Data 형태 - xml / json
👉 형태는 xml / json 중에 선택한 후, 샘플 URL 형식에 맞게 작성한다
👉 이후, Google Colab에 적용하여 api 크롤링을 진행한다
👉 데이터 활용 방식
Data 변환 (xml, json) 👉 pandas의 dataframe 형태로 변환 👉 csv 파일 만들기 👉 DB(database)에 저장
✅ Google Colab에 URL 적용하기
👉 서울시 부동산 XML API 예제
👉 라이브러리 불러오기
# library
import lxml
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import math
# Build the sample request URL for the XML flavour of the tbLnOpendataRtmsV service.
# NOTE(review): the trailing "1/5" is presumably the first/last row index of the
# requested page -- confirm against the API guide.
service_key = 'λ°κΈλ°μ μΈμ¦ ν€'  # placeholder text; substitute the key issued to you
url = 'http://openapi.seoul.go.kr:8088/{}/xml/tbLnOpendataRtmsV/1/5/'.format(service_key)
print(url)
👉 인증 키 관리 주의하기! (개발 시 노출하지 말 것)
👉 API 요청 확인하기
# Send the GET request to the API endpoint held in `url`.
# A timeout is passed explicitly: without one, requests waits indefinitely
# and an unresponsive server would hang the notebook/script.
response = requests.get(url, timeout=30)
print(response.content)  # raw response body (bytes)
👉 XML 형태로 변환 (Parsing)
# Parse the response body into a BeautifulSoup tree and show it.
# NOTE(review): "lxml" selects the HTML-flavoured lxml parser, which presumably
# lower-cases tag names; the tag lookups later in this file use lower-case
# names, so do not swap the parser without updating them.
soup = BeautifulSoup(response.content, features="lxml")
print(soup)
👉 XML 형태를 Pandas DataFrame으로 변환
# Collect the tags of every output field from the parsed response.
# Each find_all() call returns the list of matching tags for one field.
years = soup.find_all('acc_year')                  # reception (filing) year
sgg_cds = soup.find_all('sgg_cd')                  # district (gu) code
sgg_nms = soup.find_all('sgg_nm')                  # district (gu) name
bjdong_cds = soup.find_all('bjdong_cd')            # legal-dong code
bjdong_nms = soup.find_all('bjdong_nm')            # legal-dong name
land_gbns = soup.find_all('land_gbn')              # lot-number category
land_gbn_nms = soup.find_all('land_gbn_nm')        # lot-number category name
bonbeons = soup.find_all('bonbeon')                # main lot number
bubeons = soup.find_all('bubeon')                  # sub lot number
bldg_nms = soup.find_all('bldg_nm')                # building name
deal_ymds = soup.find_all('deal_ymd')              # contract date
obj_amts = soup.find_all('obj_amt')                # price of the property (unit: 10,000 KRW)
bldg_areas = soup.find_all('bldg_area')            # building area (m^2)
tot_areas = soup.find_all('tot_area')              # land area (m^2)
floors = soup.find_all('floor')                    # floor
right_gbns = soup.find_all('right_gbn')            # right category
cntl_ymds = soup.find_all('cntl_ymd')              # cancellation date
# The tag was previously looked up as 'build_years', which does not match the
# API's BUILD_YEAR field and therefore always produced an empty list.
build_years = soup.find_all('build_year')          # construction year
house_types = soup.find_all('house_type')          # building use
req_gbn = soup.find_all('req_gbn')                 # report category
rdealer_lawdnms = soup.find_all('rdealer_lawdnm')  # district (si/gun/gu) of the reporting licensed real-estate agent
# Build a DataFrame from a subset of the collected fields.
# One list per output column, filled with the text of each tag.
year_list = []
sgg_cd_list = []
bldg_nm_list = []
obj_amt_list = []
house_type_list = []
rdealer_lawdnm_list = []

# zip() stops at the shortest input, so all six lists end up the same length,
# which the DataFrame constructor below requires.
for year, sgg_cd, bldg_nm, obj_amt, house_type, rdealer_lawdnm in zip(
    years, sgg_cds, bldg_nms, obj_amts, house_types, rdealer_lawdnms
):
    year_list.append(year.get_text())
    sgg_cd_list.append(sgg_cd.get_text())
    bldg_nm_list.append(bldg_nm.get_text())
    obj_amt_list.append(obj_amt.get_text())
    house_type_list.append(house_type.get_text())
    rdealer_lawdnm_list.append(rdealer_lawdnm.get_text())

df = pd.DataFrame({
    "acc_year": year_list,
    "sgg_cd": sgg_cd_list,
    "bldg_nm": bldg_nm_list,
    "obj_amt": obj_amt_list,
    "house_type": house_type_list,
    "rdealer_lawdnm": rdealer_lawdnm_list,
})
df  # bare expression: displays the frame when run as the last line of a notebook cell
👉 JSON 형태를 Pandas DataFrame으로 변환 (추천 ★★★)
# Build the sample request URL for the JSON flavour of the same service.
service_key = 'μΈμ¦λ°μ μΈμ¦ ν€'  # placeholder text; substitute the key issued to you
url = 'http://openapi.seoul.go.kr:8088/{}/json/tbLnOpendataRtmsV/1/5/'.format(service_key)
print(url)
# Request the JSON endpoint and decode the body into Python objects.
# A timeout is passed explicitly: without one, requests waits indefinitely
# and an unresponsive server would hang the notebook/script.
req = requests.get(url, timeout=30)
content = req.json()
print(content)
# Step-by-step look into the decoded JSON. The bare expressions below only
# display a value when each one ends a notebook cell; nothing is assigned.
## Check the top-level keys
content.keys()
# dict_keys(['tbLnOpendataRtmsV'])
# Use that key to look at the contents
content['tbLnOpendataRtmsV']
# Look at the contents stored under 'row'
content['tbLnOpendataRtmsV']['row']
# Turn it into a pandas DataFrame
pd.DataFrame(content['tbLnOpendataRtmsV']['row'])
'공공데이터 API' 카테고리의 다른 글
Streamlit / BigQuery 를 활용한 배포 (API) (0) | 2023.05.22 |
---|---|
구글 클라우드 BigQuery 활용을 위한 세팅 (API) (0) | 2023.05.22 |