📌 Hands-on Examples
📌 Fetch Seoul real-estate transaction-price data by crawling an API
📌 Convert the JSON response into a pandas DataFrame (a standalone sketch of this step follows the list)
📌 Save the entire DataFrame to BigQuery
📌 Load back only selected columns of the saved data from BigQuery
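Before the full walkthrough, here is the JSON-to-DataFrame step in isolation. This is a minimal sketch assuming the Seoul Open Data Plaza response layout used later in this post (records nested under the dataset name and a row key); the API key is a placeholder.

import requests
import pandas as pd

SERVICE_KEY = "your_api_key"  # placeholder: Seoul Open Data Plaza key
# Ask for rows 1-5 of the real-estate dataset in JSON form
url = f"http://openapi.seoul.go.kr:8088/{SERVICE_KEY}/json/tbLnOpendataRtmsV/1/5"
content = requests.get(url).json()

# The records sit under <dataset name> -> 'row'; each dict becomes one row
df = pd.DataFrame(content["tbLnOpendataRtmsV"]["row"])
print(df.head())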
📌 Hands-on 1 - Saving data from an API crawl to BigQuery
🔲 Open .streamlit/secrets.toml and configure it as below
🔲 seoul_api_key : your API key from the Seoul Open Data Plaza
🔲 The fields under gcp_service_account can be copied from the service-account key JSON file
# .streamlit/secrets.toml
[public_data_api]
seoul_api_key = "your_api_key"
[gcp_service_account]
type = "service_account"
project_id = "your_project_id"
private_key_id = "your_private_key_id"
private_key = "your_private_key"
client_email = "your_client_email"
client_id = "your_client_id"
auth_uri = "https://accounts.google.com/o/oauth2/auth"
token_uri = "https://oauth2.googleapis.com/token"
auth_provider_x509_cert_url = "https://www.googleapis.com/oauth2/v1/certs"
client_x509_cert_url = "your_client_x509_cert_url"
🔲 Create utils.py and save the settings below in it
import streamlit as st
from google.oauth2 import service_account

# Seoul Open Data Plaza API key from secrets.toml
SERVICE_KEY = st.secrets.public_data_api.seoul_api_key

# Create the BigQuery credentials from the service-account info in secrets.toml
credentials = service_account.Credentials.from_service_account_info(
    st.secrets["gcp_service_account"]
)
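To sanity-check that the service account works before crawling anything, you can list the project's datasets. This check is not in the original post; it only assumes the credentials object defined above.

from google.cloud import bigquery
from utils import credentials

client = bigquery.Client(credentials=credentials, project=credentials.project_id)
# Print every dataset the service account can see in this project
for dataset in client.list_datasets():
    print(dataset.dataset_id)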
🔲 Write aptCrewling.py as below
# -*- coding:utf-8 -*-
import requests
import pandas as pd

# Google Cloud
from google.cloud import bigquery
import pandas_gbq

# API key and credentials
from utils import credentials, SERVICE_KEY

client = bigquery.Client(credentials=credentials)

def aptCrawling(SERVICE_KEY):
    data = None
    for j in range(1, 2):  # fetch rows in 1,000-row pages (a single page here)
        url = f'http://openapi.seoul.go.kr:8088/{SERVICE_KEY}/json/tbLnOpendataRtmsV/{1+((j-1)*1000)}/{j*1000}'
        print(url)
        req = requests.get(url)
        content = req.json()
        con = content['tbLnOpendataRtmsV']['row']
        result = pd.DataFrame(con)
        data = pd.concat([data, result])  # pd.concat silently drops the initial None
    data = data.reset_index(drop=True)
    data['DEAL_YMD'] = pd.to_datetime(data['DEAL_YMD'], format="%Y%m%d")
    return data

def save2BQ(data):
    table_name = "seoul.realestate"
    project_id = "your_project_id"  # enter your GCP project ID

    # Save the DataFrame to BigQuery, replacing the table if it already exists
    pandas_gbq.to_gbq(data,
                      table_name,
                      project_id=project_id,
                      if_exists='replace',
                      credentials=credentials)  # use the service-account credentials from utils.py

if __name__ == "__main__":
    data = aptCrawling(SERVICE_KEY)
    save2BQ(data)
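The loop above only fetches the first 1,000 rows (range(1, 2)). The response envelope also carries a total record count, so the crawl can be extended to every page. A sketch, assuming the list_total_count field name follows the standard Seoul Open Data Plaza JSON layout:

import math
import requests
import pandas as pd

def aptCrawlingAll(SERVICE_KEY):
    # Probe a single row to read the total record count
    probe = f'http://openapi.seoul.go.kr:8088/{SERVICE_KEY}/json/tbLnOpendataRtmsV/1/1'
    total = requests.get(probe).json()['tbLnOpendataRtmsV']['list_total_count']
    frames = []
    for j in range(1, math.ceil(total / 1000) + 1):  # 1,000 rows per request
        url = f'http://openapi.seoul.go.kr:8088/{SERVICE_KEY}/json/tbLnOpendataRtmsV/{1+((j-1)*1000)}/{j*1000}'
        frames.append(pd.DataFrame(requests.get(url).json()['tbLnOpendataRtmsV']['row']))
    return pd.concat(frames, ignore_index=True)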
🔲 Run it as below
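The exact command is not shown in the original post; assuming the file name above, it would be:

python aptCrewling.py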
🔲 Check the result in the BigQuery console
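Besides the console UI, you can confirm the upload from Python with a quick row count. A minimal sketch reusing the credentials from utils.py; your_project_id is the same placeholder as in save2BQ:

from google.cloud import bigquery
from utils import credentials

client = bigquery.Client(credentials=credentials, project="your_project_id")
# Count the rows that landed in the seoul.realestate table
result = client.query("SELECT COUNT(*) AS n FROM `your_project_id.seoul.realestate`").to_dataframe()
print(result)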
📌 Hands-on 2 - Loading data from BigQuery
🔲 Define app.py as below
# app.py
import streamlit as st
from google.cloud import bigquery

from utils import credentials, SERVICE_KEY

client = bigquery.Client(credentials=credentials)

# Perform query.
# Uses st.cache_data to only rerun when the query changes or after 10 min.
@st.cache_data(ttl=600)
def run_query(cols, name):
    st.write("Load DataFrame")
    sql = f"SELECT {cols} FROM `your_project_id.seoul.{name}`"  # enter your GCP project ID
    df = client.query(sql).to_dataframe()
    st.dataframe(df)

def main():
    tableNames = st.selectbox("Select a table", ("realestate", "iris"))
    if tableNames == "iris":
        run_query(cols="*", name="iris")
    else:
        # Look up the column names of the realestate table
        sql = """
            SELECT STRING_AGG(column_name)
            FROM `your_project_id.seoul.INFORMATION_SCHEMA.COLUMNS`
            WHERE table_name = 'realestate'
            GROUP BY table_name
        """
        df = client.query(sql).to_dataframe()
        all_cols = df.values[0][0].split(",")
        columns = st.multiselect("Select columns", all_cols, default=all_cols)
        temp_Strings = ", ".join(columns)
        run_query(temp_Strings, tableNames)

if __name__ == "__main__":
    main()
🔲 Run Streamlit as below
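With app.py in place, start the app from the project root with the standard Streamlit command:

streamlit run app.py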
🔲 Check the Streamlit app
📌 The app is configured so that the extracted table changes according to the selected table and columns