๋ณธ๋ฌธ ๋ฐ”๋กœ๊ฐ€๊ธฐ

๊ณต๊ณต๋ฐ์ดํ„ฐ API

Streamlit / BigQuery ๋ฅผ ํ™œ์šฉํ•œ ๋ฐฐํฌ (API)

๋ฐ˜์‘ํ˜•

๐ŸŽˆ ์‹ค์Šต ์ˆœ์„œ

๐Ÿ‘‰ ์„œ์šธ์‹œ ๋ถ€๋™์‚ฐ ์‹ค๊ฑฐ๋ž˜๊ฐ€ API ํฌ๋กค๋ง์œผ๋กœ ๊ฐ€์ ธ์˜ค๊ธฐ

๐Ÿ‘‰ JSON ํ˜•ํƒœ์˜ ๋ฐ์ดํ„ฐ๋ฅผ pandas ๋ฐ์ดํ„ฐํ”„๋ ˆ์ž„์œผ๋กœ ๋ณ€ํ™˜ํ•˜๊ธฐ

๐Ÿ‘‰ ๋ฐ์ดํ„ฐํ”„๋ ˆ์ž„์„ BigQuery์— ์ „์ฒด ๋ฐ์ดํ„ฐ๋ฅผ ์ €์žฅํ•˜๊ธฐ

๐Ÿ‘‰ ์ €์žฅ๋œ ๋ฐ์ดํ„ฐํ”„๋ ˆ์ž„์„ BigQuery์—์„œ ์ผ๋ถ€ ์ปฌ๋Ÿผ๋งŒ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ

 

๐ŸŽˆ ์‹ค์Šต 1 - API ํฌ๋กค๋ง์—์„œ BigQuery๋กœ ๋ฐ์ดํ„ฐ ์ €์žฅ

๐ŸŽฒ  .streamlit/secrets.toml  ์„ ์—ด๊ณ  ์•„๋ž˜์™€ ๊ฐ™์ด ์„ค์ •ํ•˜๊ธฐ

๐ŸŽฒ  seoul_api_key  : ์„œ์šธ ์—ด๋ฆฐ๋ฐ์ดํ„ฐ ๊ด‘์žฅ  api key  ๋ฅผ ์˜๋ฏธ

๐ŸŽฒ  gcp_service_account  ์•„๋ž˜ ๋‚ด์šฉ์€  api key  ๋ฅผ  json  ํŒŒ์ผ๋กœ ์—ด๋ฉด ํ™•์ธ ๊ฐ€๋Šฅ

# .streamlit/secrets.toml
[public_data_api]
seoul_api_key = 'your_api_key'

[gcp_service_account]
type = "service_account"
project_id = "your_project_id"
private_key_id = "your_private_key_id"
private_key = "your_private_key"
client_email = "your_client_email"
client_id = "your_client_id"
auth_uri = "https://accounts.google.com/o/oauth2/auth"
token_uri = "https://oauth2.googleapis.com/token"
auth_provider_x509_cert_url = "https://www.googleapis.com/oauth2/v1/certs"
client_x509_cert_url = "your_client_x509_cert_url"

๐ŸŽฒ  utils.py  ๋ฅผ ์ƒ์„ฑํ•˜๊ณ  ์•„๋ž˜์™€ ๊ฐ™์ด ์„ค์ •์„ ์ €์žฅํ•˜๊ธฐ

import streamlit as st
from google.oauth2 import service_account

SERVICE_KEY = st.secrets.public_data_api.seoul_api_key

# Create API client.
credentials = service_account.Credentials.from_service_account_info(
    # Very Important Point
    st.secrets["gcp_service_account"]
)

๐ŸŽฒ  aptCrewling.py  ๋ฅผ ์•„๋ž˜์™€ ๊ฐ™์ด ์ž‘์„ฑํ•˜๊ธฐ

# -*- coding:utf-8 -*-
import requests
import pandas as pd

# Google Cloud
from google.cloud import bigquery
import pandas_gbq

# API Key Settings
from utils import credentials, SERVICE_KEY
client = bigquery.Client(credentials=credentials)

def aptCrawling(SERVICE_KEY):
    data = None
    for j in range(1,2):
        url = f'http://openapi.seoul.go.kr:8088/{SERVICE_KEY}/json/tbLnOpendataRtmsV/{1+((j-1)*1000)}/{j*1000}'
        print(url)
        req = requests.get(url)
        content = req.json()
        con = content['tbLnOpendataRtmsV']['row']
        result = pd.DataFrame(con)
        data = pd.concat([data, result])
    data = data.reset_index(drop=True)
    data['DEAL_YMD'] = pd.to_datetime(data['DEAL_YMD'], format=("%Y%m%d"))

    return data

def save2BQ(data):
    table_name = "seoul.realestate"
    project_id = "project_id ์ž…๋ ฅํ•˜๊ธฐ"

    # Save the DataFrame to BigQuery
    pandas_gbq.to_gbq(data,
                      table_name,
                      project_id=project_id, if_exists='replace')

if __name__ == "__main__":
    data = aptCrawling(SERVICE_KEY)
    save2BQ(data)

๐ŸŽฒ ์•„๋ž˜์™€ ๊ฐ™์ด ์‹คํ–‰ํ•ด๋ณด๊ธฐ

๐ŸŽฒ BigQuery ์ฝ˜์†”์—์„œ ํ™•์ธํ•ด๋ณด๊ธฐ

ํ…Œ์ด๋ธ”์ด ์ ์šฉ๋œ ๊ฒƒ์„ ํ™•์ธํ•  ์ˆ˜ ์žˆ์Œ

 

๐ŸŽˆ ์‹ค์Šต 2 - Bigquery์—์„œ ๋ฐ์ดํ„ฐ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ

๐ŸŽฒ  app.py  ๋ฅผ ์ •์˜ํ•˜๊ธฐ

# streamlit_app.py

import streamlit as st
from google.cloud import bigquery
import seaborn as sns
import pandas as pd
import pandas_gbq

from utils import credentials, SERVICE_KEY
client = bigquery.Client(credentials=credentials)

# Perform query.
# Uses st.experimental_memo to only rerun when the query changes or after 10 min.
@st.cache_data(ttl=600)
def run_query(cols, name):
    st.write("Load DataFrame")
    sql = f"SELECT {cols} FROM project_id ์ž…๋ ฅํ•˜๋Š” ๋ถ€๋ถ„.seoul.{name}"
    df = client.query(sql).to_dataframe()

    st.dataframe(df)

def main():
    tableNames = st.selectbox("ํ…Œ์ด๋ธ” ์„ ํƒ", ("realestate", "iris"))
    if tableNames == "iris":
        run_query(cols="*", name="iris")
    else:
        sql = """
        SELECT STRING_AGG(column_name)
        FROM `project_id ์ž…๋ ฅํ•˜๋Š” ๋ถ€๋ถ„.seoul.INFORMATION_SCHEMA.COLUMNS`
        where table_name = 'realestate'
        group by table_name
        """

        df = client.query(sql).to_dataframe()
        all_cols = df.values[0][0].split(",")
        columns = st.multiselect("์ปฌ๋Ÿผ๋ช… ์„ ํƒ", all_cols, default=all_cols)
        temp_Strings = ", ".join(columns)
        run_query(temp_Strings, tableNames)

if __name__ == "__main__":
    main()

๐ŸŽฒ ์•„๋ž˜์™€ ๊ฐ™์ด  streamlit  ์„ ์‹คํ–‰ํ•ด๋ณด๊ธฐ

๐ŸŽฒ  streamlit  ์•ฑ ํ™•์ธํ•˜๊ธฐ

        ๐Ÿ‘‰ ํ…Œ์ด๋ธ” / ์ปฌ๋Ÿผ๋ช… ์„ ํƒ์— ๋”ฐ๋ผ ์ถ”์ถœ๋œ ํ…Œ์ด๋ธ”์ด ๋‹ฌ๋ผ์ง€๋„๋ก ์„ค์ •ํ•จ

๋ฐ˜์‘ํ˜•