Coding/Python

[python] 유튜브 댓글 크롤링(api 사용)

김유니콘 2022. 4. 6. 21:22

유튜브 api 가서 키값을 발급받은 후,

 

 

아래와 같이 설치해주고,

pip install google-api-python-client
import datetime
import os
import re

from tqdm import tqdm

 

tvNdrama 채널의 재생 리스트를 확인하고,

특정 재생리스트의 댓글을 모두 가져오고 싶을때

from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
# from oauth2client.tools import argparser

DEVELOPER_KEY = '     ' # My YouTube Data API key (fill in before running)
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'

# Client for the YouTube Data API, authenticated with the API key above.
youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)

# Look the channel up by name. Restricting the search to channels keeps a
# video or playlist that merely matches the query from being picked as the
# first hit; only that first hit is used, so a single result is requested.
channel_query = 'tvN drama' # channel name
search_response = youtube.search().list(
    q = channel_query,
    type = 'channel',
    order = 'relevance',
    part = 'snippet',
    maxResults = 1
    ).execute()

# Fail with a readable message instead of an IndexError on an empty result.
if not search_response.get('items'):
    raise RuntimeError('YouTube search returned no channel for query: ' + channel_query)

channel_id = search_response['items'][0]['snippet']['channelId']

# Playlists published by that channel. Only the first page (at most 50
# playlists) is requested; later code reads playlists['items'].
playlists = youtube.playlists().list(
    channelId = channel_id,
    part = 'snippet',
    maxResults = 50
    ).execute()

import pandas as pd

# Collect the id and the title of every playlist returned for the channel.
ids = [entry['id'] for entry in playlists['items']]
titles = [entry['snippet']['title'] for entry in playlists['items']]

# Overview table with one row per playlist: its id and its title.
df = pd.DataFrame([ids, titles]).T
df.columns = ['PlayLists', 'Titles']
for a in tqdm(playlists['items']):
    # `ids` and `titles` are already filled from this same list before the
    # loop starts, so they are not appended to again here (doing so stored
    # every playlist twice).

    drama = a['id']  # id of the playlist processed in this iteration

    video_names = []
    video_ids = []
    date = []

    # Walk through every page of the playlist. playlistItems.list is
    # documented to return at most 50 items per call and to include a
    # 'nextPageToken' in the response while more pages remain.
    page_token = None
    while True:
        list_kwargs = {
            'playlistId': drama,
            'part': 'snippet',
            'maxResults': 50,
        }
        if page_token:
            list_kwargs['pageToken'] = page_token

        playlist_videos = youtube.playlistItems().list(**list_kwargs)
        playlistitems_list_response = playlist_videos.execute()

        for v in playlistitems_list_response['items']:
            video_names.append(v['snippet']['title'])
            video_ids.append(v['snippet']['resourceId']['videoId'])
            date.append(v['snippet']['publishedAt'])

        page_token = playlistitems_list_response.get('nextPageToken')
        if not page_token:
            break

    # One row per playlist entry: publish timestamp, title and video id.
    vdf = pd.DataFrame([date, video_names, video_ids]).T
    vdf.columns = ['Date', 'Title', 'Ids']


    ### Comments ###

    c_vid = []      # id of the video each comment belongs to
    c_reviews = []  # original comment text
    c_likes = []    # like count of the comment
    c_date = []     # publish timestamp of the comment
    c_id = []       # display name of the comment author

    # Iterate over the video ids collected for THIS playlist (`video_ids`).
    # The previous code looped over `video_id`, which is only assigned further
    # down in the loop body: undefined on the first playlist and left over
    # from the previous playlist afterwards.
    for v in video_ids:
        page_token = None
        try:
            # commentThreads.list is documented to accept maxResults up to
            # 100; keep requesting pages while the response carries a
            # 'nextPageToken'.
            while True:
                list_kwargs = {
                    'videoId': v,
                    'order': "relevance",
                    'part': "snippet",
                    'maxResults': 100,
                }
                if page_token:
                    list_kwargs['pageToken'] = page_token

                cm = youtube.commentThreads().list(**list_kwargs).execute()

                for i in cm['items']:
                    top = i['snippet']['topLevelComment']['snippet']
                    c_vid.append(i['snippet']['videoId'])
                    c_date.append(top['publishedAt'])
                    c_id.append(top['authorDisplayName'])
                    c_reviews.append(top['textOriginal'])
                    c_likes.append(top['likeCount'])

                page_token = cm.get('nextPageToken')
                if not page_token:
                    break
        except HttpError as err:
            # An API error for one video (e.g. comments disabled) must not
            # stop the remaining videos: report it and move on. Comments
            # already collected for this video are kept.
            print('Skipping comments for video', v, '-', err)

    # One row per top-level comment.
    c_df = pd.DataFrame([c_vid,c_date, c_id, c_reviews, c_likes]).T
    c_df.columns=['Ids', 'comment date', 'user id','review', 'likes']


    #### Merge ####
    # Join video rows with their comments on the video id (pd.merge defaults
    # to an inner join, so videos without collected comments are dropped).
    df = pd.merge(vdf, c_df, on = 'Ids')

    # Playlist titles are free text: replace characters that are not allowed
    # in file names (or would be read as a path separator) before using the
    # title in the CSV name.
    safe_title = re.sub(r'[\\/:*?"<>|]', '_', a['snippet']['title'])
    csv_name = 'tvNdrama_'+ safe_title + datetime.datetime.now().strftime('%Y%m%d') +'.csv'

    # to_csv does not create missing directories, so make sure it exists.
    os.makedirs("./Excel_Data", exist_ok=True)
    df.to_csv("./Excel_Data/"+csv_name, mode = 'w', index=False)
    ## Per-video information (title, view / like / comment counts, ...) ##

    video_names = []
    date = []
    video_id = []
    category_id=[]
    views=[]
    likes=[]
    comments=[]
    # NOTE(review): `mins` and `seconds` are never filled in this section;
    # kept in case code further down relies on them.
    mins =[]
    seconds=[]
    v_title=[]

    # One videos.list call per video id collected for this playlist.
    for vid in vdf['Ids']:
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id = vid
        )
        response = request.execute()

        items = response.get('items')
        if not items:
            # No data returned for this id: keep the row with placeholders so
            # all columns stay the same length.
            date.append('-')
            v_title.append('-')
            video_id.append('-')
            category_id.append("-")
            views.append("-")
            likes.append("-")
            comments.append("-")
        else:
            info = items[0]
            snippet = info['snippet']
            # Individual counters can be absent from `statistics` (e.g. a
            # hidden like count or disabled comments); fall back to the same
            # '-' placeholder instead of raising KeyError.
            stats = info.get('statistics', {})

            date.append(snippet['publishedAt'])
            v_title.append(snippet['title'])
            video_id.append(info['id'])
            category_id.append(snippet.get('categoryId', '-'))
            views.append(stats.get('viewCount', '-'))
            likes.append(stats.get('likeCount', '-'))
            comments.append(stats.get('commentCount', '-'))


    # One row per video with its metadata and counters.
    drama_df = pd.DataFrame([date, v_title,video_id,category_id,views,likes,comments]).T
    drama_df.columns=['date','title','video_id','category_id','views','likes','comments']