유튜브 api 가입 키값을 발급받은 후,
아래와 같이 설치해주고,
pip install google-api-python-client
import datetime
import os
import re

from tqdm import tqdm
tvNdrama 채널의 재생 리스트를 확인하고,
특정 재생리스트의 댓글을 모두 가져오고 싶을때
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
# from oauth2client.tools import argparser
# --- YouTube Data API client, channel lookup and playlist listing ---------
# Produces three module-level names used further down the script:
#   youtube    - the API client
#   channel_id - id of the first channel matching the search query
#   playlists  - playlists.list response whose 'items' holds ALL playlists
DEVELOPER_KEY = ' '  # my YouTube API key (placeholder; fill in before running)
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'

youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)

# Restrict the search to channels: without `type`, the top hit may be a video
# or a playlist, so items[0] would not necessarily be the channel we asked for.
search_response = youtube.search().list(
    q='tvN drama',  # channel name
    type='channel',
    order='relevance',
    part='snippet',
    maxResults=50,
).execute()

if not search_response.get('items'):
    # Same exception type as the original items[0] lookup, with a usable message.
    raise IndexError("YouTube search returned no channel for query 'tvN drama'")

channel_id = search_response['items'][0]['snippet']['channelId']

# The first page holds at most 50 playlists; keep following nextPageToken and
# append the extra pages to the first response so `playlists['items']` is complete.
playlists = youtube.playlists().list(
    channelId=channel_id,
    part='snippet',
    maxResults=50,
).execute()
playlists.setdefault('items', [])

next_page_token = playlists.get('nextPageToken')
while next_page_token:
    playlists_page = youtube.playlists().list(
        channelId=channel_id,
        part='snippet',
        maxResults=50,
        pageToken=next_page_token,
    ).execute()
    playlists['items'].extend(playlists_page.get('items', []))
    next_page_token = playlists_page.get('nextPageToken')
# Playlists operated by the channel
import pandas as pd
# Pull the id and title out of every playlist entry, then lay the two lists
# out as the columns of a small lookup table (one row per playlist).
ids = [entry['id'] for entry in playlists['items']]
titles = [entry['snippet']['title'] for entry in playlists['items']]

df = pd.DataFrame([ids, titles]).transpose()
df.columns = ['PlayLists', 'Titles']
# --- For every playlist: list its videos, collect their top-level comments,
# --- join both tables and write one CSV per playlist ------------------------
# NOTE(review): the original indentation was lost; everything down to the
# to_csv call is assumed to belong to the per-playlist loop because the CSV
# name is built from the loop variable `a`.

OUTPUT_DIR = "./Excel_Data"
PLAYLIST_PAGE_SIZE = 50   # playlistItems.list accepts at most 50 per page
COMMENT_PAGE_SIZE = 100   # commentThreads.list accepts at most 100 per page


def _fetch_playlist_videos(client, playlist_id):
    """Return (published_at, title, video_id) tuples for all items of a playlist."""
    rows = []
    page_token = None
    while True:
        params = {
            'playlistId': playlist_id,
            'part': 'snippet',
            'maxResults': PLAYLIST_PAGE_SIZE,
        }
        if page_token:
            params['pageToken'] = page_token
        page = client.playlistItems().list(**params).execute()
        for item in page.get('items', []):
            snippet = item['snippet']
            rows.append((
                snippet['publishedAt'],
                snippet['title'],
                snippet['resourceId']['videoId'],
            ))
        page_token = page.get('nextPageToken')
        if not page_token:
            return rows


def _fetch_video_comments(client, video_id):
    """Return (video_id, published_at, author, text, likes) per top-level comment."""
    rows = []
    page_token = None
    while True:
        params = {
            'videoId': video_id,
            'order': 'relevance',
            'part': 'snippet',
            'maxResults': COMMENT_PAGE_SIZE,
        }
        if page_token:
            params['pageToken'] = page_token
        page = client.commentThreads().list(**params).execute()
        for thread in page.get('items', []):
            top_comment = thread['snippet']['topLevelComment']['snippet']
            rows.append((
                thread['snippet']['videoId'],
                top_comment['publishedAt'],
                top_comment['authorDisplayName'],
                top_comment['textOriginal'],
                top_comment['likeCount'],
            ))
        page_token = page.get('nextPageToken')
        if not page_token:
            return rows


# Create the target folder up front so a missing directory cannot fail the
# run after all the API calls have already been made.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for a in tqdm(playlists['items']):
    drama = a['id']

    # Videos of this playlist.
    vdf = pd.DataFrame(
        _fetch_playlist_videos(youtube, drama),
        columns=['Date', 'Title', 'Ids'],
    )

    ### Comments ####
    comment_rows = []
    for v in vdf['Ids']:
        try:
            comment_rows.extend(_fetch_video_comments(youtube, v))
        except HttpError as err:
            # Best effort: a video whose comments cannot be read is reported
            # and skipped instead of silently aborting the remaining videos.
            tqdm.write('Skipping comments of video {}: {}'.format(v, err))
    c_df = pd.DataFrame(
        comment_rows,
        columns=['Ids', 'comment date', 'user id', 'review', 'likes'],
    )

    #### Merge #####
    df = pd.merge(vdf, c_df, on='Ids')

    # Playlist titles are free text; replace characters that are not valid in
    # file names (path separators in particular) before building the path.
    safe_title = re.sub(r'[\\/:*?"<>|]', '_', a['snippet']['title'])
    csv_name = 'tvNdrama_' + safe_title + datetime.datetime.now().strftime('%Y%m%d') + '.csv'
    df.to_csv(os.path.join(OUTPUT_DIR, csv_name), mode='w', index=False)
## Video list info (video list, like counts, etc.) ###
# Builds `drama_df`: one row per video id in `vdf['Ids']` with publish date,
# title, id, category and view/like/comment counters.
# NOTE(review): the original indentation was lost; this section is assumed to
# run after the playlist loop, i.e. on the `vdf` of the last playlist - confirm.
VIDEO_COLUMNS = ['date', 'title', 'video_id', 'category_id', 'views', 'likes', 'comments']
MISSING = '-'  # placeholder the script uses for unavailable values

video_rows = []
for current_id in vdf['Ids']:
    response = youtube.videos().list(
        part="snippet,contentDetails,statistics",
        id=current_id,
    ).execute()

    items = response.get('items')
    if not items:
        # Nothing returned for this id: keep a placeholder row so the table
        # still has one row per playlist entry.
        video_rows.append([MISSING] * len(VIDEO_COLUMNS))
        continue

    video = items[0]
    snippet = video['snippet']
    # Individual counters can be absent from `statistics` (presumably hidden
    # likes / disabled comments - confirm against the API reference), so fall
    # back to the placeholder instead of raising KeyError.
    stats = video.get('statistics', {})
    video_rows.append([
        snippet['publishedAt'],
        snippet['title'],
        video['id'],
        snippet['categoryId'],
        stats.get('viewCount', MISSING),
        stats.get('likeCount', MISSING),
        stats.get('commentCount', MISSING),
    ])

drama_df = pd.DataFrame(video_rows, columns=VIDEO_COLUMNS)
'Coding > Python' 카테고리의 다른 글
[python] 유튜브 댓글 크롤링(api 없이) (0) | 2022.04.06 |
---|---|
[Python] Numpy (0) | 2022.03.18 |
[Python] 데이터 시각화 (0) | 2022.03.16 |
[Python] 에러 : python-3.x Couldn't find Class NSProcessInfo (0) | 2022.03.16 |
[Python] Module (0) | 2022.03.07 |