from xbot import print, sleep
import re
import requests
import pymysql
from lxml import etree
class GetMovieData:
    """Fetch a box-office HTML table, parse its rows and insert them into MySQL.

    Workflow (see ``my_main``): download the page, turn every table row into a
    7-tuple, bulk-insert the tuples into the ``movie`` table, then release the
    database resources.
    """

    # Placeholder stored when a cell is missing or cannot be parsed.
    _NO_DATA = '无数据'
    # Value written to the last (submitter) column of every inserted row.
    _SUBMITTER = 'DDillon'
    # Compiled once instead of on every table row.
    _DIGITS_RE = re.compile(r'\d+')
    _NON_WORD_RE = re.compile(r'\W+')

    def __init__(self):
        """Open the MySQL connection and create a cursor on it."""
        # NOTE(review): host and credentials are hard-coded in source; consider
        # loading them from configuration or environment variables instead.
        self.conn = pymysql.connect(
            host='43.143.30.32',
            port=3306,
            user='yingdao',
            passwd='9527',
            db='ydtest'
        )
        self.cursor = self.conn.cursor()

    def save_to_db(self, detail):
        """Bulk-insert parsed rows into the ``movie`` table.

        Args:
            detail: sequence of 7-tuples in the column order of the INSERT
                statement below (as produced by ``get_detail``).

        The whole batch is committed on success; on any error the transaction
        is rolled back and the error is printed instead of being raised.
        """
        sql = "INSERT INTO movie(电影名称, 上映年份, 制片地区, 评分, 导演, 票房, 提交人) VALUES (%s,%s,%s,%s,%s,%s,%s)"
        try:
            self.cursor.executemany(sql, detail)
        except Exception as e:
            self.conn.rollback()
            print('数据插入异常:', e)
        else:
            self.conn.commit()
            print('数据写入成功')

    @staticmethod
    def _split_year_country(text: str) -> tuple:
        """Return ``(year, country)`` extracted from the first-column text.

        ``year`` is the first run of digits as an ``int``; ``country`` is the
        first run of non-word characters.  Either part falls back to the
        no-data placeholder when the pattern does not match.
        """
        # NOTE(review): the country is taken from a ``\W+`` match, exactly as
        # before; confirm against the real cell format that this is intended.
        year_match = GetMovieData._DIGITS_RE.search(text)
        country_match = GetMovieData._NON_WORD_RE.search(text)
        show_year = int(year_match.group()) if year_match else GetMovieData._NO_DATA
        country = country_match.group() if country_match else GetMovieData._NO_DATA
        return show_year, country

    @staticmethod
    def _split_name_rating(text: str) -> tuple:
        """Return ``(name, rating)`` from text shaped like ``"name (rating)"``.

        When the text contains no ``(`` there is no rating to extract, so the
        placeholder is returned instead of repeating the name as the rating.
        """
        parts = text.replace(')', '').split('(')
        name = parts[0]
        ratings = parts[-1] if len(parts) > 1 else GetMovieData._NO_DATA
        return name, ratings

    @staticmethod
    def _first_int(texts, default: int = -1) -> int:
        """Return the first run of digits found in ``texts`` as an ``int``.

        Text nodes are scanned in order so a leading whitespace-only node does
        not hide the number; ``default`` is returned when nothing matches.
        """
        for text in texts:
            match = GetMovieData._DIGITS_RE.search(text)
            if match:
                return int(match.group())
        return default

    @staticmethod
    def get_detail(page_text: str) -> list:
        """Parse the page HTML into a list of row tuples.

        Args:
            page_text: HTML source of the box-office page.

        Returns:
            A list of ``(name, show_year, country, ratings, director,
            box_office, submitter)`` tuples, one per ``<tr>`` under
            ``tbody.row-hover``.  Missing text fields hold the no-data
            placeholder and a missing box office is ``-1``.
        """
        tree = etree.HTML(page_text)
        if tree is None:
            # Nothing parseable in the document: no rows to report.
            return []

        detail = []
        for tr in tree.xpath('//tbody[@class="row-hover"]/tr'):
            year_cells = tr.xpath('./td[@class="column-1"]/text()')
            if year_cells:
                show_year, country = GetMovieData._split_year_country(year_cells[0])
            else:
                show_year = country = GetMovieData._NO_DATA

            name_cells = tr.xpath('./td[@class="column-2"]/text()')
            if name_cells:
                name, ratings = GetMovieData._split_name_rating(name_cells[0])
            else:
                name = ratings = GetMovieData._NO_DATA

            director_cells = tr.xpath('./td[@class="column-3"]/text()')
            director = director_cells[0] if director_cells else GetMovieData._NO_DATA

            # ``//text()`` also collects text nested inside child elements.
            box_office = GetMovieData._first_int(
                tr.xpath('./td[@class="column-4"]//text()')
            )

            detail.append(
                (name, show_year, country, ratings, director, box_office,
                 GetMovieData._SUBMITTER)
            )
        return detail

    @staticmethod
    def get_page_text(retries: int = 5, delay: float = 5, timeout: float = 10) -> str:
        """Download the box-office page, retrying on failure.

        Args:
            retries: maximum number of GET attempts.
            delay: seconds to wait between two attempts.
            timeout: per-request timeout in seconds, so a stalled server
                cannot block the run forever.

        Returns:
            The response body of the first HTTP 200 reply, or the sentinel
            string ``'ERROR'`` when every attempt failed.
        """
        url = 'http://www.boxofficecn.com/the-red-box-office'
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                          '(KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36'
        }
        for attempt in range(retries):
            try:
                response = requests.get(url=url, headers=headers, timeout=timeout)
            except requests.RequestException as e:
                # A network error counts as a failed attempt, not a crash.
                print('页面请求异常:', e)
            else:
                if response.status_code == 200:
                    return response.text
            # No point in waiting once the last attempt has been used up.
            if attempt < retries - 1:
                sleep(delay)
        return 'ERROR'

    def my_main(self):
        """Run the full fetch -> parse -> store pipeline, then close the DB."""
        try:
            # Fetch the page source.
            page_text = self.get_page_text()
            if page_text != 'ERROR':
                # Parse the rows.
                detail = self.get_detail(page_text=page_text)
                # Write the rows to the database.
                self.save_to_db(detail=detail)
        finally:
            # Release DB resources even if fetching or parsing raised.
            self.cursor.close()
            self.conn.close()
def main(args):
    """Entry point: build a ``GetMovieData`` and run its pipeline.

    ``args`` is accepted but not used by this function.
    """
    GetMovieData().my_main()