import logging
import time
import urllib
from io import StringIO

import pandas as pd
import tushare as ts
from pandas import DataFrame

from stock_sql.sql_env import *
import requests
from datetime import datetime, timedelta
from stock_sql.orm.key_word import KeyWord
from bs4 import BeautifulSoup
import ssl

# Account id and API key interpolated into the cn.apihz.cn request URLs built
# by the fetch functions in this file.
# NOTE(review): `id` shadows the built-in of the same name, and both values are
# hard-coded credentials — consider loading them from configuration instead.
id = 10002256
key = '5f44dc7f5b641d5be7e2ef5409611db9'

# Write records of level DEBUG and above to src/key_word.log. force=True removes
# any handlers already attached to the root logger before configuring it.
logging.basicConfig(
    filename='src/key_word.log',  # log file name
    level=logging.DEBUG,  # log level
    format='%(asctime)s - %(levelname)s - %(message)s',  # log record format
    force=True
)
# Tushare Pro API client; used by wallstreetcn() to query news.
# NOTE(review): the token is hard-coded in source as well.
pro = ts.pro_api('09f391cea7cefe5233e34b755ecf064e62a6c753c355513acd7ee4ac')


def weibo(timeout: float = 10.0) -> list:
    """Fetch the list served by the apihz ``xinwen/weibo.php`` endpoint.

    Each entry of the response's ``data`` array becomes an unsaved KeyWord
    with src '微博', type '综合', the entry's ``title``, the response's
    ``time2`` as cache_time and the current local time as update_time.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The KeyWord objects built so far; an empty list when the request
        fails or the status code is not 200. Payload errors are logged and
        whatever was parsed before the error is returned.
    """
    logging.info("request weibo")
    url = f'https://cn.apihz.cn/api/xinwen/weibo.php?id={id}&key={key}'
    add_key = []
    try:
        # Without a timeout a stalled server would block this call forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        # Treat transport errors like any other failed request: log and
        # return nothing, so the remaining sources can still be collected.
        logging.error(f"request weibo failed: {e}")
        return add_key
    if response.status_code != 200:
        logging.error(f"请求失败，状态码: {response.status_code}")
        return add_key
    try:
        data = response.json()
        update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        for item in data['data']:
            key_word = KeyWord()
            key_word.src = '微博'
            key_word.type = '综合'
            key_word.title = item['title']
            key_word.cache_time = data['time2']
            key_word.update_time = update_time
            add_key.append(key_word)
    except Exception as e:
        logging.error(f"返回的数据错误：{e}")
    return add_key


def baidu(timeout: float = 10.0) -> list:
    """Fetch the list served by the apihz ``xinwen/baidu.php`` endpoint.

    Each entry of the response's ``data`` array becomes an unsaved KeyWord
    with src '百度', type '综合', title from ``word``, content from ``desc``,
    the response's ``time2`` as cache_time and the current local time as
    update_time.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The KeyWord objects built so far; an empty list when the request
        fails or the status code is not 200. Payload errors are logged and
        whatever was parsed before the error is returned.
    """
    logging.info("request baidu")
    url = f"https://cn.apihz.cn/api/xinwen/baidu.php?id={id}&key={key}"
    add_key = []
    try:
        # Without a timeout a stalled server would block this call forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        # Treat transport errors like any other failed request: log and
        # return nothing, so the remaining sources can still be collected.
        logging.error(f"request baidu failed: {e}")
        return add_key
    if response.status_code != 200:
        logging.error(f"请求失败，状态码: {response.status_code}")
        return add_key
    try:
        data = response.json()
        update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        for item in data['data']:
            key_word = KeyWord()
            key_word.src = '百度'
            key_word.type = '综合'
            key_word.title = item['word']
            key_word.content = item['desc']
            key_word.cache_time = data['time2']
            key_word.update_time = update_time
            add_key.append(key_word)
    except Exception as e:
        logging.error(f"返回的数据错误：{e}")
    return add_key


def weibo_hot_raise(timeout: float = 10.0) -> list:
    """Fetch the list served by the apihz ``xinwen/weibo2.php`` endpoint.

    Each entry of the response's ``data`` array becomes an unsaved KeyWord
    with src '微博', type '实时上升', the entry's ``title``, the response's
    ``time2`` as cache_time and the current local time as update_time.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The KeyWord objects built so far; an empty list when the request
        fails or the status code is not 200. Payload errors are logged and
        whatever was parsed before the error is returned.
    """
    logging.info("request weibo_hot_raise")
    url = f'https://cn.apihz.cn/api/xinwen/weibo2.php?id={id}&key={key}'
    add_key = []
    try:
        # Without a timeout a stalled server would block this call forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        # Treat transport errors like any other failed request: log and
        # return nothing, so the remaining sources can still be collected.
        logging.error(f"request weibo_hot_raise failed: {e}")
        return add_key
    if response.status_code != 200:
        logging.error(f"请求失败，状态码: {response.status_code}")
        return add_key
    try:
        data = response.json()
        update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        for item in data['data']:
            key_word = KeyWord()
            key_word.src = '微博'
            key_word.type = '实时上升'
            key_word.title = item['title']
            key_word.cache_time = data['time2']
            key_word.update_time = update_time
            add_key.append(key_word)
    except Exception as e:
        logging.error(f"返回的数据错误：{e}")
    return add_key


def douyin(timeout: float = 10.0) -> list:
    """Fetch the list served by the apihz ``xinwen/douyin.php`` endpoint.

    Each entry of the response's ``data`` array becomes an unsaved KeyWord
    with src '抖音', type '综合', the entry's ``title``, the response's
    ``time2`` as cache_time and the current local time as update_time.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The KeyWord objects built so far; an empty list when the request
        fails or the status code is not 200. Payload errors are logged and
        whatever was parsed before the error is returned.
    """
    logging.info("request douyin")
    url = f'https://cn.apihz.cn/api/xinwen/douyin.php?id={id}&key={key}'
    add_key = []
    try:
        # Without a timeout a stalled server would block this call forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        # Treat transport errors like any other failed request: log and
        # return nothing, so the remaining sources can still be collected.
        logging.error(f"request douyin failed: {e}")
        return add_key
    if response.status_code != 200:
        logging.error(f"请求失败，状态码: {response.status_code}")
        return add_key
    try:
        data = response.json()
        update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        for item in data['data']:
            key_word = KeyWord()
            key_word.src = '抖音'
            key_word.type = '综合'
            key_word.title = item['title']
            key_word.cache_time = data['time2']
            key_word.update_time = update_time
            add_key.append(key_word)
    except Exception as e:
        logging.error(f"返回的数据错误：{e}")
    return add_key


def toutiao(timeout: float = 10.0) -> list:
    """Fetch the list served by the apihz ``xinwen/toutiao.php`` endpoint.

    Each entry of the response's ``data`` array becomes an unsaved KeyWord
    with src '头条', type '综合', the entry's ``title``, the response's
    ``time2`` as cache_time and the current local time as update_time.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The KeyWord objects built so far; an empty list when the request
        fails or the status code is not 200. Payload errors are logged and
        whatever was parsed before the error is returned.
    """
    logging.info("request toutiao")
    url = f'https://cn.apihz.cn/api/xinwen/toutiao.php?id={id}&key={key}'
    add_key = []
    try:
        # Without a timeout a stalled server would block this call forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        # Treat transport errors like any other failed request: log and
        # return nothing, so the remaining sources can still be collected.
        logging.error(f"request toutiao failed: {e}")
        return add_key
    if response.status_code != 200:
        logging.error(f"请求失败，状态码: {response.status_code}")
        return add_key
    try:
        data = response.json()
        update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        for item in data['data']:
            key_word = KeyWord()
            key_word.src = '头条'
            key_word.type = '综合'
            key_word.title = item['title']
            key_word.cache_time = data['time2']
            key_word.update_time = update_time
            add_key.append(key_word)
    except Exception as e:
        logging.error(f"返回的数据错误：{e}")
    return add_key


def bilibili(timeout: float = 10.0) -> list:
    """Fetch the list served by the apihz ``bang/bilibili1.php`` endpoint.

    Each entry of the response's ``data`` array becomes an unsaved KeyWord
    with src 'bilibili', type '综合', title from ``title``, content from
    ``desc``, the response's ``time2`` as cache_time and the current local
    time as update_time.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The KeyWord objects built so far; an empty list when the request
        fails or the status code is not 200. Payload errors are logged and
        whatever was parsed before the error is returned.
    """
    logging.info("request bilibili")
    url = f'https://cn.apihz.cn/api/bang/bilibili1.php?id={id}&key={key}'
    add_key = []
    try:
        # Without a timeout a stalled server would block this call forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        # Treat transport errors like any other failed request: log and
        # return nothing, so the remaining sources can still be collected.
        logging.error(f"request bilibili failed: {e}")
        return add_key
    if response.status_code != 200:
        logging.error(f"请求失败，状态码: {response.status_code}")
        return add_key
    try:
        data = response.json()
        update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        for item in data['data']:
            key_word = KeyWord()
            key_word.src = 'bilibili'
            key_word.type = '综合'
            key_word.title = item['title']
            key_word.content = item['desc']
            key_word.cache_time = data['time2']
            key_word.update_time = update_time
            add_key.append(key_word)
    except Exception as e:
        logging.error(f"返回的数据错误：{e}")
    return add_key


def zc(pages: int = 3, timeout: float = 15.0) -> list:
    """Scrape headline entries from the gov.cn ``yaowen/liebiao`` list pages.

    Pages ``home_0.htm`` .. ``home_{pages-1}.htm`` are fetched in turn. Every
    ``<h4>`` inside the element with class ``list_1`` yields one unsaved
    KeyWord with src 'zc', type '政策要闻', the link text as title, the
    resolved link as url and the ``<span>`` text as cache_time.

    Args:
        pages: Number of list pages to fetch, starting from index 0.
        timeout: Seconds to wait for each page request.

    Returns:
        The KeyWord objects collected from all pages that could be parsed.
        A failing page is logged and skipped.
    """
    # Imported locally because the file-level `import urllib` alone does not
    # guarantee that the `urllib.request` / `urllib.parse` submodules are loaded.
    import urllib.parse
    import urllib.request

    add_key = []
    update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # NOTE(review): this private ssl helper disables certificate verification,
    # so the responses are not authenticated. Kept as in the original; confirm
    # it is still required for www.gov.cn.
    context = ssl._create_unverified_context()
    for i in range(pages):
        url_page = f"https://www.gov.cn/yaowen/liebiao/home_{i}.htm"
        try:
            # `with` closes the HTTP response even if parsing fails.
            with urllib.request.urlopen(url_page, context=context, timeout=timeout) as page:
                soup = BeautifulSoup(page, 'html.parser')
            listing = soup.find(attrs={'class': 'list_1'})
            if listing is None:
                raise LookupError("no element with class 'list_1' found")
            for h4 in listing.find_all('h4'):
                key_word = KeyWord()
                key_word.src = 'zc'
                key_word.type = '政策要闻'
                key_word.title = h4.a.text
                # urljoin resolves "./x" against the list page exactly like the
                # old prefix replacement, and also copes with "../" and
                # absolute links, which a plain str.replace would mangle.
                key_word.url = urllib.parse.urljoin(url_page, h4.a["href"])
                key_word.cache_time = h4.span.text
                key_word.update_time = update_time
                add_key.append(key_word)
            # Pause between pages; pointless after the last one.
            if i < pages - 1:
                time.sleep(8)
        except Exception as e:
            logging.error(f"web scratch error page：{i} content:{e}")
    return add_key


def etf(timeout: float = 15.0) -> list:
    """Scrape recent valuation rows for four Nasdaq-100 ETFs from haoetf.com.

    For each fund the page ``https://www.haoetf.com/qdii/<code>`` is fetched,
    the fourth HTML table on it is read, and its first 11 rows are turned into
    StockETF records. Rows whose (etf_date, etf_code) pair is already stored
    are skipped; the rest are inserted and committed per fund.

    Args:
        timeout: Seconds to wait for each page request.

    Returns:
        Always an empty list; the function works through its database side
        effects.
    """
    add_key = []
    max_rows = 11
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    funds = [[513100, "国泰纳斯达克100"], [513390, "博时纳斯达克100"], [513300, "华夏纳斯达克100ETF"],
             [159660, "汇添富纳斯达克100"]]
    for i in funds:
        try:
            url_page = f"https://www.haoetf.com/qdii/{i[0]}"
            page = requests.get(url_page, headers=headers, timeout=timeout)
            # Fail with the HTTP error itself rather than a confusing
            # "no tables found" from parsing an error page.
            page.raise_for_status()
            df_list = pd.read_html(StringIO(page.text))  # every <table> on the page
            df = df_list[3]  # NOTE(review): position of the valuation table is assumed — confirm
            stock_list = []
            for _, row in df.head(max_rows).iterrows():
                stock_etf = StockETF()
                stock_etf.etf_code = i[0]
                stock_etf.etf_name = i[1]
                stock_etf.etf_date = row['日期']
                stock_etf.close = row['收盘价']
                stock_etf.t_1_value = row['T-1日净值']
                stock_etf.t_1_nav = row['T-1日估值']
                # Percentage columns are stored without the trailing '%'.
                stock_etf.nav_error = row['估值误差'].replace("%", "")
                stock_etf.t_1_premium = row['T-1日溢价率'].replace("%", "")
                stock_etf.count_number = row['份额(万份)']
                stock_etf.t_1_index_incre = row['T-1日指数涨跌'].replace("%", "")
                already_stored = stock_session.query(StockETF).filter(
                    StockETF.etf_date == stock_etf.etf_date).filter(
                    StockETF.etf_code == stock_etf.etf_code).count()
                if already_stored == 0:
                    stock_list.append(stock_etf)
            stock_session.add_all(stock_list)
            # Commit inside the try so a failed commit is rolled back and
            # logged like any other per-fund failure.
            stock_session.commit()
            time.sleep(2)
        except Exception as e:
            stock_session.rollback()
            logging.error(f"web scratch error page：{i} content:{e}")
    return add_key


def exchange_rate():
    """Append the rows of ``src/rate.csv`` to the ``exchange_rate`` table.

    The '%' character is removed from the ``pct`` column before the frame is
    written through ``stock_db``; the value returned by ``to_sql`` is printed.
    """
    rates = pd.read_csv("src/rate.csv")
    pct_without_sign = rates["pct"].str.replace('%', '')
    rates["pct"] = pct_without_sign
    written = rates.to_sql(
        'exchange_rate',
        stock_db,
        index=False,
        if_exists='append',
        chunksize=5000,
    )
    print(written)


def instert_data_2_db(add_list: list[KeyWord]):
    """Persist the given KeyWord objects, skipping ones that already exist.

    An item counts as existing when a stored row has the same src, type and
    title. All new items are committed together at the end.
    """
    for candidate in add_list:
        # Look for a stored row with the same (src, type, title) triple.
        same_entry = (
            stock_session.query(KeyWord)
            .filter(KeyWord.src == candidate.src)
            .filter(KeyWord.type == candidate.type)
            .filter(KeyWord.title == candidate.title)
        )
        if same_entry.count() == 0:
            stock_session.add(candidate)
    stock_session.commit()


def wallstreetcn(start_date: datetime = datetime(2025, 1, 1),
                 end_date: datetime = datetime(2025, 1, 2)) -> list:
    """Import tushare 'wallstreetcn' news day by day into the KeyWord table.

    For every calendar day from ``start_date`` to ``end_date`` (both
    inclusive) the news of that day is requested from ``pro.news``. Rows with
    repeated content within the day are dropped, and each remaining row
    becomes a KeyWord with src 'tushare' and type '华尔街'. A row is inserted
    only if no stored KeyWord has the same src, type, cache_time and title.
    Each day is committed separately.

    Args:
        start_date: First day to import (defaults to the previously
            hard-coded 2025-01-01).
        end_date: Last day to import (defaults to the previously hard-coded
            2025-01-02).

    Returns:
        All KeyWord candidates built across the whole range, whether or not
        they were newly inserted.
    """
    collected = []
    current_date = start_date
    while current_date <= end_date:
        time.sleep(3)  # pause before each API call
        up = current_date.strftime('%Y-%m-%d 00:00:00')
        down = current_date.strftime('%Y-%m-%d 23:59:59')
        df_top: DataFrame = pro.news(src='wallstreetcn', start_date=up, end_date=down)
        update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        seen_content = set()
        key_word_list = []
        for _, row in df_top.iterrows():
            content = row["content"]
            if content in seen_content:
                continue
            seen_content.add(content)
            key_word = KeyWord()
            key_word.src = 'tushare'
            key_word.type = '华尔街'
            key_word.title = row["title"]
            key_word.content = content
            # cache_time must be set: the duplicate check below compares on
            # it, and an unset value would make every row look like a
            # duplicate of the first one inserted.
            key_word.cache_time = row["datetime"]
            key_word.update_time = update_time
            key_word_list.append(key_word)

        for item in key_word_list:
            count = stock_session.query(KeyWord).filter(KeyWord.src == item.src).filter(
                KeyWord.type == item.type).filter(
                KeyWord.cache_time == item.cache_time).filter(
                KeyWord.title == item.title).count()
            if count != 0:
                continue
            stock_session.add(item)
        stock_session.commit()

        collected.extend(key_word_list)
        # Advance only after the day has been processed, so start_date itself
        # is included and the loop does not run one day past end_date.
        current_date += timedelta(days=1)

    return collected


if __name__ == '__main__':
    # Script entry point: collect keywords from every enabled source, store the
    # new ones, then refresh the ETF valuation rows.
    try:
        # exchange_rate() and weibo_hot_raise() are currently disabled.
        # exchange_rate()
        fetchers = (weibo, baidu, douyin, toutiao, bilibili, zc)
        add_list = []
        for fetch in fetchers:
            add_list.extend(fetch())
        instert_data_2_db(add_list)
        etf()
    except Exception as e:
        print(f"exec error: {e}")
        # logging.exception records the traceback as well as the message, which
        # a plain logging.error call would drop.
        logging.exception(f"exec error: {e}")
