import openai
import os
import time
import json
import pdb
from tqdm import tqdm
import argparse
import sys
import subprocess

# Reset the completion flag: "0" is written before any work starts and is
# replaced by the final value only after the whole run has succeeded
# (presumably polled by an external script -- confirm with the launcher).
with open('output_flag.txt', "w") as file:
    file.write("0")

# sys.stdout = open('output.txt', 'a')

# Create the command-line parser.
parser = argparse.ArgumentParser(description='stock name')

# --stock_id selects the ticker to process. It is required: without it
# stock_id would be None and the run would only fail later with an opaque
# TypeError inside os.path.join instead of a clear usage message.
parser.add_argument('--stock_id', type=str, required=True, help='传入要处理的股票id')

# Parse the command-line arguments.
args = parser.parse_args()

# Ticker symbol given on the command line.
stock_id = args.stock_id

# Maps each supported ticker symbol to the full company name that is
# substituted into the "[Stock Name]" section of the prompt.
full_name_dict = {
    'DUK': 'Duke Energy Corporation',
    'TSLA': 'Tesla, Inc.',
    'MSFT': 'Microsoft Corporation',
    'ABT': 'Abbott Laboratories',
    'WELL': 'Welltower Inc.',
    'AMZN': 'Amazon.com, Inc.',
    'TMO': 'Thermo Fisher Scientific Inc.',
    'MMM': '3M Company',
    'WMT': 'Walmart Inc.',
    'DIS': 'The Walt Disney Company',
    'JPM': 'JPMorgan Chase & Co.',
    'UNP': 'Union Pacific Corporation',
    'PLD': 'Prologis, Inc.',
    'SCHW': 'The Charles Schwab Corporation',
    'ECL': 'Ecolab Inc.',
    'DE': 'Deere & Company',
    'BHP': 'BHP Group Limited',
    'XEL': 'Xcel Energy Inc.',
    'SO': 'The Southern Company',
    'TM': 'Toyota Motor Corporation',
    'ABBV': 'AbbVie Inc.',
    'PEP': 'PepsiCo, Inc.',
    'XOM': 'Exxon Mobil Corporation',
    'TTE': 'TotalEnergies SE',
    'PM': 'Philip Morris International Inc.',
    'UNH': 'UnitedHealth Group Incorporated',
    'V': 'Visa Inc.',
    'SBAC': 'SBA Communications Corporation',
    'SHW': 'The Sherwin-Williams Company',
    'SNP': 'China Petroleum & Chemical Corporation (Sinopec)',
    'NKE': 'NIKE, Inc.',
    'UL': 'Unilever PLC',
    'RTX': 'Raytheon Technologies Corporation',
    'CVX': 'Chevron Corporation',
    'GE': 'General Electric Company',
    'ADBE': 'Adobe Inc.',
    'HON': 'Honeywell International Inc.',
    'ASML': 'ASML Holding N.V.',
    'SRE': 'Sempra Energy',
    'JNJ': 'Johnson & Johnson',
    'JD': 'JD.com, Inc.',
    'PFE': 'Pfizer Inc.',
    'MCD': "McDonald's Corporation",
    'AEP': 'American Electric Power Company, Inc.',
    'EQIX': 'Equinix, Inc.',
    'TGT': 'Target Corporation',
    'RDS-B': 'Royal Dutch Shell plc',
    'PG': 'The Procter & Gamble Company',
    'C': 'Citigroup Inc.',
    'NFLX': 'Netflix, Inc.',
    'EQNR': 'Equinor ASA',
    'VZ': 'Verizon Communications Inc.',
    'COP': 'ConocoPhillips',
    'CAT': 'Caterpillar Inc.',
    'TMUS': 'T-Mobile US, Inc.',
    'AWK': 'American Water Works Company, Inc.',
    'CCI': 'Crown Castle International Corp.',
    'BBL': 'BHP Group plc',
    'EXC': 'Exelon Corporation',
    'WFC': 'Wells Fargo & Company',
    'COST': 'Costco Wholesale Corporation',
    'MS': 'Morgan Stanley',
    'EL': 'The Estée Lauder Companies Inc.',
    'SPG': 'Simon Property Group, Inc.',
    'DLR': 'Digital Realty Trust, Inc.',
    'PYPL': 'PayPal Holdings, Inc.',
    'DEO': 'Diageo plc',
    'PSA': 'Public Storage',
    'NGG': 'National Grid plc',
    'ACN': 'Accenture plc',
    'FB': 'Meta Platforms, Inc. (formerly Facebook, Inc.)',
    'BRK-A': 'Berkshire Hathaway Inc.',
    'NVDA': 'NVIDIA Corporation',
    'O': 'Realty Income Corporation',
    'LOW': "Lowe's Companies, Inc.",
    'CMCSA': 'Comcast Corporation',
    'NEM': 'Newmont Corporation',
    'MA': 'Mastercard Incorporated',
    'GOOG': 'Alphabet Inc. (formerly Google Inc.)',
    'D': 'Dominion Energy, Inc.',
    'BP': 'BP p.l.c.',
    'T': 'AT&T Inc.',
    'BAC': 'Bank of America Corporation',
    'NVO': 'Novo Nordisk A/S',
    'BA': 'The Boeing Company',
    'ORCL': 'Oracle Corporation',
    'AAPL': 'Apple Inc.',
    'ADP': 'Automatic Data Processing, Inc.',
    'TSM': 'Taiwan Semiconductor Manufacturing Company Limited',
    'UPS': 'United Parcel Service, Inc.',
    'VALE': 'Vale S.A.',
    'DHR': 'Danaher Corporation',
    'HD': 'The Home Depot, Inc.',
    'AVGO': 'Broadcom Inc.',
    'FCX': 'Freeport-McMoRan Inc.',
    'KO': 'The Coca-Cola Company',
    'CHTR': 'Charter Communications, Inc.',
    'BABA': 'Alibaba Group Holding Limited',
    'SBUX': 'Starbucks Corporation',
    'NVS': 'Novartis AG',
    'LLY': 'Eli Lilly and Company',
    'CSCO': 'Cisco Systems, Inc.',
    'AMT': 'American Tower Corporation',
    'PTR': 'PetroChina Company Limited',
    'APD': 'Air Products and Chemicals, Inc.',
    'SNAP': 'Snap Inc.',
    'RIO': 'Rio Tinto Group',
    'NEE': 'NextEra Energy, Inc.',
    'ENB': 'Enbridge Inc.'
}


# Set OpenAI API key.
# The key is read from the OPENAI_API_KEY environment variable rather than
# being hard-coded: a secret committed to source control is readable by
# everyone with access to the repository. Any key that was ever stored in
# this file should be treated as compromised and revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Define system prompt and prompt template
# sys_prompt is sent as the "system" chat message of every request.
sys_prompt = 'As a stock trading news analyst, you are a helpful and precise assistant. Your task is to analyze the correlation between news and the given stock, sentiment polarity of the news, importance of the news, the impact of the news on stock prices, and the duration of the news impact.'
# prompt_template is the layout of the "user" message; its placeholders
# (stock_name, news_content, publish_time, prompt) are filled per news item.
prompt_template = "[Stock Name]\n{stock_name}\n\n[News Content]\n{news_content}\n\n[Publish Time]\n{publish_time}\n\n[System]\n{prompt}\n"
# default_prompt holds the scoring instructions that go into the {prompt}
# slot. The reply is parsed line by line in the order given under
# "Output format", so that order must stay in sync with the parser.
# NOTE(review): the text announces "four dimensions" but lists five. It is
# left unchanged here because editing the prompt changes what the model sees
# and may change its answers -- confirm before fixing.
default_prompt = """I need you to analyze the provided stock-related news from four dimensions:
1. Correlation between the news and the given stock: Rate the correlation on a scale of 0 to 10, where a higher score indicates a stronger correlation between the news and the given stock.
2. Sentiment polarity of the news: Rate the sentiment polarity on a scale of -1 to 1, where a value closer to -1 indicates stronger negative sentiment and a value closer to 1 indicates stronger positive sentiment.
3. Importance of the news event: Rate the importance on a scale of 0 to 10, where a higher score indicates higher importance of the news event.
4. Impact of the news on stock prices: Rate the impact on a scale of 0 to 10, where a higher score indicates a greater impact of the news on stock prices.
5. Duration of the news impact: Rate the duration on a scale of 0 to 10, where a higher score indicates a longer potential duration of the news impact.
(When you encounter a situation where analysis is not possible, please try to avoid assigning all-zero scores and instead make an effort to analyze the text content and derive scores accordingly. Only when analysis is truly impossible should you assign a score of 0 to all factors.)
(Please refrain from providing an analysis and simply provide the answer according to the following format.)

Output format:
Correlation: <Correlation score between the news and the stock>
Sentiment: <Sentiment polarity score of the news>
Importance: <Importance score of the news event>
Impact: <Impact score of the news on stock prices>
Duration: <Duration score of the news impact>
"""

def create_folder(folder_path, folder_name):
    """Create ``folder_name`` inside ``folder_path`` and return its path.

    Missing parent directories (including ``folder_path`` itself) are created
    too, so the first run works even when the output root does not exist yet.
    An already existing folder is reported but is not an error.

    Args:
        folder_path: Directory that should contain the new folder.
        folder_name: Name of the folder to create.

    Returns:
        The joined path of ``folder_path`` and ``folder_name``.
    """
    new_folder_path = os.path.join(folder_path, folder_name)
    try:
        # makedirs without exist_ok still raises FileExistsError when the
        # target already exists, which keeps the two distinct messages below.
        os.makedirs(new_folder_path)
        print(f"文件夹 '{folder_name}' 已成功创建在文件夹 '{folder_path}' 中。")
    except FileExistsError:
        print(f"文件夹 '{folder_name}' 已存在于文件夹 '{folder_path}' 中。")
    return new_folder_path

def get_directory_names(folder_path):
    """Return the names of the immediate sub-directories of ``folder_path``.

    Only the entry names are returned (not full paths), in the order
    ``os.listdir`` reports them.
    """
    directories = []
    for entry in os.listdir(folder_path):
        if os.path.isdir(os.path.join(folder_path, entry)):
            directories.append(entry)
    return directories

def get_file_names(folder_path):
    """Return the names of the regular files directly inside ``folder_path``.

    Sub-directories are left out; names come back in ``os.listdir`` order.
    """
    def is_regular_file(name):
        return os.path.isfile(os.path.join(folder_path, name))

    return list(filter(is_regular_file, os.listdir(folder_path)))

def read_file(file_path):
    """Read a JSON Lines file and return its decoded records.

    Each non-blank line is parsed with ``json.loads``. Blank lines are
    skipped. Reading is best-effort: a missing file yields an empty list,
    and any other error (for example a malformed line) is reported and ends
    the read, returning the records decoded up to that point.

    Args:
        file_path: Path of the UTF-8 encoded file, one JSON document per line.

    Returns:
        A list with one decoded object per non-blank line read.
    """
    lines = []
    try:
        with open(file_path, 'r', encoding='utf8') as file:
            for line in file:
                line = line.strip()
                if not line:
                    # A blank line carries no record; feeding it to
                    # json.loads would raise and drop the rest of the file.
                    continue
                lines.append(json.loads(line))
    except FileNotFoundError:
        print(f"文件 '{file_path}' 不存在。")
    except Exception as e:
        print(f"读取文件 '{file_path}' 时发生错误：{str(e)}")
    return lines

def open_output_file(outfile, inlines):
    """Open ``outfile`` for appending and drop the records already processed.

    If ``outfile`` exists, the number of lines it contains is taken as the
    number of leading entries of ``inlines`` that were handled by an earlier
    run, and those entries are removed from the returned list.

    Args:
        outfile: Path of the output file.
        inlines: Input records in processing order.

    Returns:
        A tuple ``(outs, inlines)``: ``outs`` is the file opened in append
        mode (the caller must close it), or ``None`` when the file cannot be
        accessed because of a ``PermissionError``; ``inlines`` holds the
        records that still have to be processed.
    """
    try:
        if os.path.exists(outfile):
            # Count the existing lines inside a context manager so the read
            # handle is closed right away, using the encoding it is written in.
            with open(outfile, 'r', encoding='utf8') as existing:
                num_lines = sum(1 for _ in existing)
            inlines = inlines[num_lines:]
        outs = open(outfile, 'a', encoding='utf8')
        return outs, inlines
    except PermissionError:
        print(f"无法操作文件 '{outfile}'。")
        return None, inlines



# def analyze_stock_news(out_folder_path, data_path, stock_name):
#     outputfolder = create_folder(out_folder_path, stock_name)
#     folder_path = os.path.join(data_path, stock_name)
#     date_list = get_file_names(folder_path)
#     sub_pbar = tqdm(total=len(date_list), desc=stock_name)

#     for date in date_list:
#         outfile = os.path.join(outputfolder, f'{date}.jsonl')
#         inputfile = os.path.join(folder_path, date)
#         inlines = read_file(inputfile)
#         inlines = inlines[:20]
#         outs, inlines = open_output_file(outfile, inlines)  # 接收返回的 inlines
#         if outs is None:
#             print(f"无法打开文件 '{outfile}' 进行覆盖写入。")
#             continue  # 跳过当前迭代，处理下一个日期
#         index = 0

#         while index < len(inlines):
#             row = inlines[index]
#             text = row['text']
#             p_time = row['created_at']

#             user_prompt = prompt_template.format(stock_name=full_name_dict[stock_name], news_content=text, publish_time=p_time, prompt=default_prompt)

#             while True:
#                 try:
#                     response = openai.Completion.create(
#                         engine="text-davinci-002",
#                         prompt=user_prompt,
#                         temperature=0
#                     )
#                     break
#                 except openai.error.ServiceUnavailableError:
#                     print('等待服务器响应...')
#                     time.sleep(5)
#             pdb.set_trace()
#             outputs = response['choices'][0]['text']
#             print(outputs)

#             information = json.loads(outputs)
#             correlation = information["Correlation"]
#             sentiment = information["Sentiment"]
#             importance = information["Importance"]
#             impact = information["Impact"]
#             duration = information["Duration"]

#             new = {
#                 "text": row['text'],
#                 "datetime": row['created_at'],
#                 "Correlation": correlation,
#                 "Sentiment": sentiment,
#                 "Importance": importance,
#                 "Impact": impact,
#                 "Duration": duration,
#             }
#             outs.write(json.dumps(new) + '\n')
#             index += 1
#         outs.close()
#         sub_pbar.update(1)  # 更新进度条，每次迭代更新一个单位
#     sub_pbar.close()


# Order in which the prompt asks the model to list its scores; replies are
# mapped onto these fields by line position.
_SCORE_FIELDS = ("Correlation", "Sentiment", "Importance", "Impact", "Duration")


def _parse_scores(outputs):
    """Map the non-empty lines of a model reply onto the five score fields."""
    information = [item for item in outputs.split("\n") if item != ""]
    scores = {}
    for position, field in enumerate(_SCORE_FIELDS):
        if position < len(information):
            # Keep the raw text after the last colon, e.g. " 8" for
            # "Correlation: 8", exactly as it has always been stored.
            scores[field] = information[position].split(":")[-1]
        else:
            # Short reply: record the gap instead of raising IndexError.
            scores[field] = None
    return scores


def analyze_stock_news(out_folder_path, data_path, stock_name):
    """Score the news of one stock with the chat model and store the results.

    For every file in ``data_path/stock_name`` the first 20 records are sent,
    one request per record, to ``gpt-3.5-turbo``. Each answer is appended as
    one JSON line to ``<out_folder_path>/<stock_name>/<file name>.jsonl``.
    Records that already have a line in the output file are skipped, so an
    interrupted run can be resumed. Exactly one line is written per processed
    record, which is what the resume logic relies on.

    Args:
        out_folder_path: Root directory for the results.
        data_path: Root directory holding one folder of news files per stock.
        stock_name: Ticker symbol; also the name of the input/output folder.

    Raises:
        KeyError: If a record has no ``'text'`` or ``'created_at'`` entry.
        Errors raised by the OpenAI client other than
        ``ServiceUnavailableError`` (which is retried) propagate to the caller.
    """
    outputfolder = create_folder(out_folder_path, stock_name)
    folder_path = os.path.join(data_path, stock_name)
    date_list = get_file_names(folder_path)
    # A ticker missing from the name table falls back to the ticker itself
    # rather than aborting the run with a KeyError.
    company_name = full_name_dict.get(stock_name, stock_name)
    sub_pbar = tqdm(total=len(date_list), desc=stock_name)
    try:
        for date in date_list:
            outfile = os.path.join(outputfolder, f'{date}.jsonl')
            inputfile = os.path.join(folder_path, date)
            inlines = read_file(inputfile)
            inlines = inlines[:20]
            # open_output_file also returns the records still to be processed.
            outs, inlines = open_output_file(outfile, inlines)
            if outs is None:
                print(f"无法打开文件 '{outfile}' 进行覆盖写入。")
                continue  # skip this file and move on to the next one
            # The context manager closes (and flushes) the output even when a
            # request or a record lookup raises, so finished rows are kept.
            with outs:
                for row in inlines:
                    text = row['text']
                    p_time = row['created_at']

                    user_prompt = prompt_template.format(stock_name=company_name, news_content=text, publish_time=p_time, prompt=default_prompt)

                    # Retry for as long as the service reports itself unavailable.
                    while True:
                        try:
                            completion = openai.ChatCompletion.create(
                                model="gpt-3.5-turbo",
                                messages=[
                                    {"role": "system", "content": sys_prompt},
                                    {"role": "user", "content": user_prompt},
                                ],
                                temperature=0
                            )
                            break
                        except openai.error.ServiceUnavailableError:
                            print('等待服务器响应...')
                            time.sleep(3)
                    outputs = completion.choices[0].message.content
                    print(outputs)

                    new = {
                        "text": text,
                        "datetime": p_time,
                        **_parse_scores(outputs),
                    }
                    outs.write(json.dumps(new) + '\n')
            sub_pbar.update(1)  # one step per fully processed file
    finally:
        sub_pbar.close()

def analyze_all_stocks(data_path, out_folder_path):
    """Run ``analyze_stock_news`` for every stock folder under ``data_path``.

    Each sub-directory name of ``data_path`` is treated as a stock name; a
    "Main" progress bar advances by one after each stock has been processed.
    """
    tickers = get_directory_names(data_path)
    progress = tqdm(total=len(tickers), desc="Main")
    for ticker in tickers:
        analyze_stock_news(out_folder_path, data_path, ticker)
        # Advance the bar by one unit per processed stock.
        progress.update(1)
    progress.close()


# Input root: one sub-folder per stock, each holding that stock's news files.
data_path = 'my_data/message/american/preprocessed'
# Output root: a sub-folder per stock is created underneath it.
out_folder_path = 'CMIN_gpt_data/us'
# analyze_all_stocks(data_path, out_folder_path)

# Process only the stock requested on the command line.
analyze_stock_news(out_folder_path, data_path, stock_id)
print("————————————————————————————————————结束啦————————————————————————————————————————————————")

# Completion indicator that is persisted below.
finish = 1

# Call the shell script and pass the indicator to it.
# subprocess.run(['bash', 'script.sh', str(finish)], check=True)

# Store the indicator in the flag file now that the run has completed.
with open('output_flag.txt', "w") as flag_file:
    flag_file.write(str(finish))