# coding=utf-8
import asyncio
import json
import re
import time
from functools import partial, wraps
from json import JSONDecodeError

import requests
from glom import glom
from playwright.async_api import Browser

from abs_spider import AbstractAiSeoSpider
from domain.ai_seo import AiAnswer, AiSearchResult
from utils import create_logger, css_to_dict
from utils.image_utils import crop_image_left,remove_bottom_part

logger = create_logger(__name__)

# Seconds to wait for the HTTP endpoints before giving up.
_REQUEST_TIMEOUT = 30

# Matches the "citation:<n>" markers embedded in the answer text.
_CITATION_RE = re.compile(r'citation:(\d+)')

_USER_AGENT = (
    "Mozilla/5.0 (iPhone; CPU iPhone OS 17_6_1 like Mac OS X) AppleWebKit/605.1.15 "
    "(KHTML, like Gecko) Mobile/15E148 light/1.0 newapp/4.20.0.10 (Baidu; P2 17.6.1)"
)

# Streaming chat endpoint.
# The original literal contained "cfrom=1027594s¤tLLMModel=auto": the HTML entity
# "&curren" had been decoded to "¤" somewhere along the way, fusing the cfrom value
# with the next parameter. It is restored to "&currentLLMModel=auto" here.
_CHAT_URL = (
    "https://yiyanapp.baidu.com/chat/completions"
    "?appmode=nor&appname=newapp&cfrom=1027594s&currentLLMModel=auto"
    "&ds_lv=0&ds_stc=0.0000&from=1027594s&llmMode=auto&matrixstyle=0&network=1_0"
    "&sid=148615_1-999987_2-999992_1-999991_1-999999_71-164345_3-999989_1-165121_1-999990_1-999988_1-167537_2-999994_2-999993_1"
    "&st=2&ua=1170_2532_iphone_4.20.0.10_0"
    "&uid=808BA951B6493F89F2D28A62C7438A9332566F6AAOHBGTHSHBE"
    "&ut=iPhone13%2C2_17.6.1"
    "&zid=U8HLkcXuwbe2-gJK1hw1Goz5QuMxZdMQJSqN2Fs15rnxRNfQ727QKdNMr45YRMLu2XeEydINMJvMu1WKw5cU5kg"
    "&jt=31%24eyJrIj4iOCI0Iix5IkciQEdGRUVITkpITlBMSyJJIkFqIjwiNTw7Ojo9Qz89Q0VARiI%2BIjYzIlEiSlFQT09SWDQyNzs2NyIzIit5IkYiPiI8IjYiTiJHRUlRSEtKTyJLImsiPSI0PD9AelRyb2NPUWNrZFc2RTBHLCw4YjN3eDJhdlpQU3FQUlBQcipfMWhdRURWSEk1NTFjWy5oO0JncUl5cEo2K2xZcmtsK255Z1k7VCxGYjZdOz1hai5iaTkuMFtSb1U2bWhQXFFOUngvN1lqY2Y3WU1QZF4xYGM8MkpBWVJxPHp2Nzs6bmNVUUV3ZjFsa0xaXl9bTnNkOzFuQFRVMm9qb1Vxalc5UFJCRj5gVTowZmwtbkldcXFCN0MvbDlvMlM0Tk9cWkJzPDExVmJXNUl3Wkc5aGhLYUQ6NGxlcDVpSk0uMklPK3opVGN6dzlTNzdlR1pLZTBzbz1XdWRSNUZWdkxIP2tVYFl5RVZ2Vkoqa3hwK180ZitudWdHY3BLTzdPP0xNY3cyc0RUMFxgYEZtLF9ibF1kK2JjZ3N3Uipxa0locj5tX1xyWVB4RXkyQngvLC9RLFE%2FdFZCNlN0dFFZVy0uLTtYWixFLjBNfGNWbCxhR05fb19tbjhrVCloakdHS0thY0JYcj5lZUJyVmgsYC0xR01oPjlafFAwfDo7amZraj88P0FEPz9GSHd4RkZNT0xJT1FNLU5SU2MzIn0%3D"
)

# Share-link creation endpoint (same "&currentLLMModel" restoration as above).
# NOTE(review): "ut" is double-encoded here (%252C) but single-encoded (%2C) in the
# chat URL -- left untouched; confirm which form the server expects.
_SHARE_URL = (
    "https://yiyanapp.baidu.com/api/share/create"
    "?appmode=nor&appname=newapp&cfrom=1027594s&currentLLMModel=auto"
    "&ds_lv=0&ds_stc=0.0000&from=1027594s&llmMode=auto&matrixstyle=0&network=1_0"
    "&sid=148615_1-999987_2-999992_1-159581_2-999991_1-164345_3-160429_1-999989_1-165121_1-999990_1-162727_1-999999_71-999988_1-163324_4-999994_2-999993_1-160037_5"
    "&st=2&ua=1170_2532_iphone_4.20.0.10_0"
    "&uid=808BA951B6493F89F2D28A62C7438A9332566F6AAOHBGTHSHBE"
    "&ut=iPhone13%252C2_17.6.1"
    "&zid=U8HLkcXuwbe2-gJK1hw1Goz5QuMxZdMQJSqN2Fs15rnzb0Vss7pxP-zc-wBRYuJI8qmsMDWWRwkcd-EivBG0P8Q"
)


def _parse_sse_payload(line):
    """Return the ``data.data`` dict carried by one raw stream line, or None.

    ``line`` is one raw ``bytes`` line from the streamed response. Lines that
    are not ``data:`` events, are not valid UTF-8/JSON, or do not contain the
    nested ``data -> data`` object are ignored rather than raising.
    """
    try:
        text = line.decode("utf-8")
    except UnicodeDecodeError:
        return None
    if not text.startswith("data:"):
        return None
    try:
        event = json.loads(text[5:])
    except JSONDecodeError:
        return None
    outer = event.get("data") if isinstance(event, dict) else None
    inner = outer.get("data") if isinstance(outer, dict) else None
    return inner if isinstance(inner, dict) else None


class WenxiaoyanSpider(AbstractAiSeoSpider):
    """Spider for the "wenxiaoyan" platform (platform id 8).

    The answer is fetched over HTTP from the yiyanapp.baidu.com chat endpoint;
    when the answer carries references, a share page is created, opened in the
    browser page and screenshotted.
    """

    def __init__(self, browser: Browser, prompt: str, keyword: str):
        super().__init__(browser, prompt, keyword)

    def get_home_url(self) -> str:
        """Return the home URL; empty because this spider does not open one."""
        return ''

    def get_platform_id(self) -> int:
        """Return the numeric platform id (8)."""
        return 8

    def get_platform_name(self) -> str:
        """Return the platform name ('wenxiaoyan')."""
        return 'wenxiaoyan'

    def get_answer(self, question: str) -> dict:
        """Send ``question`` to the chat endpoint and collect the streamed answer.

        Returns a dict with the keys ``messageId``, ``parentId``, ``reference``
        and ``answer``. If the request fails, the error is logged and an empty
        dict is returned instead of raising.
        """
        # NOTE: "Acs-Token" and "X-Bd-Alive" headers were previously present but
        # commented out; their hard-coded values (and a hard-coded cookie) were
        # removed from this file. The cookie comes from self.session_info.
        headers = {
            "Host": "yiyanapp.baidu.com",
            "Accept": "text/event-stream",
            "Accept-Language": "zh-CN,zh-Hans;q=0.9",
            "Sec-Fetch-Mode": "cors",
            "Accept-Encoding": "gzip, deflate, br",
            "Origin": "https://yiyanapp.baidu.com",
            "User-Agent": _USER_AGENT,
            "Referer": "https://yiyanapp.baidu.com/talk/chat",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "Sec-Fetch-Dest": "empty",
            "Cookie": self.session_info.get("cookie"),
        }
        payload = {
            "content": question,
            "sessionId": "",
            "contentType": "text",
            "parentId": "",
            "inputSource": "user_input",
            "querySource": "user_input",
            "context": [],
            "plugins": {},
            "chooses": None,
            "models": {
                "dispatch": "",
                "force": "",
                "select": "auto",
                "replyMode": "normal",
            },
            "newBot": True,
            "from": "app",
            "promptId": "",
            "botId": "",
            "characterId": "1034",
            "environment": {
                "clarify": {"enable": 1, "stage": 1},
                "isNewAB": 1,
                "depthAnalysis": 0,
                "depthMsgId": "",
                "live": {
                    "visOnline": {
                        "pdt": "10170",
                        "per": "4189",
                        "audio_ctrl": "{\"mid\":\"\",\"sampling_rate\":24000}",
                        "spd": "5",
                    }
                },
                "interveneId": "",
                "hostTag": "main_chat",
                "individual": "0",
                "composition": None,
                "memory": {"noMemoryExtraction": 0},
                "sceneFrom": "",
                "isReplyModeChange": 0,
            },
        }

        dic = {}
        try:
            answer_parts = []
            reference = None
            message_id = ''
            parent_id = ''
            # The context manager releases the streamed connection even if
            # iteration stops early.
            with requests.post(_CHAT_URL, headers=headers, json=payload,
                               stream=True, timeout=_REQUEST_TIMEOUT) as r:
                for line in r.iter_lines():
                    if not line:
                        continue
                    data = _parse_sse_payload(line)
                    if data is None:
                        continue
                    logger.debug(f"stream event: {data}")
                    # The ids are taken from the event that carries the
                    # reference list (indentation of the original was lost;
                    # this nesting is the most direct reading of it).
                    if data.get("reference"):
                        reference = data.get("reference")
                        message_id = data.get("messageId")
                        parent_id = data.get("parentId")
                    content = data.get("content", "")
                    if isinstance(content, str):
                        answer_parts.append(content)
            answer = "".join(answer_parts)
            logger.info(f"✅ 回答：{answer}...\n")
            dic['messageId'] = message_id
            dic['parentId'] = parent_id
            dic['reference'] = reference
            dic['answer'] = answer
        except Exception as e:
            # Best-effort boundary: the caller receives an empty dict on failure.
            logger.error(f"❌ 请求失败：{e}")
        return dic

    def get_url(self, idList):
        """Create a share page for the given messages and return its URL.

        ``idList`` is the comma-separated "<parentId>,<messageId>" string.

        Raises:
            ValueError: the response body does not contain ``data.url``
                (an invalid-JSON body raises the error from ``resp.json()``).
            requests.RequestException: the HTTP request itself failed.
        """
        # NOTE: an "Acs-Token" header was previously present but commented out;
        # its hard-coded value (and a hard-coded cookie) were removed.
        headers = {
            "Host": "yiyanapp.baidu.com",
            "Accept": "application/json, text/plain, */*",
            "Sec-Fetch-Site": "same-origin",
            "Accept-Language": "zh-CN,zh-Hans;q=0.9",
            "Accept-Encoding": "gzip, deflate, br",
            "Sec-Fetch-Mode": "cors",
            "Content-Type": "application/json",
            "Origin": "https://yiyanapp.baidu.com",
            "User-Agent": _USER_AGENT,
            "Referer": "https://yiyanapp.baidu.com/talk/chat",
            "Connection": "keep-alive",
            "Sec-Fetch-Dest": "empty",
            "Cookie": self.session_info.get("cookie"),
        }
        payload = {
            "idList": idList,
            "watermark": "xlSKFmsf",
            "vp": 0,
        }
        resp = requests.post(_SHARE_URL, headers=headers, json=payload,
                             timeout=_REQUEST_TIMEOUT)
        body = resp.json()  # parse once instead of once per access
        logger.debug(f"{body}")
        data = body.get("data") if isinstance(body, dict) else None
        share_url = data.get("url") if isinstance(data, dict) else None
        if not share_url:
            raise ValueError(f"share/create response has no data.url: {body!r}")
        logger.info(f"返回内容： {share_url}")
        return share_url

    async def _capture_share_screenshot(self, idList) -> str:
        """Open the share page for ``idList``, screenshot it and return the file path."""
        loop = asyncio.get_running_loop()
        # get_url performs blocking HTTP; keep it off the event loop.
        share_url = await loop.run_in_executor(None, partial(self.get_url, idList))
        await self.browser_page.goto(share_url, timeout=600000)
        await asyncio.sleep(3)
        chat_input_element = self.browser_page.locator('//*[@id="35"]/div[1]/div/div/div[2]')
        await chat_input_element.click()
        # Locate the answer element.
        answer = self.browser_page.locator('//*[@id="app"]/div/div[1]').nth(-1)
        box = await answer.bounding_box()
        # Resize the viewport so the whole answer fits. bounding_box() returns
        # None for an element that is not visible; keep the current viewport then.
        if box:
            await self.browser_page.set_viewport_size({
                'width': 1920,
                'height': int(box['height']) + 500
            })
        # Take the screenshot.
        screenshot_path = self._get_screenshot_path()
        await self.browser_page.screenshot(path=screenshot_path)
        # Crop the bottom strip off the image.
        remove_bottom_part(screenshot_path, 82)
        return screenshot_path

    @staticmethod
    def _build_search_results(reference, answer_text):
        """Convert the ``searchInfo`` entries of ``reference`` into AiSearchResult objects.

        An entry is flagged ``is_referenced="1"`` when its 1-based position
        appears as a ``citation:<n>`` marker in ``answer_text``; entries
        without both a title and a URL are dropped.
        """
        # Numbers found in the "citation:" markers of the answer.
        cited = set(_CITATION_RE.findall(answer_text or ""))
        ai_search_result_list = []
        for index, search_result in enumerate(reference.get("searchInfo") or []):
            url = search_result.get('url', '')
            title = search_result.get('title', '')
            body = search_result.get('snippet', '')
            publish_time = search_result.get('published_at', '')
            host_name = search_result.get('site_name', '未知')
            is_referenced = "1" if str(index + 1) in cited else "0"
            ai_result = AiSearchResult(url=url, title=title, body=body,
                                       publish_time=publish_time,
                                       host_name=host_name,
                                       is_referenced=is_referenced)
            if ai_result.title and ai_result.url:
                ai_search_result_list.append(ai_result)
            logger.debug(f"ai参考资料: [{host_name}]{title}({url})")
        return ai_search_result_list

    async def _do_spider(self,) -> AiAnswer:
        """Fetch the answer for ``self.prompt`` and fill ``self.ai_answer``.

        The share-page screenshot and the reference list are only produced
        when the answer carries references; otherwise only the answer text is
        stored.
        """
        self._init_data()
        self.search_result_count = 0
        logger.debug(f"self.prompt {self.prompt}")

        # get_answer uses blocking `requests`; run it in the default executor
        # so the event loop is not stalled while the answer streams in.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, partial(self.get_answer, self.prompt))

        reference = result.get("reference")
        parent_id = result.get("parentId")
        message_id = result.get("messageId")
        idList = None
        # Both ids are required to build the share request.
        if reference and parent_id and message_id:
            idList = f"{parent_id},{message_id}"

        if idList:
            screenshot_path = await self._capture_share_screenshot(idList)
            self.ai_answer.screenshot_file = screenshot_path

        if reference:
            self.ai_answer.search_result = self._build_search_results(
                reference, result.get("answer"))
            self.search_result_count = len(self.ai_answer.search_result)

        self.ai_answer.answer = result.get("answer")
        return self.ai_answer