# coding=utf-8
"""Spider for the Baidu "wenxiaoyan" assistant.

The spider asks a question through the mobile-app chat endpoint (a
``text/event-stream`` response), then creates a share link for the exchange
and screenshots the shared page in the browser.
"""
import asyncio
import json
import re
import time
from functools import partial, wraps
from json import JSONDecodeError
from typing import Any, Dict, List, Optional

import requests
from glom import glom
from playwright.async_api import Browser

from abs_spider import AbstractAiSeoSpider
from domain.ai_seo import AiAnswer, AiSearchResult
from utils import create_logger, css_to_dict
from utils.image_utils import crop_image_left, remove_bottom_part

logger = create_logger(__name__)

# Query-string parameters shared by both endpoints. The values are hard-coded
# device/app identifiers taken from a captured request.
_COMMON_QUERY = (
    "appmode=nor&appname=newapp&cfrom=1027594s&currentLLMModel=auto"
    "&ds_lv=0&ds_stc=0.0000&from=1027594s&llmMode=auto&matrixstyle=0"
    "&network=1_0"
    "&sid=148615_1-999987_2-999992_1-159581_2-999991_1-164345_3-160429_1"
    "-999989_1-165121_1-999990_1-162727_1-999999_71-999988_1-163324_4"
    "-999994_2-999993_1-160037_5"
    "&st=2&ua=1170_2532_iphone_4.20.0.10_0"
    "&uid=808BA951B6493F89F2D28A62C7438A9332566F6AAOHBGTHSHBE"
)
_ZID = (
    "U8HLkcXuwbe2-gJK1hw1Goz5QuMxZdMQJSqN2Fs15rnzb0Vss7pxP-zc-"
    "wBRYuJI8qmsMDWWRwkcd-EivBG0P8Q"
)

# NOTE: the trailing ``jt`` parameter is empty; replace it with the value from
# your own traffic capture if the endpoint requires it.
_CHAT_URL = (
    "https://yiyanapp.baidu.com/chat/completions?"
    + _COMMON_QUERY
    + "&ut=iPhone13%2C2_17.6.1&zid="
    + _ZID
    + "&jt="
)

# NOTE(review): ``ut`` is double-encoded here (``%252C``) while the chat URL
# uses ``%2C``. Kept exactly as in the original request - confirm which one
# the server expects.
_SHARE_URL = (
    "https://yiyanapp.baidu.com/api/share/create?"
    + _COMMON_QUERY
    + "&ut=iPhone13%252C2_17.6.1&zid="
    + _ZID
)

_USER_AGENT = (
    "Mozilla/5.0 (iPhone; CPU iPhone OS 17_6_1 like Mac OS X) "
    "AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
    "light/1.0 newapp/4.20.0.10 (Baidu; P2 17.6.1)"
)

# Seconds to wait for either HTTP endpoint before giving up.
_HTTP_TIMEOUT = 30

# Matches the numeric index in "citation:<n>" markers inside the answer text.
_CITATION_RE = re.compile(r'citation:(\d+)')


class WenxiaoyanSpider(AbstractAiSeoSpider):
    """Collects an answer, its cited sources and a screenshot from wenxiaoyan."""

    def __init__(self, browser: Browser, prompt: str, keyword: str):
        super().__init__(browser, prompt, keyword)

    def get_home_url(self) -> str:
        """Return the home URL; empty because the chat runs over HTTP."""
        return ''

    def get_platform_id(self) -> int:
        """Return the numeric platform id of this spider."""
        return 8

    def get_platform_name(self) -> str:
        """Return the platform name of this spider."""
        return 'wenxiaoyan'

    def _common_headers(self) -> Dict[str, Any]:
        """Build the request headers both endpoints have in common.

        The captured request also carried ``Acs-Token`` and ``X-Bd-Alive``
        headers. They were commented out in the original code and are not
        sent; add them from your own traffic capture if requests start
        being rejected.
        """
        return {
            "Host": "yiyanapp.baidu.com",
            "Sec-Fetch-Site": "same-origin",
            "Accept-Language": "zh-CN,zh-Hans;q=0.9",
            "Sec-Fetch-Mode": "cors",
            "Accept-Encoding": "gzip, deflate, br",
            "Origin": "https://yiyanapp.baidu.com",
            "User-Agent": _USER_AGENT,
            "Referer": "https://yiyanapp.baidu.com/talk/chat",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "Cookie": self.session_info.get("cookie"),
        }

    @staticmethod
    def _build_chat_payload(question: str) -> Dict[str, Any]:
        """Return the chat request body with ``question`` as the content.

        Everything except ``content`` is a fixed value from a captured
        request (session, bot and context message ids included).
        """
        return {
            "parentId": "7955151",
            "inputSource": "user_input",
            "querySource": "user_input",
            "context": [
                "7955151", "7955150", "7955113", "7955112", "7955075",
                "7955074", "7955051", "7955050", "7955045", "7955044",
                "7955015", "7955014", "7954965", "7954964", "7954865",
                "7954864", "7954863", "7954862", "7954831", "7954830",
            ],
            "contentType": "text",
            "from": "app",
            "plugins": {},
            "models": {
                "dispatch": "",
                "force": "",
                "select": "auto",
                "replyMode": "normal",
            },
            "sessionId": "1159258",
            "chooses": None,
            "promptId": "",
            "botId": "144038",
            "characterId": "1034",
            "environment": {
                "isNewAB": 1,
                "depthAnalysis": 0,
                "hostTag": "main_chat",
                "composition": None,
                "sceneFrom": "",
                "individual": "0",
                "live": {
                    "visOnline": {
                        "per": "4189",
                        "pdt": "10170",
                        "audio_ctrl": "{\"mid\":\"\",\"sampling_rate\":24000}",
                        "spd": "5",
                    },
                },
                "isReplyModeChange": 0,
                "memory": {"noMemoryExtraction": 0},
                "depthMsgId": "",
                "clarify": {"enable": 1, "stage": 1},
                "interveneId": "",
            },
            "content": question,
        }

    @staticmethod
    def _parse_event(raw_line: bytes) -> Optional[Dict[str, Any]]:
        """Decode one stream line and return its ``data.data`` object.

        Returns ``None`` for lines that are not ``data:`` events, are not
        valid UTF-8/JSON, or do not have a dict at ``data.data``.
        """
        try:
            text = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            return None
        if not text.startswith("data:"):
            return None
        try:
            event = json.loads(text[5:])
        except ValueError:  # JSONDecodeError is a ValueError subclass
            logger.debug(f"skipping non-JSON stream line: {text[:200]}")
            return None
        if not isinstance(event, dict):
            return None
        outer = event.get("data")
        if not isinstance(outer, dict):
            return None
        inner = outer.get("data")
        return inner if isinstance(inner, dict) else None

    def get_answer(self, question):
        """Ask ``question`` and collect the streamed answer.

        Args:
            question: Text sent as the chat message content.

        Returns:
            A dict with the keys ``messageId``, ``parentId``, ``reference``
            and ``answer``. ``answer`` is the concatenation of every
            ``content`` fragment in the stream; the other three hold the last
            non-empty value seen (``''``/``None`` when never present).
            An empty dict is returned when the request itself fails.
        """
        headers = self._common_headers()
        headers["Accept"] = "text/event-stream"
        payload = self._build_chat_payload(question)

        fragments: List[str] = []
        reference = None
        message_id = ''
        parent_id = ''
        try:
            # The context manager releases the streamed connection even when
            # iteration stops early.
            with requests.post(_CHAT_URL, headers=headers, json=payload,
                               stream=True, timeout=_HTTP_TIMEOUT) as response:
                if not response.ok:
                    logger.warning(
                        f"chat endpoint returned HTTP {response.status_code}")
                for raw_line in response.iter_lines():
                    if not raw_line:
                        continue
                    data = self._parse_event(raw_line)
                    if data is None:
                        continue
                    # Keep the last non-empty value so a later event that
                    # lacks a field cannot wipe one already received.
                    reference = data.get("reference") or reference
                    message_id = data.get("messageId") or message_id
                    parent_id = data.get("parentId") or parent_id
                    content = data.get("content", "")
                    if isinstance(content, str):
                        fragments.append(content)
        except Exception as e:  # best-effort boundary: report and return {}
            logger.error(f"❌ 请求失败:{e}")
            return {}

        answer = "".join(fragments)
        logger.debug(f"✅ 回答:{answer}...\n")
        return {
            'messageId': message_id,
            'parentId': parent_id,
            'reference': reference,
            'answer': answer,
        }

    def get_url(self, idList):
        """Create a share link for the given messages.

        Args:
            idList: Comma-separated message ids, e.g. ``"7924487,7924488"``.

        Returns:
            The share URL from the response's ``data.url``, or ``None`` when
            the response is not JSON or carries no URL.

        Raises:
            requests.RequestException: If the HTTP request fails or times out.
        """
        headers = self._common_headers()
        headers["Accept"] = "application/json, text/plain, */*"
        headers["Sec-Fetch-Dest"] = "empty"
        payload = {
            "idList": idList,
            "watermark": "qYbvOVmx",
            "vp": 0,
        }
        resp = requests.post(_SHARE_URL, headers=headers, json=payload,
                             timeout=_HTTP_TIMEOUT)
        try:
            body = resp.json()
        except ValueError:
            logger.error(
                f"share endpoint returned non-JSON (HTTP {resp.status_code})")
            return None
        logger.debug(f"返回内容: {body}")
        data = body.get("data") if isinstance(body, dict) else None
        share_url = data.get("url") if isinstance(data, dict) else None
        if not share_url:
            logger.error(f"share endpoint returned no url: {body}")
        return share_url

    async def _capture_share_page(self, share_url: str) -> str:
        """Open ``share_url``, screenshot it and return the screenshot path."""
        page = self.browser_page
        await page.goto(share_url, timeout=600000)
        await asyncio.sleep(3)
        # Click an element on the shared page before capturing it.
        # NOTE(review): presumably this expands the answer - confirm.
        await page.locator('//*[@id="35"]/div[1]/div/div/div[2]').click()

        # Size the viewport to the answer element so the capture includes it.
        answer = page.locator('//*[@id="app"]/div/div[1]').nth(-1)
        box = await answer.bounding_box()
        if box is None:
            # bounding_box() yields None for an element that is not visible.
            logger.warning("answer element not visible; viewport unchanged")
        else:
            await page.set_viewport_size({
                'width': 1920,
                'height': int(box['height']) + 500,
            })

        screenshot_path = self._get_screenshot_path()
        await page.screenshot(path=screenshot_path)
        # Crop the image.
        remove_bottom_part(screenshot_path, 82)
        return screenshot_path

    @staticmethod
    def _build_search_results(reference, answer_text) -> list:
        """Convert ``reference['searchInfo']`` into ``AiSearchResult`` items.

        An item is marked referenced (``"1"``) when its 1-based position
        appears as ``citation:<n>`` in ``answer_text``. Items without a title
        or URL are dropped.
        """
        cited = set(_CITATION_RE.findall(answer_text or ""))
        entries = reference.get("searchInfo") if isinstance(reference, dict) else None
        results = []
        for index, entry in enumerate(entries or []):
            url = entry.get('url', '')
            title = entry.get('title', '')
            host_name = entry.get('site_name', '未知')
            ai_result = AiSearchResult(
                url=url,
                title=title,
                body=entry.get('snippet', ''),
                publish_time=entry.get('published_at', ''),
                host_name=host_name,
                is_referenced="1" if str(index + 1) in cited else "0",
            )
            if ai_result.title and ai_result.url:
                results.append(ai_result)
                logger.debug(f"ai参考资料: [{host_name}]{title}({url})")
        return results

    async def _do_spider(self,) -> AiAnswer:
        """Run one crawl: ask the prompt, then collect sources and a screenshot.

        Sources and the screenshot are only produced when the answer carries
        a ``reference``. Without a screenshot, ``ai_answer.screenshot_file``
        is left as ``_init_data`` set it.

        Returns:
            ``self.ai_answer`` populated with the answer text and, when
            available, the search results and the screenshot file.
        """
        self._init_data()
        self.search_result_count = 0
        logger.debug(f"self.prompt {self.prompt}")

        result = self.get_answer(self.prompt)
        reference = result.get("reference")
        answer_text = result.get("answer")
        screenshot_path = None

        if reference:
            parent_id = result.get("parentId")
            message_id = result.get("messageId")
            if parent_id and message_id:
                share_url = self.get_url(f"{parent_id},{message_id}")
                if share_url:
                    screenshot_path = await self._capture_share_page(share_url)
            else:
                logger.warning("answer has no message ids; skipping screenshot")

            self.ai_answer.search_result = self._build_search_results(
                reference, answer_text)
            self.search_result_count = len(self.ai_answer.search_result)

        self.ai_answer.answer = answer_text
        if screenshot_path:
            self.ai_answer.screenshot_file = screenshot_path
        return self.ai_answer