初始化

4b70bcb7 · huangiyu · 4b70bcb7 · 4b70bcb7 · 4b70bcb7
Commit 4b70bcb7 authored Feb 02, 2024 by huangiyu
Hide whitespace changes
Inline Side-by-side

Showing with 455 additions and 0 deletions

main.py main.py +16 -0

stealth.min.js stealth.min.js +0 -0

京东搜索_自动版.py 京东搜索_自动版.py +439 -0

No files found.
--- a/main.py
+++ b/main.py
+# 这是一个示例 Python 脚本。
+
+# 按 Shift+F10 执行或将其替换为您的代码。
+# 按 双击 Shift 在所有地方搜索类、文件、工具窗口、操作和设置。
+
+
+def print_hi(name):
+    # 在下面的代码行中使用断点来调试脚本。
+    print(f'Hi, {name}')  # 按 Ctrl+F8 切换断点。
+
+
+# 按间距中的绿色按钮以运行脚本。
+if __name__ == '__main__':
+    print_hi('PyCharm')
+
+# 访问 https://www.jetbrains.com/help/pycharm/ 获取 PyCharm 帮助
--- a/stealth.min.js
+++ b/stealth.min.js
--- a/京东搜索_自动版.py
+++ b/京东搜索_自动版.py
+import time
+from urllib import request
+import cv2
+import numpy as np
+import pyautogui
+from selenium import webdriver
+from selenium.common.exceptions import NoSuchElementException
+from selenium.webdriver import ChromeOptions
+import pymysql
+from lxml import etree
+# 登陆账号
+from selenium.webdriver.common.by import By
+import openpyxl
+import time
+import random
+import json
+from openpyxl import Workbook
+from PIL import Image
+import re
+
+from bs4 import BeautifulSoup
+
+# from utils.spider import jd
+mysql_zdzs_proxy_host = '59.110.219.171'
+mysql_zdzs_proxy_user = 'zgcindex'
+mysql_zdzs_proxy_password = 'zgcprice2019'
+mysql_zdzs_proxy_database = 'zdzs_proxy'
+username = 'xxx'
+# 登陆密码
+password = 'xxx'
+# mysql_zdzs_proxy_host = '59.110.219.171'
+# mysql_zdzs_proxy_user = 'zgcindex'
+# mysql_zdzs_proxy_password = 'zgcprice2019'
+# mysql_zdzs_proxy_database = 'zdzs_proxy'
+# 滑块距离屏幕左上角的x, y像素, 需根据自己屏幕大小调整，利用微信截图得到xy的具体位置
+# 登录的
+slide_x_position, slid_y_position = 850, 459
+# 验证的
+slide_x_position_quick_verification, slid_y_position_quick_verification = 490, 512
+# 偏移度加值(根据电脑分辨率情况)
+offset_increase = 0
+while True:
+    try:
+        conn = pymysql.connect(
+            host=mysql_zdzs_proxy_host,
+            user=mysql_zdzs_proxy_user,
+            password=mysql_zdzs_proxy_password,
+            database=mysql_zdzs_proxy_database,
+            charset="utf8mb4",
+            cursorclass=pymysql.cursors.DictCursor,
+            autocommit=True
+        )
+        break
+    except Exception as e:
+        print(e)
+        time.sleep(5)
+        pass
+cur = conn.cursor()
+
+
+def mysql_zdzs_proxy(sql=None):
+    print(sql)
+    try:
+        cur.execute(sql)
+        result = cur.fetchall()
+        return result
+    except pymysql.err.IntegrityError as e:
+        # cur.close()
+        return []
+    except Exception as e:
+        return []
+
+
+def return_js():
+    return open('stealth.min.js', 'r').read()
+
+
+class SlideUtils:
+
+    @staticmethod
+    def find_pic(background, slide):
+        """
+        获取背景图与滑块图的最佳位置
+        """
+        # 读取图片
+        background_rgb = cv2.imread(background)
+        # 灰度处理
+        background_gray = cv2.cvtColor(background_rgb, cv2.COLOR_BGR2GRAY)
+        # 读取滑块灰度图片
+        slide_gray = cv2.imread(slide, 0)
+        # 匹配滑块位置
+        res = cv2.matchTemplate(background_gray, slide_gray, cv2.TM_CCOEFF_NORMED)
+        # 获取最佳与最差匹配
+        value = cv2.minMaxLoc(res)
+        return value[2][0]
+
+    @staticmethod
+    def slide_by_pyautogui(x, y, offset, offset_increase):
+        """
+        使用pyautogui实现滑块并自定义轨迹方程
+        """
+        print(f"睡眠0.5秒后点击移动")
+        time.sleep(0.5)
+        xx = x + offset + offset_increase
+        pyautogui.moveTo(x, y, duration=0.1)
+        pyautogui.mouseDown()
+        y += random.randint(9, 19)
+        pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28)
+        y += random.randint(-9, 0)
+        pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100)
+        y += random.randint(0, 8)
+        pyautogui.moveTo(xx, y, duration=0.3)
+        pyautogui.mouseUp()
+
+    @staticmethod
+    def slide_by_pyautogui2(x, y, offset, offset_increase):
+        """
+        使用pyautogui实现滑块并自定义轨迹方程
+        """
+        # print(f"睡眠2秒后点击移动")
+        # time.sleep(2)
+        xx = x + offset + offset_increase
+        pyautogui.moveTo(x, y, duration=0.1)
+        pyautogui.mouseDown()
+        y += random.randint(9, 19)
+        pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28)
+        y += random.randint(-9, 0)
+        pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100)
+        y += random.randint(0, 8)
+        pyautogui.moveTo(xx, y, duration=0.3)
+        # print("睡眠2秒后松开")
+        # time.sleep(0.3)
+        pyautogui.mouseUp()
+        # print(f"睡眠2秒后点击移动")
+        # time.sleep(2)
+        # xx = x + offset
+        # pyautogui.moveTo(x, y, duration=0.1)
+        # pyautogui.mouseDown()
+        # # y += random.randint(9, 19)
+        # # pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28)
+        # # y += random.randint(-9, 0)
+        # # pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100)
+        # # y += random.randint(0, 8)
+        # pyautogui.moveTo(xx, y, duration=0.3)
+        # print("睡眠2秒后松开")
+        # time.sleep(2)
+        # pyautogui.mouseUp()
+
+
+def detect_circular_distortion(image_path, circle_radius=65):
+    # 加载图像并转换为灰度图
+    img = cv2.imread(image_path)
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+    # 确定圆形区域的中心和半径
+    circle_center = (gray.shape[1] // 2, gray.shape[0] // 2)
+
+    # 创建一个遮罩，只包含圆形区域
+    mask = np.zeros_like(gray)
+    cv2.circle(mask, circle_center, circle_radius, 255, thickness=-1)
+
+    # 应用遮罩到图像
+    masked_img = cv2.bitwise_and(gray, gray, mask=mask)
+
+    # 初始化ORB检测器
+    orb = cv2.ORB_create(500)
+
+    # 检测并计算圆形区域和整个图像的关键点和描述符
+    kp1, des1 = orb.detectAndCompute(gray, None)
+    kp2, des2 = orb.detectAndCompute(masked_img, None)
+
+    # 匹配描述符
+    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
+    matches = bf.match(des1, des2)
+
+    # 计算匹配点的平均距离，作为扭曲程度的一个简单估计
+    if matches:
+        distances = [match.distance for match in matches]
+        average_distance = sum(distances) / len(distances)
+    else:
+        average_distance = float('inf')
+
+    return average_distance
+
+
+def find_best_match_angle(background_img_path, rotating_img_path):
+    # 加载背景图片和验证码图片
+    background = Image.open(background_img_path)
+    captcha = Image.open(rotating_img_path)
+
+    # 确保背景图和验证码图的尺寸符合要求
+    assert background.size == (320, 200), "背景图尺寸必须是 320x200"
+    assert captcha.size == (130, 130), "验证码图尺寸必须是 130x130"
+
+    # 确定将验证码放置在背景图中的位置
+    insert_position = ((background.width - captcha.width) // 2, (background.height - captcha.height) // 2)
+    best_score = None
+    best_angle = None
+    best_position = None
+    best_file_name = None
+    # 每10度旋转一次，总共旋转36次
+    for angle in range(-360, 0, 1):
+        # 旋转验证码图片
+        rotated_captcha = captcha.rotate(angle, expand=False)
+
+        # 创建新的背景图副本用于合成
+        new_background = background.copy()
+
+        # 将旋转后的验证码图片放置到背景图的指定位置
+        new_background.paste(rotated_captcha, insert_position, rotated_captcha)
+
+        # 保存合成后的图片
+
+        new_background.save(f"captcha_{abs(angle)}.png")
+        distortion_level = detect_circular_distortion(f"captcha_{abs(angle)}.png")
+        if angle == -360:
+            best_score = distortion_level
+        print(f'中心区域扭曲程度估计（数值越小表示越接近未扭曲）: {distortion_level}')
+        # res = cv2.matchTemplate(background, rotated_captcha, cv2.TM_CCOEFF_NORMED)
+        # _, max_val, _, max_loc = cv2.minMaxLoc(res)
+        #
+        if best_score > distortion_level:
+            best_score = distortion_level
+            best_angle = angle
+            # best_position = max_loc
+            best_file_name = f"captcha_{abs(angle)}.png"
+    print(best_score)
+    print(best_angle)
+    print(best_position)
+    print(best_file_name)
+    return abs(best_angle), best_position
+
+
+def get_cookie():
+    option = ChromeOptions()
+    option.binary_location = 'D:\ChromeMaYi\guge\chrome.exe'
+    # 这个不能开，开了识别不了图片
+    # option.add_argument('--headless') # 无头模式，可不启用界面显示运行
+    option.add_experimental_option('excludeSwitches', ['enable-automation'])
+    driver = webdriver.Chrome(options=option)
+    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
+        "source": return_js()
+    })
+    option.add_argument("--disable-blink-features=AutomationControlled")
+    driver.set_window_size(1200, 700)
+    # driver.maximize_window()
+    driver.get('https://passport.jd.com/uc/login')
+    # 切換账号密码登录
+    # driver.find_element(by=By.CLASS_NAME, value='login-tab-r').click()
+    # 设置账号密码
+    driver.find_element(by=By.ID, value='loginname').send_keys(username)
+    driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
+    time.sleep(0.5)
+    # 登录
+    driver.find_element(by=By.ID, value='loginsubmit').click()
+    print("睡眠0.5秒后开始滑动登录")
+    time.sleep(0.5)
+    # 一直循环直到登录成功位置，超过一百次就算失败。
+    count = 0
+    while 1:
+        count = count + 1
+        print(f"开始第{count}次尝试")
+        if count > 100:
+            print("登录京东失败！")
+            return
+        # 获取验证码图片
+        # 用于找到登录图片的大图
+        try:
+            background = driver.find_element(by=By.XPATH, value=r'//div/div[@class="JDJRV-bigimg"]/img')
+        except NoSuchElementException:
+            # 未查找到登陆图片则认为成功
+            print("京东登录成功！")
+            break
+        # 用来找到登录图片的小滑块
+        slide = driver.find_element(by=By.XPATH, value=r'//div/div[@class="JDJRV-smallimg"]/img')
+        background_url = background.get_attribute("src")
+        slide_url = slide.get_attribute("src")
+        background_img = 'background_img.png'
+        slide_img = 'slide_img.png'
+        # 下载背景大图保存到本地
+        request.urlretrieve(background_url, background_img)
+        # 下载滑块保存到本地
+        request.urlretrieve(slide_url, slide_img)
+        # 获取最佳x偏移量
+        x = SlideUtils.find_pic(background_img, slide_img)
+        # print(f'本地最佳偏移量: {x}')
+        # 计算缩放
+        # 获取下载背景图宽度
+        w1 = cv2.imread(background_img).shape[1]
+        # 获取网页背景图宽度
+        w2 = background.size['width']
+        # 计算实际页面x偏移量
+        x = (x * w2 / w1)
+        # 其中x为屏幕左上角至滑块中心的横向像素值，y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
+        SlideUtils.slide_by_pyautogui(slide_x_position, slid_y_position, x, offset_increase)
+        time.sleep(3)
+    query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '110' order by docID desc limit 10000"
+    # query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE  data_batch ='202401251550270001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 10000"
+    query_spider = mysql_zdzs_proxy(query_spider_sql)
+    for row in query_spider:
+        # while True:
+        #     row = {
+        #         'url':'https://search.jd.com/search?keyword=COMIX%20B3749&enc=utf-8',
+        #         'url':'https://cfe.m.jd.com/privatedomain/risk_handler/03101900/?returnurl=http%3A%2F%2Fsearch.jd.com%2Fsearch%3Fkeyword%3DCOMIX%2520B3749%26enc%3Dutf-8&evtype=2&rpid=rp-191349676-10509-17059999771768'
+        #     }
+        url = row['url']
+        print(url)
+        driver.get(url)
+        time.sleep(1)
+        text = driver.page_source
+        while ('快速验证' in text and '验证一下，购物无忧' in text) or '前方拥挤，请刷新重试' in text:
+            driver.get(url)
+            time.sleep(1)
+            pass
+            # 获取验证码图片
+            try:
+                print("需要验证。")
+                print("睡眠1秒后点击“快速验证”按钮")
+                driver.find_element(by=By.CLASS_NAME, value='verifyBtn').click()
+                time.sleep(2)
+                background = driver.find_element(by=By.XPATH, value='//*[@id="cpc_img"]')
+                background_url = background.get_attribute("src")
+                # 用来找到登录图片的小滑块
+                slide = driver.find_element(by=By.XPATH, value='//*[@id="small_img"]')
+                slide_url = slide.get_attribute("src")
+                background_img = 'background_img.png'
+                slide_img = 'slide_img.png'
+                # 下载背景大图保存到本地
+                request.urlretrieve(background_url, background_img)
+                # 下载滑块保存到本地
+                request.urlretrieve(slide_url, slide_img)
+                # 获取最佳x偏移量
+                x = SlideUtils.find_pic(background_img, slide_img)
+                # print(f'本地最佳偏移量: {x}')
+                # 计算缩放
+                # 获取下载背景图宽度
+                w1 = cv2.imread(background_img).shape[1]
+                # 获取网页背景图宽度
+                w2 = background.size['width']
+                # 计算实际页面x偏移量
+                x = (x * w2 / w1)
+                # 其中x为屏幕左上角至滑块中心的横向像素值，y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
+                # time.sleep(0.5)
+                SlideUtils.slide_by_pyautogui(slide_x_position_quick_verification, slid_y_position_quick_verification,
+                                              x, offset_increase+5)
+                time.sleep(3)
+                text = driver.page_source
+                continue
+            except NoSuchElementException:
+                print("未查找到验证图片背景图，可能是旋转图片，换一种方式获取div")
+                try:
+                    background = driver.find_element(by=By.XPATH, value='//*[@id="img-back-div"]')
+                    background_url = background.get_attribute("style").replace('"); height: 181px;', "").replace(
+                        'background-image: url("', "")
+                    # 用来找到登录图片的小滑块
+                    slide = driver.find_element(by=By.XPATH, value='//*[@id="img-rotate-div"]/img')
+                    slide_url = slide.get_attribute("src")
+                    background_img = 'background_img.png'
+                    slide_img = 'slide_img.png'
+                    # 下载背景大图保存到本地
+                    request.urlretrieve(background_url, background_img)
+                    # 下载滑块保存到本地
+                    request.urlretrieve(slide_url, slide_img)
+                    best_angle, best_match_score = find_best_match_angle(background_img, slide_img)
+                    print(f"最佳匹配角度为: {best_angle} 度")
+                    # time.sleep(0.5)
+
+                    # w1 = cv2.imread(background_img).shape[1]
+                    # # 获取网页背景图宽度
+                    # w2 = background.size['width']
+                    x = 228 / 360 * best_angle
+                    x = int(x)
+                    print(f"移动: {x} ")
+                    SlideUtils.slide_by_pyautogui2(slide_x_position_quick_verification,
+                                                   slid_y_position_quick_verification,
+                                                   x,
+                                                   offset_increase)
+                    time.sleep(3)
+                    text = driver.page_source
+
+                except NoSuchElementException:
+                    print("未查找到验证图片背景图，睡眠1秒后重试。")
+                    driver.get(url)
+                    text = driver.page_source
+                    continue
+                except Exception as e:
+                    print(e)
+                    text = driver.page_source
+                    continue
+            except Exception as e:
+                print(e)
+                text = driver.page_source
+                continue
+        # 判断是否跳转了登录
+        if '<title>京东-欢迎登录</title>' in text:
+            return get_cookie()
+        Response = etree.HTML(text)
+
+        dataList = []
+        for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
+
+            is_self_operate_div = ''
+
+            if x.xpath(".//div[@class='p-name p-name-type-2']"):
+                shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
+                    ".//span[@class='J_im_icon']/a/@title") else ''
+
+                dataList.append(
+                    {
+                        'is_self_operate_div': is_self_operate_div,
+                        'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html',
+                        'sku_id': str(x.xpath("./@data-sku")[0]),
+                        'shop_name': shop_name,
+                        'sku_name': '‘'.join(
+                            ''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split(
+                                "'")).strip(),
+                        'sku_price': ''
+                    }
+
+                )
+        insert_sql = f"""
+            INSERT INTO DMP_SEARCH_DATA(id,result,mallId)
+            VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
+        """
+        mysql_zdzs_proxy(insert_sql)
+
+        print("保存成功")
+        pass
+    # 登录成功
+    cookie = ''
+    for i in driver.get_cookies():
+        cookie = cookie + i['name'] + '=' + i['value'] + ';'
+    driver.quit()
+    return cookie
+
+
+if __name__ == "__main__":
+    print(get_cookie())
+    # print(find_best_match_angle('background_img.png', 'slide_img.png'))