1

87f54da1 · huangiyu · 2f4d1e79 · 87f54da1
Commit 87f54da1 authored Apr 19, 2024 by huangiyu
Show whitespace changes
Inline Side-by-side

Showing with 2 additions and 461 deletions

京东搜索_手动验证版.py 京东搜索_手动验证版.py +2 -461

No files found.
--- a/京东搜索_手动验证版.py
+++ b/京东搜索_手动验证版.py
@@ -4,7 +4,6 @@
 import time
 from urllib import request
 import cv2
-from putCookies import updateCookie
 import numpy as np
 import pyautogui
 from selenium import webdriver
@@ -26,7 +25,6 @@ import re
 from bs4 import BeautifulSoup
-# from utils.spider import jd
 mysql_zdzs_proxy_host = '59.110.219.171'
 mysql_zdzs_proxy_user = 'zgcindex'
 mysql_zdzs_proxy_password = 'zgcprice2019'
@@ -37,15 +35,6 @@ username = config.username
 # 登陆密码
 password = config.password
-# mysql_zdzs_proxy_host = '59.110.219.171'
-# mysql_zdzs_proxy_user = 'zgcindex'
-# mysql_zdzs_proxy_password = 'zgcprice2019'
-# mysql_zdzs_proxy_database = 'zdzs_proxy'
-# 滑块距离屏幕左上角的x, y像素, 需根据自己屏幕大小调整，利用微信截图得到xy的具体位置
-# 登录的
-# slide_x_position, slid_y_position = 850, 459
-# # 验证的
-# slide_x_position_quick_verification, slid_y_position_quick_verification = 490, 512
 # 偏移度加值(根据电脑分辨率情况)
 offset_increase = 0
 while True:
@@ -80,212 +69,6 @@ def mysql_zdzs_proxy(sql=None):
        return []
-def return_js():
-    return open('stealth.min.js', 'r').read()
-class SlideUtils:
-    @staticmethod
-    def find_pic(background, slide):
-        background_rgb = cv2.imread(background)
-        background_gray = cv2.cvtColor(background_rgb, cv2.COLOR_BGR2GRAY)
-        slide_gray = cv2.imread(slide, 0)
-        # 应用Canny边缘检测增强特征
-        background_edges = cv2.Canny(background_gray, 100, 200)
-        slide_edges = cv2.Canny(slide_gray, 100, 200)
-        # 应用模板匹配
-        res = cv2.matchTemplate(background_edges, slide_edges, cv2.TM_CCOEFF_NORMED)
-        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
-        return max_loc[0]
-    @staticmethod
-    def slide_by_pyautogui(x, y, offset, offset_increase):
-        """
-        使用pyautogui实现滑块并自定义轨迹方程
-        """
-        print(f"睡眠2秒后点击移动")
-        # time.sleep(2)
-        xx = x + offset + offset_increase
-        pyautogui.moveTo(x, y, duration=0.1)
-        pyautogui.mouseDown()
-        y += random.randint(9, 19)
-        pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28)
-        y += random.randint(-9, 0)
-        pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100)
-        y += random.randint(0, 8)
-        pyautogui.moveTo(xx, y, duration=0.3)
-        pyautogui.moveTo(xx+ random.randint(-3, 3), y+30, duration=0.3)
-        # time.sleep(1.3)
-        pyautogui.mouseUp()
-    @staticmethod
-    def slide_by_pyautogui2(x, y, offset, offset_increase):
-        """
-        使用pyautogui实现滑块并自定义轨迹方程
-        """
-        print(f"睡眠2秒后点击移动")
-        time.sleep(2)
-        xx = x + offset + offset_increase
-        pyautogui.moveTo(x, y, duration=0.1)
-        pyautogui.mouseDown()
-        y += random.randint(9, 19)
-        pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28)
-        y += random.randint(-9, 0)
-        pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100)
-        y += random.randint(0, 8)
-        pyautogui.moveTo(xx, y, duration=0.3)
-        pyautogui.moveTo(xx+ random.randint(-3, 3), y+30, duration=0.3)
-        # print("睡眠2秒后松开")
-        time.sleep(1.3)
-        pyautogui.mouseUp()
-        # print(f"睡眠2秒后点击
-        # time.sleep(2)
-        # xx = x + offset
-        # pyautogui.moveTo(x, y, duration=0.1)
-        # pyautogui.mouseDown()
-        # # y += random.randint(9, 19)
-        # # pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28)
-        # # y += random.randint(-9, 0)
-        # # pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100)
-        # # y += random.randint(0, 8)
-        # pyautogui.moveTo(xx, y, duration=0.3)
-        # print("睡眠2秒后松开")
-        # time.sleep(2)
-        # pyautogui.mouseUp()
-def detect_circular_distortion(image_path, circle_radius=65):
-    # 加载图像并转换为灰度图
-    img = cv2.imread(image_path)
-    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    # 确定圆形区域的中心和半径
-    circle_center = (gray.shape[1] // 2, gray.shape[0] // 2)
-    # 创建一个遮罩，只包含圆形区域
-    mask = np.zeros_like(gray)
-    cv2.circle(mask, circle_center, circle_radius, 255, thickness=-1)
-    # 应用遮罩到图像
-    masked_img = cv2.bitwise_and(gray, gray, mask=mask)
-    # 初始化ORB检测器
-    orb = cv2.ORB_create(500)
-    # 检测并计算圆形区域和整个图像的关键点和描述符
-    kp1, des1 = orb.detectAndCompute(gray, None)
-    kp2, des2 = orb.detectAndCompute(masked_img, None)
-    # 匹配描述符
-    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
-    matches = bf.match(des1, des2)
-    # 计算匹配点的平均距离，作为扭曲程度的一个简单估计
-    if matches:
-        distances = [match.distance for match in matches]
-        average_distance = sum(distances) / len(distances)
-    else:
-        average_distance = float('inf')
-    return average_distance
-def any_of_elements_present(*locators):
-    """检查给定的任意一个元素是否出现在DOM中"""
-    def _predicate(driver):
-        for locator in locators:
-            if EC.presence_of_element_located(locator)(driver):
-                return True
-        return False
-    return _predicate
-def find_best_match_angle(background_img_path, rotating_img_path):
-    # 加载背景图片和验证码图片
-    background = Image.open(background_img_path)
-    captcha = Image.open(rotating_img_path)
-    # 确保背景图和验证码图的尺寸符合要求
-    assert background.size == (320, 200), "背景图尺寸必须是 320x200"
-    assert captcha.size == (130, 130), "验证码图尺寸必须是 130x130"
-    # 确定将验证码放置在背景图中的位置
-    insert_position = ((background.width - captcha.width) // 2, (background.height - captcha.height) // 2)
-    best_score = None
-    best_angle = None
-    best_position = None
-    best_file_name = None
-    # 每10度旋转一次，总共旋转36次
-    for angle in range(-360, 0, 1):
-        # 旋转验证码图片
-        rotated_captcha = captcha.rotate(angle, expand=False)
-        # 创建新的背景图副本用于合成
-        new_background = background.copy()
-        # 将旋转后的验证码图片放置到背景图的指定位置
-        new_background.paste(rotated_captcha, insert_position, rotated_captcha)
-        # 将PIL图像转换为NumPy数组
-        numpy_image = np.array(new_background)
-        # 将RGB格式的图像转换为BGR格式，因为OpenCV默认使用BGR格式
-        bgr_image = cv2.cvtColor(numpy_image, cv2.COLOR_RGB2BGR)
-        # 保存合成后的图片
-        gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
-        # 确定圆形区域的中心和半径
-        circle_center = (gray.shape[1] // 2, gray.shape[0] // 2)
-        # 创建一个遮罩，只包含圆形区域
-        mask = np.zeros_like(gray)
-        cv2.circle(mask, circle_center, 65, 255, thickness=-1)
-        # 应用遮罩到图像
-        masked_img = cv2.bitwise_and(gray, gray, mask=mask)
-        # 初始化ORB检测器
-        orb = cv2.ORB_create(500)
-        # 检测并计算圆形区域和整个图像的关键点和描述符
-        kp1, des1 = orb.detectAndCompute(gray, None)
-        kp2, des2 = orb.detectAndCompute(masked_img, None)
-        # 匹配描述符
-        bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
-        matches = bf.match(des1, des2)
-        # 计算匹配点的平均距离，作为扭曲程度的一个简单估计
-        if matches:
-            distances = [match.distance for match in matches]
-            average_distance = sum(distances) / len(distances)
-        else:
-            average_distance = float('inf')
-        # new_background.save(f"captcha_{abs(angle)}.png")
-        distortion_level = average_distance
-        if angle == -360:
-            best_score = distortion_level
-        # print(f'中心区域扭曲程度估计（数值越小表示越接近未扭曲）: {angle} {distortion_level}')
-        # res = cv2.matchTemplate(background, rotated_captcha, cv2.TM_CCOEFF_NORMED)
-        # _, max_val, _, max_loc = cv2.minMaxLoc(res)
-        #
-        if best_score > distortion_level:
-            best_score = distortion_level
-            best_angle = angle
-            # best_position = max_loc
-            best_file_name = f"captcha_{abs(angle)}.png"
-    print(best_score)
-    print(best_angle)
-    print(best_position)
-    print(best_file_name)
-    return abs(best_angle), best_position
 def get_cookie():
    option = ChromeOptions()
    option.binary_location = 'D:\ChromeMaYi\guge\chrome.exe'
@@ -293,80 +76,17 @@ def get_cookie():
    # option.add_argument('--headless') # 无头模式，可不启用界面显示运行
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    driver = webdriver.Chrome(options=option)
-    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
-        "source": return_js()
-    })
    option.add_argument("--disable-blink-features=AutomationControlled")
    driver.set_window_size(1200, 600)
    # driver.maximize_window()
    driver.get('https://passport.jd.com/uc/login')
-    # 切換账号密码登录
-    # driver.find_element(by=By.CLASS_NAME, value='login-tab-r').click()
-    # 设置账号密码
-    driver.find_element(by=By.ID, value='loginname').send_keys(username)
-    driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
-    time.sleep(0.5)
-    # 登录
-    driver.find_element(by=By.ID, value='loginsubmit').click()
-    print("睡眠0.5秒后开始滑动登录")
-    time.sleep(0.5)
    # 一直循环直到登录成功位置，超过一百次就算失败。
    count = 0
    while 1:
        count = count + 1
-        print(f"开始第{count}次尝试")
+        print(f"需要登录")
-        if count > 100:
-            print("登录京东失败！")
-            return
-        # 获取验证码图片
-        # 用于找到登录图片的大图
-        try:
-            background = driver.find_element(by=By.XPATH, value=r'//div/div[@class="JDJRV-bigimg"]/img')
-        except NoSuchElementException:
-            # 未查找到登陆图片则认为成功
-            print("京东登录成功！")
-            break
-        # 用来找到登录图片的小滑块
-        slide = driver.find_element(by=By.XPATH, value=r'//div/div[@class="JDJRV-smallimg"]/img')
-        background_url = background.get_attribute("src")
-        slide_url = slide.get_attribute("src")
-        background_img = 'background_img.png'
-        slide_img = 'slide_img.png'
-        # 下载背景大图保存到本地
-        request.urlretrieve(background_url, background_img)
-        # 下载滑块保存到本地
-        request.urlretrieve(slide_url, slide_img)
-        # 获取最佳x偏移量
-        x = SlideUtils.find_pic(background_img, slide_img)
-        # print(f'本地最佳偏移量: {x}')
-        # 计算缩放
-        # 获取下载背景图宽度
-        w1 = cv2.imread(background_img).shape[1]
-        # 获取网页背景图宽度
-        w2 = background.size['width']
-        # 计算实际页面x偏移量
-        x = (x * w2 / w1)
-        # 找到要获取位置的元素，比如通过其ID
-        element = driver.find_element_by_class_name("JDJRV-slide-btn")
-        # 获取元素的X和Y位置
-        x_position = element.location['x'] + 50
-        y_position = element.location['y'] + 100
-        print("元素的X位置：", x_position)
-        print("元素的Y位置：", y_position)
-        # 其中x为屏幕左上角至滑块中心的横向像素值，y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
-        SlideUtils.slide_by_pyautogui(x_position, y_position, x, offset_increase)
        time.sleep(1)
-    # query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '110' order by docID desc limit 10000"
-    # query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE  data_batch ='202401251550270001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD'  order by docID desc limit 10000"
-    # query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and  data_batch ='202403051815460001' order by id" \
-    #                    f" limit 100000"
-    # query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' limit 1000"
    query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and  data_batch ='202404180000000004' limit 30000"
-    # query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and  data_batch ='202404180000000001' limit 1000"
-    # query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE  data_batch ='202404180000000001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD'"
    query_spider = mysql_zdzs_proxy(query_spider_sql)
    index = 0
    len_ = len(query_spider)
@@ -389,36 +109,7 @@ def get_cookie():
            print("跳转首页了，睡眠10秒")
            time.sleep(10)
            driver.get(url)
-        # element_present = WebDriverWait(driver, 10).until(
-        #     any_of_elements_present((By.ID, "J_main"), (By.CLASS_NAME, "verifyBtn"))
-        # )
-        # if element_present:
-        #     print("至少有一个元素加载完成")
-        # else:
-        #     print("元素未加载完成")
-        # try:
-        #     WebDriverWait(driver, 2).until(
-        #         EC.presence_of_element_located((By.ID, "J_main"))
-        #     )
-        # except:
-        #     try:
-        #         WebDriverWait(driver, 5).until(
-        #             EC.presence_of_element_located((By.CLASS_NAME, "verifyBtn"))
-        #         )
-        #     except NoSuchElementException:
-        #         print("网页加载异常")
-        #         driver.get(url)
-        #         time.sleep(2)
-        #         continue
-        #     except Exception as e:
-        #         print("网页加载异常")
-        #         driver.get(url)
-        #         time.sleep(2)
-        #         continue
-        #     finally:
-        #         print("网页加载完成")
-        # finally:
-        #     print("网页加载完成")
        text = driver.page_source
        while_count = 0
        while (
@@ -426,152 +117,6 @@ def get_cookie():
            print(f"需要验证，睡眠1秒")
            time.sleep(1)
            text = driver.page_source
-            # while_count += 1
-            # if while_count > 1:
-            #     driver.get(url)
-            #     time.sleep(1)
-            # pass
-            # # 获取验证码图片
-            # try:
-            #     print("需要验证。")
-            #     print("开始点击“快速验证”按钮")
-            #     try:
-            #         driver.find_element(by=By.CLASS_NAME, value='verifyBtn').click()
-            #     except:
-            #         print("点击“快速验证”按钮异常")
-            #     try:
-            #         WebDriverWait(driver, 5).until(
-            #             EC.presence_of_element_located((By.XPATH, '//*[@id="cpc_img"]'))
-            #         )
-            #     except:
-            #         print("未查找到验证图片背景图，可能是旋转图片，换一种方式获取div")
-            #         pass
-            #     while_count_2 = 0
-            #     while while_count_2 < 30 and (
-            #             '快速验证' in text and '验证一下，购物无忧' in text) or '前方拥挤，请刷新重试' in text or 'class="title">加载中...</span></div></div><!----></div>' in text:
-            #         while_count_2 += 1
-            #         background = driver.find_element(by=By.XPATH, value='//*[@id="cpc_img"]')
-            #         background_url = background.get_attribute("src")
-            #         # 用来找到登录图片的小滑块
-            #         slide = driver.find_element(by=By.XPATH, value='//*[@id="small_img"]')
-            #         slide_url = slide.get_attribute("src")
-            #         background_img = 'background_img.png'
-            #         slide_img = 'slide_img.png'
-            #         # 下载背景大图保存到本地
-            #         request.urlretrieve(background_url, background_img)
-            #         # 下载滑块保存到本地
-            #         request.urlretrieve(slide_url, slide_img)
-            #         # 获取最佳x偏移量
-            #         x = SlideUtils.find_pic(background_img, slide_img)
-            #         # print(f'本地最佳偏移量: {x}')
-            #         # 计算缩放
-            #         # 获取下载背景图宽度
-            #         w1 = cv2.imread(background_img).shape[1]
-            #         # 获取网页背景图宽度
-            #         w2 = background.size['width']
-            #         # 计算实际页面x偏移量
-            #         x = (x * w2 / w1)
-            #         # 找到要获取位置的元素，比如通过其ID
-            #         element = driver.find_element_by_class_name("bg-blue")
-            #
-            #         # 获取元素的X和Y位置
-            #         x_position = element.location['x'] + 50
-            #         y_position = element.location['y'] + 100
-            #
-            #         print("元素的X位置：", x_position)
-            #         print("元素的Y位置：", y_position)
-            #         # 其中x为屏幕左上角至滑块中心的横向像素值，y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
-            #         # time.sleep(0.5)
-            #         SlideUtils.slide_by_pyautogui(x_position, y_position,
-            #                                       x, offset_increase)
-            #         try:
-            #             WebDriverWait(driver, 2).until(
-            #                 EC.presence_of_element_located((By.ID, "J_main"))
-            #             )
-            #             text = driver.page_source
-            #             continue
-            #         except:
-            #             print("没成功，重试")
-            #         finally:
-            #             print("网页加载完成")
-            #         text = driver.page_source
-            #         continue
-            # except NoSuchElementException:
-            #     # print("未查找到验证图片背景图，可能是旋转图片，换一种方式获取div")
-            #     try:
-            #         while_count_2 = 0
-            #         while while_count_2 < 30:
-            #             while_count_2 += 1
-            #             background = driver.find_element(by=By.XPATH, value='//*[@id="img-back-div"]')
-            #             background_url = background.get_attribute("style").replace('"); height: 181px;', "").replace(
-            #                 'background-image: url("', "")
-            #             # 用来找到登录图片的小滑块
-            #             slide = driver.find_element(by=By.XPATH, value='//*[@id="img-rotate-div"]/img')
-            #             slide_url = slide.get_attribute("src")
-            #             background_img = 'background_img.png'
-            #             slide_img = 'slide_img.png'
-            #             # 下载背景大图保存到本地
-            #             request.urlretrieve(background_url, background_img)
-            #             # 下载滑块保存到本地
-            #             request.urlretrieve(slide_url, slide_img)
-            #             best_angle, best_match_score = find_best_match_angle(background_img, slide_img)
-            #             print(f"最佳匹配角度为: {best_angle} 度")
-            #             # time.sleep(0.5)
-            #
-            #             # w1 = cv2.imread(background_img).shape[1]
-            #             # # 获取网页背景图宽度
-            #             # w2 = background.size['width']
-            #             x = 228 / 360 * best_angle
-            #             x = int(x)
-            #             print(f"移动: {x} ")
-            #             # 找到要获取位置的元素，比如通过其ID
-            #             element = driver.find_element_by_id("slider-div")
-            #
-            #             # 获取元素的X和Y位置
-            #             x_position = element.location['x'] + 50
-            #             y_position = element.location['y'] + 100
-            #
-            #             print("元素的X位置：", x_position)
-            #             print("元素的Y位置：", y_position)
-            #             # 其中x为屏幕左上角至滑块中心的横向像素值，y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
-            #             # time.sleep(0.5)
-            #             SlideUtils.slide_by_pyautogui2(x_position,
-            #                                            y_position,
-            #                                            x + 2,
-            #                                            offset_increase)
-            #             try:
-            #                 WebDriverWait(driver, 2).until(
-            #                     EC.presence_of_element_located((By.ID, "J_main"))
-            #                 )
-            #                 text = driver.page_source
-            #                 break
-            #             except:
-            #                 try:
-            #                     WebDriverWait(driver, 2).until(
-            #                         EC.presence_of_element_located((By.CLASS_NAME, "verifyBtn"))
-            #                     )
-            #                 except NoSuchElementException:
-            #                     print("网页加载异常")
-            #                     text = driver.page_source
-            #                     break
-            #                 finally:
-            #                     print("网页加载完成")
-            #             finally:
-            #                 print("网页加载完成")
-            #         text = driver.page_source
-            #
-            #     except NoSuchElementException:
-            #         driver.get(url)
-            #         text = driver.page_source
-            #         continue
-            #     except Exception as e:
-            #         print(e)
-            #         text = driver.page_source
-            #         continue
-            # except Exception as e:
-            #     print(e)
-            #     text = driver.page_source
-            #     continue
        # 判断是否跳转了登录
        if '<title>京东-欢迎登录</title>' in text:
            return get_cookie()
@@ -610,9 +155,6 @@ def get_cookie():
        mysql_zdzs_proxy(insert_sql)
        print("保存成功")
-        updateCookie(driver)
        pass
    # 登录成功
    cookie = ''
@@ -627,4 +169,3 @@ def get_cookie():
 if __name__ == "__main__":
    print(get_cookie())
-    # print(find_best_match_angle('background_img.png', 'slide_img.png'))