初始化

f2ffc331 · huangiyu · 5f70947c · f2ffc331
Commit f2ffc331 authored Feb 21, 2024 by huangiyu
Show whitespace changes
Inline Side-by-side

Showing with 146 additions and 36 deletions

京东搜索_自动版.py 京东搜索_自动版.py +146 -36

No files found.
--- a/京东搜索_自动版.py
+++ b/京东搜索_自动版.py
@@ -9,6 +9,8 @@ import pyautogui
 from selenium import webdriver
 from selenium.common.exceptions import NoSuchElementException
 from selenium.webdriver import ChromeOptions
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
 import pymysql
 from lxml import etree
 # 登陆账号
@@ -37,9 +39,9 @@ password = 'xxx'
 # mysql_zdzs_proxy_database = 'zdzs_proxy'
 # 滑块距离屏幕左上角的x, y像素, 需根据自己屏幕大小调整，利用微信截图得到xy的具体位置
 # 登录的
-slide_x_position, slid_y_position = 850, 459
-# 验证的
-slide_x_position_quick_verification, slid_y_position_quick_verification = 490, 512
+# slide_x_position, slid_y_position = 850, 459
+# # 验证的
+# slide_x_position_quick_verification, slid_y_position_quick_verification = 490, 512
 # 偏移度加值(根据电脑分辨率情况)
 offset_increase = 0
 while True:
@@ -82,20 +84,19 @@ class SlideUtils:

    @staticmethod
    def find_pic(background, slide):
-        """
-        获取背景图与滑块图的最佳位置
-        """
-        # 读取图片
        background_rgb = cv2.imread(background)
-        # 灰度处理
        background_gray = cv2.cvtColor(background_rgb, cv2.COLOR_BGR2GRAY)
-        # 读取滑块灰度图片
        slide_gray = cv2.imread(slide, 0)
-        # 匹配滑块位置
-        res = cv2.matchTemplate(background_gray, slide_gray, cv2.TM_CCOEFF_NORMED)
-        # 获取最佳与最差匹配
-        value = cv2.minMaxLoc(res)
-        return value[2][0]
+
+        # Run Canny edge detection (thresholds 100/200) on both grayscale images so matching uses edges
+        background_edges = cv2.Canny(background_gray, 100, 200)
+        slide_edges = cv2.Canny(slide_gray, 100, 200)
+
+        # Template-match the slide edges against the background edges; return the x of the best match (max_loc)
+        res = cv2.matchTemplate(background_edges, slide_edges, cv2.TM_CCOEFF_NORMED)
+        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
+
+        return max_loc[0]

    @staticmethod
    def slide_by_pyautogui(x, y, offset, offset_increase):
@@ -111,8 +112,8 @@ class SlideUtils:
        pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28)
        y += random.randint(-9, 0)
        pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100)
-        y += random.randint(0, 8)
-        pyautogui.moveTo(xx, y, duration=1.5)
+        y += random.randint(2, 5)
+        pyautogui.moveTo(xx, y, duration=0.3)
        pyautogui.mouseUp()

    @staticmethod
@@ -129,12 +130,12 @@ class SlideUtils:
        pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28)
        y += random.randint(-9, 0)
        pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100)
-        y += random.randint(0, 8)
-        pyautogui.moveTo(xx, y, duration=1.5)
+        y += random.randint(0, 2)
+        pyautogui.moveTo(xx, y, duration=0.5)
        # print("睡眠2秒后松开")
        # time.sleep(0.3)
        pyautogui.mouseUp()
-        # print(f"睡眠2秒后点击移动")
+        # print(f"睡眠2秒后点击
        # time.sleep(2)
        # xx = x + offset
        # pyautogui.moveTo(x, y, duration=0.1)
@@ -185,7 +186,14 @@ def detect_circular_distortion(image_path, circle_radius=65):

    return average_distance

-
+def any_of_elements_present(*locators):
+    """Return a WebDriverWait condition that is true once any of ``locators`` is in the DOM."""
+    def _predicate(driver):
+        # EC.presence_of_element_located raises NoSuchElementException on a miss, which
+        # aborted the loop at the first absent locator; find_elements returns [] instead,
+        # so every (by, value) locator is checked.
+        return any(driver.find_elements(*locator) for locator in locators)
+    return _predicate
 def find_best_match_angle(background_img_path, rotating_img_path):
    # 加载背景图片和验证码图片
    background = Image.open(background_img_path)
@@ -245,7 +253,7 @@ def get_cookie():
        "source": return_js()
    })
    option.add_argument("--disable-blink-features=AutomationControlled")
-    driver.set_window_size(1200, 700)
+    driver.set_window_size(1200, 600)
    # driver.maximize_window()
    driver.get('https://passport.jd.com/uc/login')
    # 切換账号密码登录
@@ -294,12 +302,21 @@ def get_cookie():
        w2 = background.size['width']
        # 计算实际页面x偏移量
        x = (x * w2 / w1)
+        # 找到要获取位置的元素，比如通过其ID
+        element = driver.find_element_by_class_name("JDJRV-slide-btn")
+
+        # 获取元素的X和Y位置
+        x_position = element.location['x']+50
+        y_position = element.location['y']+100
+
+        print("元素的X位置：", x_position)
+        print("元素的Y位置：", y_position)
        # 其中x为屏幕左上角至滑块中心的横向像素值，y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
-        SlideUtils.slide_by_pyautogui(slide_x_position, slid_y_position, x, offset_increase)
-        time.sleep(3)
+        SlideUtils.slide_by_pyautogui(x_position, y_position, x, offset_increase)
+        time.sleep(1)
    # query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '110' order by docID desc limit 10000"
    # query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE  data_batch ='202401251550270001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 10000"
-    query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 1000"
+    query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 100"
    query_spider = mysql_zdzs_proxy(query_spider_sql)
    for row in query_spider:
        # while True:
@@ -310,17 +327,54 @@ def get_cookie():
        url = row['url']
        print(url)
        driver.get(url)
+        # element_present = WebDriverWait(driver, 10).until(
+        #     any_of_elements_present((By.ID, "J_main"), (By.CLASS_NAME, "verifyBtn"))
+        # )
+        # if element_present:
+        #     print("至少有一个元素加载完成")
+        # else:
+        #     print("元素未加载完成")
+        try:
+            WebDriverWait(driver, 2).until(
+                EC.presence_of_element_located((By.ID, "J_main"))
+            )
+        except:
+            try:
+                WebDriverWait(driver, 5).until(
+                    EC.presence_of_element_located((By.CLASS_NAME, "verifyBtn"))
+                )
+            except NoSuchElementException:
+                print("网页加载异常")
+                driver.get(url)
+                time.sleep(2)
+                continue
+            finally:
+                print("网页加载完成")
+        finally:
+            print("网页加载完成")
        text = driver.page_source
-        while ('快速验证' in text and '验证一下，购物无忧' in text) or '前方拥挤，请刷新重试' in text:
+        while_count = 0
+        while ('快速验证' in text and '验证一下，购物无忧' in text) or '前方拥挤，请刷新重试' in text or 'class="title">加载中...</span></div></div><!----></div>' in text:
+            while_count += 1
+            if while_count > 1:
                driver.get(url)
-            time.sleep(1.5)
+                time.sleep(1)
            pass
            # 获取验证码图片
            try:
                print("需要验证。")
-                print("睡眠1秒后点击“快速验证”按钮")
+                print("开始点击“快速验证”按钮")
+                try:
                    driver.find_element(by=By.CLASS_NAME, value='verifyBtn').click()
-                time.sleep(2)
+                except:
+                    print("点击“快速验证”按钮异常")
+                try:
+                    WebDriverWait(driver, 5).until(
+                        EC.presence_of_element_located((By.XPATH, '//*[@id="cpc_img"]'))
+                    )
+                except:
+                    print("未查找到验证图片背景图，可能是旋转图片，换一种方式获取div")
+                    pass
                background = driver.find_element(by=By.XPATH, value='//*[@id="cpc_img"]')
                background_url = background.get_attribute("src")
                # 用来找到登录图片的小滑块
@@ -342,15 +396,43 @@ def get_cookie():
                w2 = background.size['width']
                # 计算实际页面x偏移量
                x = (x * w2 / w1)
+                # 找到要获取位置的元素，比如通过其ID
+                element = driver.find_element_by_class_name("bg-blue")
+
+                # 获取元素的X和Y位置
+                x_position = element.location['x']+50
+                y_position = element.location['y']+100
+
+                print("元素的X位置：", x_position)
+                print("元素的Y位置：", y_position)
                # 其中x为屏幕左上角至滑块中心的横向像素值，y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
                # time.sleep(0.5)
-                SlideUtils.slide_by_pyautogui(slide_x_position_quick_verification, slid_y_position_quick_verification,
-                                              x, offset_increase+5)
-                time.sleep(3)
+                SlideUtils.slide_by_pyautogui(x_position, y_position,
+                                              x, offset_increase)
+                try:
+                    WebDriverWait(driver, 2).until(
+                        EC.presence_of_element_located((By.ID, "J_main"))
+                    )
                    text = driver.page_source
                    continue
+                except:
+                    try:
+                        driver.get(url)
+                        WebDriverWait(driver, 5).until(
+                            EC.presence_of_element_located((By.CLASS_NAME, "verifyBtn"))
+                        )
                    except NoSuchElementException:
-                print("未查找到验证图片背景图，可能是旋转图片，换一种方式获取div")
+                        print("网页加载异常")
+                        text = driver.page_source
+                        continue
+                    finally:
+                        print("网页加载完成")
+                finally:
+                    print("网页加载完成")
+                text = driver.page_source
+                continue
+            except NoSuchElementException:
+                # print("未查找到验证图片背景图，可能是旋转图片，换一种方式获取div")
                try:
                    background = driver.find_element(by=By.XPATH, value='//*[@id="img-back-div"]')
                    background_url = background.get_attribute("style").replace('"); height: 181px;', "").replace(
@@ -374,15 +456,43 @@ def get_cookie():
                    x = 228 / 360 * best_angle
                    x = int(x)
                    print(f"移动: {x} ")
-                    SlideUtils.slide_by_pyautogui2(slide_x_position_quick_verification,
-                                                   slid_y_position_quick_verification,
+                    # 找到要获取位置的元素，比如通过其ID
+                    element = driver.find_element_by_id("slider-div")
+
+                    # 获取元素的X和Y位置
+                    x_position = element.location['x']+50
+                    y_position = element.location['y']+100
+
+                    print("元素的X位置：", x_position)
+                    print("元素的Y位置：", y_position)
+                    # 其中x为屏幕左上角至滑块中心的横向像素值，y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
+                    # time.sleep(0.5)
+                    SlideUtils.slide_by_pyautogui2(x_position,
+                                                   y_position,
                                                   x,
                                                   offset_increase)
-                    time.sleep(3)
+                    try:
+                        WebDriverWait(driver, 2).until(
+                            EC.presence_of_element_located((By.ID, "J_main"))
+                        )
+                        text = driver.page_source
+                        continue
+                    except:
+                        try:
+                            WebDriverWait(driver, 5).until(
+                                EC.presence_of_element_located((By.CLASS_NAME, "verifyBtn"))
+                            )
+                        except NoSuchElementException:
+                            print("网页加载异常")
+                            text = driver.page_source
+                            continue
+                        finally:
+                            print("网页加载完成")
+                    finally:
+                        print("网页加载完成")
                    text = driver.page_source

                except NoSuchElementException:
-                    print("未查找到验证图片背景图，睡眠1秒后重试。")
                    driver.get(url)
                    text = driver.page_source
                    continue