Commit f2ffc331 authored by huangiyu

初始化

parent 5f70947c
...@@ -9,6 +9,8 @@ import pyautogui ...@@ -9,6 +9,8 @@ import pyautogui
from selenium import webdriver from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import ChromeOptions from selenium.webdriver import ChromeOptions
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pymysql import pymysql
from lxml import etree from lxml import etree
# 登陆账号 # 登陆账号
...@@ -37,9 +39,9 @@ password = 'xxx' ...@@ -37,9 +39,9 @@ password = 'xxx'
# mysql_zdzs_proxy_database = 'zdzs_proxy' # mysql_zdzs_proxy_database = 'zdzs_proxy'
# 滑块距离屏幕左上角的x, y像素, 需根据自己屏幕大小调整,利用微信截图得到xy的具体位置 # 滑块距离屏幕左上角的x, y像素, 需根据自己屏幕大小调整,利用微信截图得到xy的具体位置
# 登录的 # 登录的
slide_x_position, slid_y_position = 850, 459 # slide_x_position, slid_y_position = 850, 459
# 验证的 # # 验证的
slide_x_position_quick_verification, slid_y_position_quick_verification = 490, 512 # slide_x_position_quick_verification, slid_y_position_quick_verification = 490, 512
# 偏移度加值(根据电脑分辨率情况) # 偏移度加值(根据电脑分辨率情况)
offset_increase = 0 offset_increase = 0
while True: while True:
...@@ -82,20 +84,19 @@ class SlideUtils: ...@@ -82,20 +84,19 @@ class SlideUtils:
@staticmethod @staticmethod
def find_pic(background, slide):
    """Locate the slider piece inside the captcha background image.

    Both images are converted to Canny edge maps before template matching,
    so the match is driven by outlines rather than raw pixel intensity.

    Args:
        background: Path of the background image file.
        slide: Path of the slider-piece image file.

    Returns:
        The x coordinate of the best match location reported by
        ``cv2.minMaxLoc``, in the background image's own pixel units.

    Raises:
        ValueError: If either file cannot be read as an image.
    """
    background_rgb = cv2.imread(background)
    # cv2.imread signals failure by returning None rather than raising;
    # fail here with a clear message instead of deep inside cvtColor.
    if background_rgb is None:
        raise ValueError(f"background image could not be read: {background!r}")
    background_gray = cv2.cvtColor(background_rgb, cv2.COLOR_BGR2GRAY)

    # Flag 0 loads the slider directly as a single-channel grayscale image.
    slide_gray = cv2.imread(slide, 0)
    if slide_gray is None:
        raise ValueError(f"slide image could not be read: {slide!r}")

    # Edge detection to emphasise the outline of the gap and the slider.
    background_edges = cv2.Canny(background_gray, 100, 200)
    slide_edges = cv2.Canny(slide_gray, 100, 200)

    # Normalised correlation: the maximum of the result map is the best match.
    res = cv2.matchTemplate(background_edges, slide_edges, cv2.TM_CCOEFF_NORMED)
    _min_val, _max_val, _min_loc, max_loc = cv2.minMaxLoc(res)
    return max_loc[0]
@staticmethod @staticmethod
def slide_by_pyautogui(x, y, offset, offset_increase): def slide_by_pyautogui(x, y, offset, offset_increase):
...@@ -111,8 +112,8 @@ class SlideUtils: ...@@ -111,8 +112,8 @@ class SlideUtils:
pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28) pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28)
y += random.randint(-9, 0) y += random.randint(-9, 0)
pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100) pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100)
y += random.randint(0, 8) y += random.randint(2, 5)
pyautogui.moveTo(xx, y, duration=1.5) pyautogui.moveTo(xx, y, duration=0.3)
pyautogui.mouseUp() pyautogui.mouseUp()
@staticmethod @staticmethod
...@@ -129,12 +130,12 @@ class SlideUtils: ...@@ -129,12 +130,12 @@ class SlideUtils:
pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28) pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28)
y += random.randint(-9, 0) y += random.randint(-9, 0)
pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100) pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100)
y += random.randint(0, 8) y += random.randint(0, 2)
pyautogui.moveTo(xx, y, duration=1.5) pyautogui.moveTo(xx, y, duration=0.5)
# print("睡眠2秒后松开") # print("睡眠2秒后松开")
# time.sleep(0.3) # time.sleep(0.3)
pyautogui.mouseUp() pyautogui.mouseUp()
# print(f"睡眠2秒后点击移动") # print(f"睡眠2秒后点击
# time.sleep(2) # time.sleep(2)
# xx = x + offset # xx = x + offset
# pyautogui.moveTo(x, y, duration=0.1) # pyautogui.moveTo(x, y, duration=0.1)
...@@ -185,7 +186,14 @@ def detect_circular_distortion(image_path, circle_radius=65): ...@@ -185,7 +186,14 @@ def detect_circular_distortion(image_path, circle_radius=65):
return average_distance return average_distance
def any_of_elements_present(*locators):
    """Build a wait condition that holds once any given locator matches.

    Intended to be passed to ``WebDriverWait(...).until(...)``.

    Args:
        *locators: ``(by, value)`` tuples, e.g. ``(By.ID, "J_main")``.

    Returns:
        A callable that takes a driver and returns ``True`` if at least one
        locator currently matches an element in the DOM, otherwise ``False``.
        With no locators the callable always returns ``False``.
    """
    def _predicate(driver):
        # find_elements returns an empty list when nothing matches, whereas
        # a single-element lookup raises NoSuchElementException. Using the
        # list form ensures a missing first locator does not stop the
        # remaining locators from being checked.
        return any(driver.find_elements(*locator) for locator in locators)
    return _predicate
def find_best_match_angle(background_img_path, rotating_img_path): def find_best_match_angle(background_img_path, rotating_img_path):
# 加载背景图片和验证码图片 # 加载背景图片和验证码图片
background = Image.open(background_img_path) background = Image.open(background_img_path)
...@@ -245,7 +253,7 @@ def get_cookie(): ...@@ -245,7 +253,7 @@ def get_cookie():
"source": return_js() "source": return_js()
}) })
option.add_argument("--disable-blink-features=AutomationControlled") option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(1200, 700) driver.set_window_size(1200, 600)
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
# 切換账号密码登录 # 切換账号密码登录
...@@ -294,12 +302,21 @@ def get_cookie(): ...@@ -294,12 +302,21 @@ def get_cookie():
w2 = background.size['width'] w2 = background.size['width']
# 计算实际页面x偏移量 # 计算实际页面x偏移量
x = (x * w2 / w1) x = (x * w2 / w1)
# 找到要获取位置的元素,比如通过其ID
element = driver.find_element_by_class_name("JDJRV-slide-btn")
# 获取元素的X和Y位置
x_position = element.location['x']+50
y_position = element.location['y']+100
print("元素的X位置:", x_position)
print("元素的Y位置:", y_position)
# 其中x为屏幕左上角至滑块中心的横向像素值,y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置 # 其中x为屏幕左上角至滑块中心的横向像素值,y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
SlideUtils.slide_by_pyautogui(slide_x_position, slid_y_position, x, offset_increase) SlideUtils.slide_by_pyautogui(x_position, y_position, x, offset_increase)
time.sleep(3) time.sleep(1)
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '110' order by docID desc limit 10000" # query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '110' order by docID desc limit 10000"
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE data_batch ='202401251550270001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 10000" # query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE data_batch ='202401251550270001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 10000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 1000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 100"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
for row in query_spider: for row in query_spider:
# while True: # while True:
...@@ -310,17 +327,54 @@ def get_cookie(): ...@@ -310,17 +327,54 @@ def get_cookie():
url = row['url'] url = row['url']
print(url) print(url)
driver.get(url) driver.get(url)
# element_present = WebDriverWait(driver, 10).until(
# any_of_elements_present((By.ID, "J_main"), (By.CLASS_NAME, "verifyBtn"))
# )
# if element_present:
# print("至少有一个元素加载完成")
# else:
# print("元素未加载完成")
try:
WebDriverWait(driver, 2).until(
EC.presence_of_element_located((By.ID, "J_main"))
)
except:
try:
WebDriverWait(driver, 5).until(
EC.presence_of_element_located((By.CLASS_NAME, "verifyBtn"))
)
except NoSuchElementException:
print("网页加载异常")
driver.get(url)
time.sleep(2)
continue
finally:
print("网页加载完成")
finally:
print("网页加载完成")
text = driver.page_source text = driver.page_source
while ('快速验证' in text and '验证一下,购物无忧' in text) or '前方拥挤,请刷新重试' in text: while_count = 0
while ('快速验证' in text and '验证一下,购物无忧' in text) or '前方拥挤,请刷新重试' in text or 'class="title">加载中...</span></div></div><!----></div>' in text:
while_count += 1
if while_count > 1:
driver.get(url) driver.get(url)
time.sleep(1.5) time.sleep(1)
pass pass
# 获取验证码图片 # 获取验证码图片
try: try:
print("需要验证。") print("需要验证。")
print("睡眠1秒后点击“快速验证”按钮") print("开始点击“快速验证”按钮")
try:
driver.find_element(by=By.CLASS_NAME, value='verifyBtn').click() driver.find_element(by=By.CLASS_NAME, value='verifyBtn').click()
time.sleep(2) except:
print("点击“快速验证”按钮异常")
try:
WebDriverWait(driver, 5).until(
EC.presence_of_element_located((By.XPATH, '//*[@id="cpc_img"]'))
)
except:
print("未查找到验证图片背景图,可能是旋转图片,换一种方式获取div")
pass
background = driver.find_element(by=By.XPATH, value='//*[@id="cpc_img"]') background = driver.find_element(by=By.XPATH, value='//*[@id="cpc_img"]')
background_url = background.get_attribute("src") background_url = background.get_attribute("src")
# 用来找到登录图片的小滑块 # 用来找到登录图片的小滑块
...@@ -342,15 +396,43 @@ def get_cookie(): ...@@ -342,15 +396,43 @@ def get_cookie():
w2 = background.size['width'] w2 = background.size['width']
# 计算实际页面x偏移量 # 计算实际页面x偏移量
x = (x * w2 / w1) x = (x * w2 / w1)
# 找到要获取位置的元素,比如通过其ID
element = driver.find_element_by_class_name("bg-blue")
# 获取元素的X和Y位置
x_position = element.location['x']+50
y_position = element.location['y']+100
print("元素的X位置:", x_position)
print("元素的Y位置:", y_position)
# 其中x为屏幕左上角至滑块中心的横向像素值,y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置 # 其中x为屏幕左上角至滑块中心的横向像素值,y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
# time.sleep(0.5) # time.sleep(0.5)
SlideUtils.slide_by_pyautogui(slide_x_position_quick_verification, slid_y_position_quick_verification, SlideUtils.slide_by_pyautogui(x_position, y_position,
x, offset_increase+5) x, offset_increase)
time.sleep(3) try:
WebDriverWait(driver, 2).until(
EC.presence_of_element_located((By.ID, "J_main"))
)
text = driver.page_source text = driver.page_source
continue continue
except:
try:
driver.get(url)
WebDriverWait(driver, 5).until(
EC.presence_of_element_located((By.CLASS_NAME, "verifyBtn"))
)
except NoSuchElementException: except NoSuchElementException:
print("未查找到验证图片背景图,可能是旋转图片,换一种方式获取div") print("网页加载异常")
text = driver.page_source
continue
finally:
print("网页加载完成")
finally:
print("网页加载完成")
text = driver.page_source
continue
except NoSuchElementException:
# print("未查找到验证图片背景图,可能是旋转图片,换一种方式获取div")
try: try:
background = driver.find_element(by=By.XPATH, value='//*[@id="img-back-div"]') background = driver.find_element(by=By.XPATH, value='//*[@id="img-back-div"]')
background_url = background.get_attribute("style").replace('"); height: 181px;', "").replace( background_url = background.get_attribute("style").replace('"); height: 181px;', "").replace(
...@@ -374,15 +456,43 @@ def get_cookie(): ...@@ -374,15 +456,43 @@ def get_cookie():
x = 228 / 360 * best_angle x = 228 / 360 * best_angle
x = int(x) x = int(x)
print(f"移动: {x} ") print(f"移动: {x} ")
SlideUtils.slide_by_pyautogui2(slide_x_position_quick_verification, # 找到要获取位置的元素,比如通过其ID
slid_y_position_quick_verification, element = driver.find_element_by_id("slider-div")
# 获取元素的X和Y位置
x_position = element.location['x']+50
y_position = element.location['y']+100
print("元素的X位置:", x_position)
print("元素的Y位置:", y_position)
# 其中x为屏幕左上角至滑块中心的横向像素值,y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
# time.sleep(0.5)
SlideUtils.slide_by_pyautogui2(x_position,
y_position,
x, x,
offset_increase) offset_increase)
time.sleep(3) try:
WebDriverWait(driver, 2).until(
EC.presence_of_element_located((By.ID, "J_main"))
)
text = driver.page_source
continue
except:
try:
WebDriverWait(driver, 5).until(
EC.presence_of_element_located((By.CLASS_NAME, "verifyBtn"))
)
except NoSuchElementException:
print("网页加载异常")
text = driver.page_source
continue
finally:
print("网页加载完成")
finally:
print("网页加载完成")
text = driver.page_source text = driver.page_source
except NoSuchElementException: except NoSuchElementException:
print("未查找到验证图片背景图,睡眠1秒后重试。")
driver.get(url) driver.get(url)
text = driver.page_source text = driver.page_source
continue continue
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment