Commit f2ffc331 authored by huangiyu

初始化

parent 5f70947c
......@@ -9,6 +9,8 @@ import pyautogui
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import ChromeOptions
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pymysql
from lxml import etree
# 登陆账号
......@@ -37,9 +39,9 @@ password = 'xxx'
# mysql_zdzs_proxy_database = 'zdzs_proxy'
# 滑块距离屏幕左上角的x, y像素, 需根据自己屏幕大小调整,利用微信截图得到xy的具体位置
# 登录的
slide_x_position, slid_y_position = 850, 459
# 验证的
slide_x_position_quick_verification, slid_y_position_quick_verification = 490, 512
# slide_x_position, slid_y_position = 850, 459
# # 验证的
# slide_x_position_quick_verification, slid_y_position_quick_verification = 490, 512
# 偏移度加值(根据电脑分辨率情况)
offset_increase = 0
while True:
......@@ -82,20 +84,19 @@ class SlideUtils:
@staticmethod
def find_pic(background, slide):
    """Locate the slider piece inside the captcha background image.

    Both images are reduced to Canny edge maps before template matching,
    and the position of the highest-scoring match is used.

    Args:
        background: Path of the background image file.
        slide: Path of the slider-piece image file.

    Returns:
        The x coordinate, in background-image pixels, of the best match
        location reported by ``cv2.minMaxLoc``.
    """
    # Read the background in colour, then convert it to grayscale.
    background_rgb = cv2.imread(background)
    background_gray = cv2.cvtColor(background_rgb, cv2.COLOR_BGR2GRAY)
    # Read the slider piece directly as a grayscale image (flag 0).
    slide_gray = cv2.imread(slide, 0)
    # Apply Canny edge detection to both images to enhance the features
    # that are matched.
    background_edges = cv2.Canny(background_gray, 100, 200)
    slide_edges = cv2.Canny(slide_gray, 100, 200)
    # Template-match the slider edges against the background edges.
    res = cv2.matchTemplate(background_edges, slide_edges, cv2.TM_CCOEFF_NORMED)
    # With TM_CCOEFF_NORMED the best match is the maximum of the response
    # map, so only max_loc is needed.
    _, _, _, max_loc = cv2.minMaxLoc(res)
    return max_loc[0]
@staticmethod
def slide_by_pyautogui(x, y, offset, offset_increase):
......@@ -111,8 +112,8 @@ class SlideUtils:
pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28)
y += random.randint(-9, 0)
pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100)
y += random.randint(0, 8)
pyautogui.moveTo(xx, y, duration=1.5)
y += random.randint(2, 5)
pyautogui.moveTo(xx, y, duration=0.3)
pyautogui.mouseUp()
@staticmethod
......@@ -129,12 +130,12 @@ class SlideUtils:
pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28)
y += random.randint(-9, 0)
pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100)
y += random.randint(0, 8)
pyautogui.moveTo(xx, y, duration=1.5)
y += random.randint(0, 2)
pyautogui.moveTo(xx, y, duration=0.5)
# print("睡眠2秒后松开")
# time.sleep(0.3)
pyautogui.mouseUp()
# print(f"睡眠2秒后点击移动")
# print(f"睡眠2秒后点击
# time.sleep(2)
# xx = x + offset
# pyautogui.moveTo(x, y, duration=0.1)
......@@ -185,7 +186,14 @@ def detect_circular_distortion(image_path, circle_radius=65):
return average_distance
def any_of_elements_present(*locators):
    """Build a wait condition that holds once any given locator matches.

    Intended to be passed to ``WebDriverWait(driver, timeout).until(...)``.

    Args:
        *locators: ``(by, value)`` tuples, e.g. ``(By.ID, "J_main")``.

    Returns:
        A callable that takes a driver and returns ``True`` when at least
        one locator matches an element in the DOM, otherwise ``False``.
    """

    def _predicate(driver):
        # find_elements returns an empty list when nothing matches instead
        # of raising, so a missing first locator no longer prevents the
        # remaining locators from being checked.
        return any(driver.find_elements(*locator) for locator in locators)

    return _predicate
def find_best_match_angle(background_img_path, rotating_img_path):
# 加载背景图片和验证码图片
background = Image.open(background_img_path)
......@@ -245,7 +253,7 @@ def get_cookie():
"source": return_js()
})
option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(1200, 700)
driver.set_window_size(1200, 600)
# driver.maximize_window()
driver.get('https://passport.jd.com/uc/login')
# 切換账号密码登录
......@@ -294,12 +302,21 @@ def get_cookie():
w2 = background.size['width']
# 计算实际页面x偏移量
x = (x * w2 / w1)
# 找到要获取位置的元素,比如通过其ID
element = driver.find_element_by_class_name("JDJRV-slide-btn")
# 获取元素的X和Y位置
x_position = element.location['x']+50
y_position = element.location['y']+100
print("元素的X位置:", x_position)
print("元素的Y位置:", y_position)
# 其中x为屏幕左上角至滑块中心的横向像素值,y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
SlideUtils.slide_by_pyautogui(slide_x_position, slid_y_position, x, offset_increase)
time.sleep(3)
SlideUtils.slide_by_pyautogui(x_position, y_position, x, offset_increase)
time.sleep(1)
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '110' order by docID desc limit 10000"
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE data_batch ='202401251550270001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 10000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 1000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 100"
query_spider = mysql_zdzs_proxy(query_spider_sql)
for row in query_spider:
# while True:
......@@ -310,17 +327,54 @@ def get_cookie():
url = row['url']
print(url)
driver.get(url)
# element_present = WebDriverWait(driver, 10).until(
# any_of_elements_present((By.ID, "J_main"), (By.CLASS_NAME, "verifyBtn"))
# )
# if element_present:
# print("至少有一个元素加载完成")
# else:
# print("元素未加载完成")
try:
WebDriverWait(driver, 2).until(
EC.presence_of_element_located((By.ID, "J_main"))
)
except:
try:
WebDriverWait(driver, 5).until(
EC.presence_of_element_located((By.CLASS_NAME, "verifyBtn"))
)
except NoSuchElementException:
print("网页加载异常")
driver.get(url)
time.sleep(2)
continue
finally:
print("网页加载完成")
finally:
print("网页加载完成")
text = driver.page_source
while ('快速验证' in text and '验证一下,购物无忧' in text) or '前方拥挤,请刷新重试' in text:
driver.get(url)
time.sleep(1.5)
while_count = 0
while ('快速验证' in text and '验证一下,购物无忧' in text) or '前方拥挤,请刷新重试' in text or 'class="title">加载中...</span></div></div><!----></div>' in text:
while_count += 1
if while_count > 1:
driver.get(url)
time.sleep(1)
pass
# 获取验证码图片
try:
print("需要验证。")
print("睡眠1秒后点击“快速验证”按钮")
driver.find_element(by=By.CLASS_NAME, value='verifyBtn').click()
time.sleep(2)
print("开始点击“快速验证”按钮")
try:
driver.find_element(by=By.CLASS_NAME, value='verifyBtn').click()
except:
print("点击“快速验证”按钮异常")
try:
WebDriverWait(driver, 5).until(
EC.presence_of_element_located((By.XPATH, '//*[@id="cpc_img"]'))
)
except:
print("未查找到验证图片背景图,可能是旋转图片,换一种方式获取div")
pass
background = driver.find_element(by=By.XPATH, value='//*[@id="cpc_img"]')
background_url = background.get_attribute("src")
# 用来找到登录图片的小滑块
......@@ -342,15 +396,43 @@ def get_cookie():
w2 = background.size['width']
# 计算实际页面x偏移量
x = (x * w2 / w1)
# 找到要获取位置的元素,比如通过其ID
element = driver.find_element_by_class_name("bg-blue")
# 获取元素的X和Y位置
x_position = element.location['x']+50
y_position = element.location['y']+100
print("元素的X位置:", x_position)
print("元素的Y位置:", y_position)
# 其中x为屏幕左上角至滑块中心的横向像素值,y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
# time.sleep(0.5)
SlideUtils.slide_by_pyautogui(slide_x_position_quick_verification, slid_y_position_quick_verification,
x, offset_increase+5)
time.sleep(3)
SlideUtils.slide_by_pyautogui(x_position, y_position,
x, offset_increase)
try:
WebDriverWait(driver, 2).until(
EC.presence_of_element_located((By.ID, "J_main"))
)
text = driver.page_source
continue
except:
try:
driver.get(url)
WebDriverWait(driver, 5).until(
EC.presence_of_element_located((By.CLASS_NAME, "verifyBtn"))
)
except NoSuchElementException:
print("网页加载异常")
text = driver.page_source
continue
finally:
print("网页加载完成")
finally:
print("网页加载完成")
text = driver.page_source
continue
except NoSuchElementException:
print("未查找到验证图片背景图,可能是旋转图片,换一种方式获取div")
# print("未查找到验证图片背景图,可能是旋转图片,换一种方式获取div")
try:
background = driver.find_element(by=By.XPATH, value='//*[@id="img-back-div"]')
background_url = background.get_attribute("style").replace('"); height: 181px;', "").replace(
......@@ -374,15 +456,43 @@ def get_cookie():
x = 228 / 360 * best_angle
x = int(x)
print(f"移动: {x} ")
SlideUtils.slide_by_pyautogui2(slide_x_position_quick_verification,
slid_y_position_quick_verification,
# 找到要获取位置的元素,比如通过其ID
element = driver.find_element_by_id("slider-div")
# 获取元素的X和Y位置
x_position = element.location['x']+50
y_position = element.location['y']+100
print("元素的X位置:", x_position)
print("元素的Y位置:", y_position)
# 其中x为屏幕左上角至滑块中心的横向像素值,y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
# time.sleep(0.5)
SlideUtils.slide_by_pyautogui2(x_position,
y_position,
x,
offset_increase)
time.sleep(3)
try:
WebDriverWait(driver, 2).until(
EC.presence_of_element_located((By.ID, "J_main"))
)
text = driver.page_source
continue
except:
try:
WebDriverWait(driver, 5).until(
EC.presence_of_element_located((By.CLASS_NAME, "verifyBtn"))
)
except NoSuchElementException:
print("网页加载异常")
text = driver.page_source
continue
finally:
print("网页加载完成")
finally:
print("网页加载完成")
text = driver.page_source
except NoSuchElementException:
print("未查找到验证图片背景图,睡眠1秒后重试。")
driver.get(url)
text = driver.page_source
continue
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment