Commit 87f54da1 authored by huangiyu's avatar huangiyu

1

parent 2f4d1e79
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
import time import time
from urllib import request from urllib import request
import cv2 import cv2
from putCookies import updateCookie
import numpy as np import numpy as np
import pyautogui import pyautogui
from selenium import webdriver from selenium import webdriver
...@@ -26,7 +25,6 @@ import re ...@@ -26,7 +25,6 @@ import re
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
# from utils.spider import jd
mysql_zdzs_proxy_host = '59.110.219.171' mysql_zdzs_proxy_host = '59.110.219.171'
mysql_zdzs_proxy_user = 'zgcindex' mysql_zdzs_proxy_user = 'zgcindex'
mysql_zdzs_proxy_password = 'zgcprice2019' mysql_zdzs_proxy_password = 'zgcprice2019'
...@@ -37,15 +35,6 @@ username = config.username ...@@ -37,15 +35,6 @@ username = config.username
# 登陆密码 # 登陆密码
password = config.password password = config.password
# mysql_zdzs_proxy_host = '59.110.219.171'
# mysql_zdzs_proxy_user = 'zgcindex'
# mysql_zdzs_proxy_password = 'zgcprice2019'
# mysql_zdzs_proxy_database = 'zdzs_proxy'
# 滑块距离屏幕左上角的x, y像素, 需根据自己屏幕大小调整,利用微信截图得到xy的具体位置
# 登录的
# slide_x_position, slid_y_position = 850, 459
# # 验证的
# slide_x_position_quick_verification, slid_y_position_quick_verification = 490, 512
# 偏移度加值(根据电脑分辨率情况) # 偏移度加值(根据电脑分辨率情况)
offset_increase = 0 offset_increase = 0
while True: while True:
...@@ -80,212 +69,6 @@ def mysql_zdzs_proxy(sql=None): ...@@ -80,212 +69,6 @@ def mysql_zdzs_proxy(sql=None):
return [] return []
def return_js():
    """Return the contents of ``stealth.min.js`` as a string.

    The path is relative, so the file is resolved against the current
    working directory. The returned source is what ``get_cookie`` passes to
    the ``Page.addScriptToEvaluateOnNewDocument`` CDP command.

    Returns:
        The full text of ``stealth.min.js``.

    Raises:
        OSError: If the file does not exist or cannot be read.
    """
    # Use a context manager so the handle is closed deterministically
    # instead of being left for the garbage collector.
    with open('stealth.min.js', 'r') as script_file:
        return script_file.read()
class SlideUtils:
    """Helpers for slider captchas: locate the gap, then drag the handle."""

    @staticmethod
    def find_pic(background, slide):
        """Return the x coordinate where ``slide`` best matches ``background``.

        Both images are reduced to Canny edge maps before template matching,
        so the match is driven by outlines rather than by colour.

        Args:
            background: Path of the background image file.
            slide: Path of the slider-piece image file.

        Returns:
            The x coordinate (in pixels of the background image file) of the
            top-left corner of the best match.

        Raises:
            OSError: If either image is missing or cannot be decoded.
        """
        background_rgb = cv2.imread(background)
        slide_gray = cv2.imread(slide, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of deep inside OpenCV.
        if background_rgb is None:
            raise OSError(f"cannot read background image: {background}")
        if slide_gray is None:
            raise OSError(f"cannot read slide image: {slide}")

        background_gray = cv2.cvtColor(background_rgb, cv2.COLOR_BGR2GRAY)
        # Canny edge detection strengthens the features used for matching.
        background_edges = cv2.Canny(background_gray, 100, 200)
        slide_edges = cv2.Canny(slide_gray, 100, 200)
        res = cv2.matchTemplate(background_edges, slide_edges, cv2.TM_CCOEFF_NORMED)
        _, _, _, max_loc = cv2.minMaxLoc(res)
        return max_loc[0]

    @staticmethod
    def _drag(x, y, offset, offset_increase, delay_before=0, delay_before_release=0):
        """Press at (x, y), drag along a jittered path, and release.

        Shared implementation of both public drag methods, which differ only
        in the two delays.
        """
        print("睡眠2秒后点击移动")
        if delay_before:
            time.sleep(delay_before)
        target_x = x + offset + offset_increase
        pyautogui.moveTo(x, y, duration=0.1)
        pyautogui.mouseDown()
        try:
            # First leg: 75%-115% of the offset, with a downward wobble.
            y += random.randint(9, 19)
            pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28)
            # Second leg: 85%-105% of the offset, drifting back up.
            y += random.randint(-9, 0)
            pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y,
                             duration=random.randint(20, 31) / 100)
            # Settle on the target, then jitter by a few pixels.
            y += random.randint(0, 8)
            pyautogui.moveTo(target_x, y, duration=0.3)
            pyautogui.moveTo(target_x + random.randint(-3, 3), y + 30, duration=0.3)
            if delay_before_release:
                time.sleep(delay_before_release)
        finally:
            # Always release, so a failure mid-drag cannot leave the mouse
            # button held down.
            pyautogui.mouseUp()

    @staticmethod
    def slide_by_pyautogui(x, y, offset, offset_increase):
        """Drag the slider with pyautogui along a custom trajectory, no pauses.

        Args:
            x: Screen x coordinate at which the mouse button is pressed.
            y: Screen y coordinate at which the mouse button is pressed.
            offset: Horizontal distance to drag, in screen pixels.
            offset_increase: Extra horizontal correction added to the target.
        """
        SlideUtils._drag(x, y, offset, offset_increase)

    @staticmethod
    def slide_by_pyautogui2(x, y, offset, offset_increase):
        """Drag the slider with pyautogui along a custom trajectory, with pauses.

        Same trajectory as ``slide_by_pyautogui``, but sleeps 2 seconds before
        pressing and 1.3 seconds before releasing the mouse button.

        Args:
            x: Screen x coordinate at which the mouse button is pressed.
            y: Screen y coordinate at which the mouse button is pressed.
            offset: Horizontal distance to drag, in screen pixels.
            offset_increase: Extra horizontal correction added to the target.
        """
        SlideUtils._drag(x, y, offset, offset_increase,
                         delay_before=2, delay_before_release=1.3)
# print(f"睡眠2秒后点击
# time.sleep(2)
# xx = x + offset
# pyautogui.moveTo(x, y, duration=0.1)
# pyautogui.mouseDown()
# # y += random.randint(9, 19)
# # pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28)
# # y += random.randint(-9, 0)
# # pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100)
# # y += random.randint(0, 8)
# pyautogui.moveTo(xx, y, duration=0.3)
# print("睡眠2秒后松开")
# time.sleep(2)
# pyautogui.mouseUp()
def detect_circular_distortion(image_path, circle_radius=65):
    """Estimate how distorted the centred circular region of an image is.

    ORB descriptors are computed for the full grayscale image and for the
    same image masked to a circle around its centre. The score is the mean
    distance of the cross-checked Hamming matches between the two sets.

    Args:
        image_path: Path of the image file to analyse.
        circle_radius: Radius in pixels of the centred circular region.

    Returns:
        The mean match distance as a float, or ``float('inf')`` when either
        descriptor set is empty or no matches are found.

    Raises:
        OSError: If the image is missing or cannot be decoded.
    """
    img = cv2.imread(image_path)
    # cv2.imread returns None on failure instead of raising.
    if img is None:
        raise OSError(f"cannot read image: {image_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Build a mask that keeps only the circle around the image centre.
    circle_center = (gray.shape[1] // 2, gray.shape[0] // 2)
    mask = np.zeros_like(gray)
    cv2.circle(mask, circle_center, circle_radius, 255, thickness=-1)
    masked_img = cv2.bitwise_and(gray, gray, mask=mask)

    orb = cv2.ORB_create(500)
    _, des_full = orb.detectAndCompute(gray, None)
    _, des_masked = orb.detectAndCompute(masked_img, None)
    # ORB yields None descriptors when it finds no keypoints; the matcher
    # would crash on that, so treat it like the "no matches" case.
    if des_full is None or des_masked is None:
        return float('inf')

    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = bf.match(des_full, des_masked)
    if not matches:
        return float('inf')
    # The mean match distance serves as a simple distortion estimate.
    return sum(match.distance for match in matches) / len(matches)
def any_of_elements_present(*locators):
    """Build a wait condition that holds when any locator matches the DOM.

    Args:
        *locators: ``(by, value)`` tuples as accepted by the driver's
            ``find_elements`` method.

    Returns:
        A callable taking a driver and returning ``True`` if at least one
        locator matches an element, ``False`` otherwise (including when no
        locators are given).
    """

    def _predicate(driver):
        # find_elements() returns an empty list on a miss. find_element()
        # raises NoSuchElementException instead, which would abort the loop
        # on the first absent locator and never try the remaining ones.
        return any(driver.find_elements(*locator) for locator in locators)

    return _predicate
def find_best_match_angle(background_img_path, rotating_img_path):
    """Find the rotation of the captcha that blends best with the background.

    The captcha is rotated through every integer angle from -360 to -1
    degrees and pasted onto the centre of the background. Each composite is
    scored by the mean Hamming distance between ORB descriptors of the whole
    composite and of its centred circular region (radius 65); the lowest
    score wins. On a tie the earliest angle is kept.

    Args:
        background_img_path: Path of the 320x200 background image.
        rotating_img_path: Path of the 130x130 rotating captcha image.
            NOTE(review): it is used as its own paste mask, so it presumably
            needs an alpha channel - confirm.

    Returns:
        A ``(angle, position)`` tuple: ``angle`` is the absolute value of the
        best rotation in degrees (1..360), and ``position`` is always
        ``None`` because the template-matching step that filled it is
        disabled.

    Raises:
        AssertionError: If either image does not have the expected size.
    """
    # Context managers close both image files once the search is finished.
    with Image.open(background_img_path) as background, \
            Image.open(rotating_img_path) as captcha:
        # Make sure both images have the sizes the geometry below relies on.
        assert background.size == (320, 200), "背景图尺寸必须是 320x200"
        assert captcha.size == (130, 130), "验证码图尺寸必须是 130x130"

        # The captcha is pasted centred on the background.
        insert_position = ((background.width - captcha.width) // 2,
                           (background.height - captcha.height) // 2)

        # Loop invariants: every composite has the background's size, so the
        # mask, detector and matcher can all be built once.
        mask = np.zeros((background.height, background.width), dtype=np.uint8)
        circle_center = (background.width // 2, background.height // 2)
        cv2.circle(mask, circle_center, 65, 255, thickness=-1)
        orb = cv2.ORB_create(500)
        bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

        best_score = None
        best_angle = None
        best_position = None
        best_file_name = None

        # Try every 1-degree step: 360 candidate rotations in total.
        for angle in range(-360, 0, 1):
            rotated_captcha = captcha.rotate(angle, expand=False)
            new_background = background.copy()
            new_background.paste(rotated_captcha, insert_position, rotated_captcha)

            # PIL yields RGB; convert to OpenCV's BGR order, then to gray.
            bgr_image = cv2.cvtColor(np.array(new_background), cv2.COLOR_RGB2BGR)
            gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
            masked_img = cv2.bitwise_and(gray, gray, mask=mask)

            _, des_full = orb.detectAndCompute(gray, None)
            _, des_masked = orb.detectAndCompute(masked_img, None)
            # ORB yields None descriptors when it finds no keypoints; the
            # matcher would crash on that, so score it as "no matches".
            if des_full is None or des_masked is None:
                matches = ()
            else:
                matches = bf.match(des_full, des_masked)

            if matches:
                distortion_level = sum(m.distance for m in matches) / len(matches)
            else:
                distortion_level = float('inf')

            # Seed the best candidate with the first angle, so that
            # best_angle is never None when no later angle is strictly
            # better (that case used to crash in abs(None)).
            if best_angle is None or distortion_level < best_score:
                best_score = distortion_level
                best_angle = angle
                best_file_name = f"captcha_{abs(angle)}.png"

    print(best_score)
    print(best_angle)
    print(best_position)
    print(best_file_name)
    return abs(best_angle), best_position
def get_cookie(): def get_cookie():
option = ChromeOptions() option = ChromeOptions()
option.binary_location = 'D:\ChromeMaYi\guge\chrome.exe' option.binary_location = 'D:\ChromeMaYi\guge\chrome.exe'
...@@ -293,80 +76,17 @@ def get_cookie(): ...@@ -293,80 +76,17 @@ def get_cookie():
# option.add_argument('--headless') # 无头模式,可不启用界面显示运行 # option.add_argument('--headless') # 无头模式,可不启用界面显示运行
option.add_experimental_option('excludeSwitches', ['enable-automation']) option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option) driver = webdriver.Chrome(options=option)
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
"source": return_js()
})
option.add_argument("--disable-blink-features=AutomationControlled") option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(1200, 600) driver.set_window_size(1200, 600)
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
# 切換账号密码登录
# driver.find_element(by=By.CLASS_NAME, value='login-tab-r').click()
# 设置账号密码
driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
time.sleep(0.5)
# 登录
driver.find_element(by=By.ID, value='loginsubmit').click()
print("睡眠0.5秒后开始滑动登录")
time.sleep(0.5)
# 一直循环直到登录成功位置,超过一百次就算失败。 # 一直循环直到登录成功位置,超过一百次就算失败。
count = 0 count = 0
while 1: while 1:
count = count + 1 count = count + 1
print(f"开始第{count}次尝试") print(f"需要登录")
if count > 100:
print("登录京东失败!")
return
# 获取验证码图片
# 用于找到登录图片的大图
try:
background = driver.find_element(by=By.XPATH, value=r'//div/div[@class="JDJRV-bigimg"]/img')
except NoSuchElementException:
# 未查找到登陆图片则认为成功
print("京东登录成功!")
break
# 用来找到登录图片的小滑块
slide = driver.find_element(by=By.XPATH, value=r'//div/div[@class="JDJRV-smallimg"]/img')
background_url = background.get_attribute("src")
slide_url = slide.get_attribute("src")
background_img = 'background_img.png'
slide_img = 'slide_img.png'
# 下载背景大图保存到本地
request.urlretrieve(background_url, background_img)
# 下载滑块保存到本地
request.urlretrieve(slide_url, slide_img)
# 获取最佳x偏移量
x = SlideUtils.find_pic(background_img, slide_img)
# print(f'本地最佳偏移量: {x}')
# 计算缩放
# 获取下载背景图宽度
w1 = cv2.imread(background_img).shape[1]
# 获取网页背景图宽度
w2 = background.size['width']
# 计算实际页面x偏移量
x = (x * w2 / w1)
# 找到要获取位置的元素,比如通过其ID
element = driver.find_element_by_class_name("JDJRV-slide-btn")
# 获取元素的X和Y位置
x_position = element.location['x'] + 50
y_position = element.location['y'] + 100
print("元素的X位置:", x_position)
print("元素的Y位置:", y_position)
# 其中x为屏幕左上角至滑块中心的横向像素值,y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
SlideUtils.slide_by_pyautogui(x_position, y_position, x, offset_increase)
time.sleep(1) time.sleep(1)
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '110' order by docID desc limit 10000"
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE data_batch ='202401251550270001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 10000"
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and data_batch ='202403051815460001' order by id" \
# f" limit 100000"
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' limit 1000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' limit 30000"
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000001' limit 1000"
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE data_batch ='202404180000000001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD'"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
...@@ -389,36 +109,7 @@ def get_cookie(): ...@@ -389,36 +109,7 @@ def get_cookie():
print("跳转首页了,睡眠10秒") print("跳转首页了,睡眠10秒")
time.sleep(10) time.sleep(10)
driver.get(url) driver.get(url)
# element_present = WebDriverWait(driver, 10).until(
# any_of_elements_present((By.ID, "J_main"), (By.CLASS_NAME, "verifyBtn"))
# )
# if element_present:
# print("至少有一个元素加载完成")
# else:
# print("元素未加载完成")
# try:
# WebDriverWait(driver, 2).until(
# EC.presence_of_element_located((By.ID, "J_main"))
# )
# except:
# try:
# WebDriverWait(driver, 5).until(
# EC.presence_of_element_located((By.CLASS_NAME, "verifyBtn"))
# )
# except NoSuchElementException:
# print("网页加载异常")
# driver.get(url)
# time.sleep(2)
# continue
# except Exception as e:
# print("网页加载异常")
# driver.get(url)
# time.sleep(2)
# continue
# finally:
# print("网页加载完成")
# finally:
# print("网页加载完成")
text = driver.page_source text = driver.page_source
while_count = 0 while_count = 0
while ( while (
...@@ -426,152 +117,6 @@ def get_cookie(): ...@@ -426,152 +117,6 @@ def get_cookie():
print(f"需要验证,睡眠1秒") print(f"需要验证,睡眠1秒")
time.sleep(1) time.sleep(1)
text = driver.page_source text = driver.page_source
# while_count += 1
# if while_count > 1:
# driver.get(url)
# time.sleep(1)
# pass
# # 获取验证码图片
# try:
# print("需要验证。")
# print("开始点击“快速验证”按钮")
# try:
# driver.find_element(by=By.CLASS_NAME, value='verifyBtn').click()
# except:
# print("点击“快速验证”按钮异常")
# try:
# WebDriverWait(driver, 5).until(
# EC.presence_of_element_located((By.XPATH, '//*[@id="cpc_img"]'))
# )
# except:
# print("未查找到验证图片背景图,可能是旋转图片,换一种方式获取div")
# pass
# while_count_2 = 0
# while while_count_2 < 30 and (
# '快速验证' in text and '验证一下,购物无忧' in text) or '前方拥挤,请刷新重试' in text or 'class="title">加载中...</span></div></div><!----></div>' in text:
# while_count_2 += 1
# background = driver.find_element(by=By.XPATH, value='//*[@id="cpc_img"]')
# background_url = background.get_attribute("src")
# # 用来找到登录图片的小滑块
# slide = driver.find_element(by=By.XPATH, value='//*[@id="small_img"]')
# slide_url = slide.get_attribute("src")
# background_img = 'background_img.png'
# slide_img = 'slide_img.png'
# # 下载背景大图保存到本地
# request.urlretrieve(background_url, background_img)
# # 下载滑块保存到本地
# request.urlretrieve(slide_url, slide_img)
# # 获取最佳x偏移量
# x = SlideUtils.find_pic(background_img, slide_img)
# # print(f'本地最佳偏移量: {x}')
# # 计算缩放
# # 获取下载背景图宽度
# w1 = cv2.imread(background_img).shape[1]
# # 获取网页背景图宽度
# w2 = background.size['width']
# # 计算实际页面x偏移量
# x = (x * w2 / w1)
# # 找到要获取位置的元素,比如通过其ID
# element = driver.find_element_by_class_name("bg-blue")
#
# # 获取元素的X和Y位置
# x_position = element.location['x'] + 50
# y_position = element.location['y'] + 100
#
# print("元素的X位置:", x_position)
# print("元素的Y位置:", y_position)
# # 其中x为屏幕左上角至滑块中心的横向像素值,y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
# # time.sleep(0.5)
# SlideUtils.slide_by_pyautogui(x_position, y_position,
# x, offset_increase)
# try:
# WebDriverWait(driver, 2).until(
# EC.presence_of_element_located((By.ID, "J_main"))
# )
# text = driver.page_source
# continue
# except:
# print("没成功,重试")
# finally:
# print("网页加载完成")
# text = driver.page_source
# continue
# except NoSuchElementException:
# # print("未查找到验证图片背景图,可能是旋转图片,换一种方式获取div")
# try:
# while_count_2 = 0
# while while_count_2 < 30:
# while_count_2 += 1
# background = driver.find_element(by=By.XPATH, value='//*[@id="img-back-div"]')
# background_url = background.get_attribute("style").replace('"); height: 181px;', "").replace(
# 'background-image: url("', "")
# # 用来找到登录图片的小滑块
# slide = driver.find_element(by=By.XPATH, value='//*[@id="img-rotate-div"]/img')
# slide_url = slide.get_attribute("src")
# background_img = 'background_img.png'
# slide_img = 'slide_img.png'
# # 下载背景大图保存到本地
# request.urlretrieve(background_url, background_img)
# # 下载滑块保存到本地
# request.urlretrieve(slide_url, slide_img)
# best_angle, best_match_score = find_best_match_angle(background_img, slide_img)
# print(f"最佳匹配角度为: {best_angle} 度")
# # time.sleep(0.5)
#
# # w1 = cv2.imread(background_img).shape[1]
# # # 获取网页背景图宽度
# # w2 = background.size['width']
# x = 228 / 360 * best_angle
# x = int(x)
# print(f"移动: {x} ")
# # 找到要获取位置的元素,比如通过其ID
# element = driver.find_element_by_id("slider-div")
#
# # 获取元素的X和Y位置
# x_position = element.location['x'] + 50
# y_position = element.location['y'] + 100
#
# print("元素的X位置:", x_position)
# print("元素的Y位置:", y_position)
# # 其中x为屏幕左上角至滑块中心的横向像素值,y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
# # time.sleep(0.5)
# SlideUtils.slide_by_pyautogui2(x_position,
# y_position,
# x + 2,
# offset_increase)
# try:
# WebDriverWait(driver, 2).until(
# EC.presence_of_element_located((By.ID, "J_main"))
# )
# text = driver.page_source
# break
# except:
# try:
# WebDriverWait(driver, 2).until(
# EC.presence_of_element_located((By.CLASS_NAME, "verifyBtn"))
# )
# except NoSuchElementException:
# print("网页加载异常")
# text = driver.page_source
# break
# finally:
# print("网页加载完成")
# finally:
# print("网页加载完成")
# text = driver.page_source
#
# except NoSuchElementException:
# driver.get(url)
# text = driver.page_source
# continue
# except Exception as e:
# print(e)
# text = driver.page_source
# continue
# except Exception as e:
# print(e)
# text = driver.page_source
# continue
# 判断是否跳转了登录 # 判断是否跳转了登录
if '<title>京东-欢迎登录</title>' in text: if '<title>京东-欢迎登录</title>' in text:
return get_cookie() return get_cookie()
...@@ -610,9 +155,6 @@ def get_cookie(): ...@@ -610,9 +155,6 @@ def get_cookie():
mysql_zdzs_proxy(insert_sql) mysql_zdzs_proxy(insert_sql)
print("保存成功") print("保存成功")
updateCookie(driver)
pass pass
# 登录成功 # 登录成功
cookie = '' cookie = ''
...@@ -627,4 +169,3 @@ def get_cookie(): ...@@ -627,4 +169,3 @@ def get_cookie():
if __name__ == "__main__": if __name__ == "__main__":
print(get_cookie()) print(get_cookie())
# print(find_best_match_angle('background_img.png', 'slide_img.png'))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment