Commit 87f54da1 authored by huangiyu's avatar huangiyu

1

parent 2f4d1e79
......@@ -4,7 +4,6 @@
import time
from urllib import request
import cv2
from putCookies import updateCookie
import numpy as np
import pyautogui
from selenium import webdriver
......@@ -26,7 +25,6 @@ import re
from bs4 import BeautifulSoup
# from utils.spider import jd
# Connection settings for the MySQL proxy database.
# NOTE(review): the host and credentials are hard-coded in this file; consider
# loading them from the `config` module or environment variables instead.
mysql_zdzs_proxy_host = '59.110.219.171'
mysql_zdzs_proxy_user = 'zgcindex'
mysql_zdzs_proxy_password = 'zgcprice2019'
......@@ -37,15 +35,6 @@ username = config.username
# Login password
password = config.password
# mysql_zdzs_proxy_host = '59.110.219.171'
# mysql_zdzs_proxy_user = 'zgcindex'
# mysql_zdzs_proxy_password = 'zgcprice2019'
# mysql_zdzs_proxy_database = 'zdzs_proxy'
# Pixel x, y of the slider relative to the top-left corner of the screen; adjust for your own screen size (a screenshot tool such as WeChat's gives the exact position)
# For the login slider
# slide_x_position, slid_y_position = 850, 459
# # For the verification slider
# slide_x_position_quick_verification, slid_y_position_quick_verification = 490, 512
# Extra value added to the slide offset (tune for the machine's screen resolution)
offset_increase = 0
while True:
......@@ -80,212 +69,6 @@ def mysql_zdzs_proxy(sql=None):
return []
def return_js():
    """Return the text of ``stealth.min.js`` read from the working directory.

    Returns:
        The full contents of the script as a string.

    Raises:
        OSError: If the file cannot be opened.
    """
    # A context manager closes the handle deterministically; the explicit
    # encoding keeps the result independent of the platform's locale default.
    with open('stealth.min.js', 'r', encoding='utf-8') as script_file:
        return script_file.read()
class SlideUtils:
    """Helpers for slider CAPTCHAs: locate the gap, then drag the slider with the mouse."""

    @staticmethod
    def find_pic(background, slide):
        """Return the x coordinate where the slider piece best matches the background.

        Both images are reduced to Canny edge maps and compared with
        normalised cross-correlation template matching.

        Args:
            background: Path of the background image file.
            slide: Path of the slider-piece image file.

        Returns:
            The x coordinate, in pixels of the background image file, of the
            best match location.

        Raises:
            ValueError: If either image cannot be read by OpenCV.
        """
        background_rgb = cv2.imread(background)
        slide_gray = cv2.imread(slide, 0)
        # cv2.imread reports failure by returning None rather than raising,
        # which would otherwise surface later as an opaque cv2.error.
        if background_rgb is None:
            raise ValueError(f"cannot read image: {background}")
        if slide_gray is None:
            raise ValueError(f"cannot read image: {slide}")

        background_gray = cv2.cvtColor(background_rgb, cv2.COLOR_BGR2GRAY)
        # Edge maps make the match depend on outlines rather than colours.
        background_edges = cv2.Canny(background_gray, 100, 200)
        slide_edges = cv2.Canny(slide_gray, 100, 200)
        res = cv2.matchTemplate(background_edges, slide_edges, cv2.TM_CCOEFF_NORMED)
        _, _, _, max_loc = cv2.minMaxLoc(res)
        return max_loc[0]

    @staticmethod
    def _drag(x, y, offset, offset_increase, pre_delay=0, hold_delay=0):
        """Press at (x, y), drag along a jittered path, then release.

        Args:
            x: Starting x coordinate passed to pyautogui.
            y: Starting y coordinate passed to pyautogui.
            offset: Horizontal distance to drag.
            offset_increase: Extra value added to the final x position.
            pre_delay: Seconds to sleep before moving to the start point.
            hold_delay: Seconds to keep the button held at the end point.
        """
        # Message kept verbatim for log compatibility, even though one caller
        # does not actually sleep.
        print("睡眠2秒后点击移动")
        if pre_delay:
            time.sleep(pre_delay)
        target_x = x + offset + offset_increase
        pyautogui.moveTo(x, y, duration=0.1)
        pyautogui.mouseDown()
        try:
            # Overshoot / undershoot with vertical jitter before settling on
            # the target, so the path is not a straight line.
            y += random.randint(9, 19)
            pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28)
            y += random.randint(-9, 0)
            pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y,
                             duration=random.randint(20, 31) / 100)
            y += random.randint(0, 8)
            pyautogui.moveTo(target_x, y, duration=0.3)
            pyautogui.moveTo(target_x + random.randint(-3, 3), y + 30, duration=0.3)
            if hold_delay:
                time.sleep(hold_delay)
        finally:
            # Always release the button, even if a move raised, so the mouse
            # is never left stuck in the pressed state.
            pyautogui.mouseUp()

    @staticmethod
    def slide_by_pyautogui(x, y, offset, offset_increase):
        """Drag the slider with pyautogui along a custom trajectory, without pauses.

        Args:
            x: Starting x coordinate passed to pyautogui.
            y: Starting y coordinate passed to pyautogui.
            offset: Horizontal distance to drag.
            offset_increase: Extra value added to the final x position.
        """
        SlideUtils._drag(x, y, offset, offset_increase)

    @staticmethod
    def slide_by_pyautogui2(x, y, offset, offset_increase):
        """Drag the slider with pyautogui along a custom trajectory, with pauses.

        Same path as ``slide_by_pyautogui`` but sleeps 2 seconds before
        starting and holds the button for 1.3 seconds before releasing.

        Args:
            x: Starting x coordinate passed to pyautogui.
            y: Starting y coordinate passed to pyautogui.
            offset: Horizontal distance to drag.
            offset_increase: Extra value added to the final x position.
        """
        SlideUtils._drag(x, y, offset, offset_increase, pre_delay=2, hold_delay=1.3)
def detect_circular_distortion(image_path, circle_radius=65):
    """Score how well a centred disc of an image matches the whole image.

    ORB descriptors of the full grayscale image are matched against the
    descriptors of the same image masked to a disc around its centre; the
    mean match distance is returned (smaller means a closer match).

    Args:
        image_path: Path of the image file to analyse.
        circle_radius: Radius in pixels of the centred disc.

    Returns:
        The mean descriptor distance over all matches, or ``float('inf')``
        when no descriptors or no matches are available.

    Raises:
        ValueError: If the image cannot be read by OpenCV.
    """
    img = cv2.imread(image_path)
    # cv2.imread returns None on failure instead of raising.
    if img is None:
        raise ValueError(f"cannot read image: {image_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Mask that keeps only the disc around the image centre.
    circle_center = (gray.shape[1] // 2, gray.shape[0] // 2)
    mask = np.zeros_like(gray)
    cv2.circle(mask, circle_center, circle_radius, 255, thickness=-1)
    masked_img = cv2.bitwise_and(gray, gray, mask=mask)

    orb = cv2.ORB_create(500)
    _, des_full = orb.detectAndCompute(gray, None)
    _, des_masked = orb.detectAndCompute(masked_img, None)
    # detectAndCompute yields None descriptors when it finds no keypoints,
    # and BFMatcher.match raises on None input.
    if des_full is None or des_masked is None:
        return float('inf')

    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = bf.match(des_full, des_masked)
    if not matches:
        return float('inf')
    return sum(match.distance for match in matches) / len(matches)
def any_of_elements_present(*locators):
    """Build a wait predicate that is true once any locator matches the DOM.

    Args:
        *locators: ``(by, value)`` tuples, as accepted by
            ``driver.find_elements``.

    Returns:
        A callable that takes a driver and returns ``True`` if at least one
        locator matches an element, otherwise ``False``.
    """
    def _predicate(driver):
        # find_elements returns an empty list when nothing matches, so every
        # locator is checked. A single-element lookup raises
        # NoSuchElementException on the first missing locator and would
        # abort the loop before later locators are tried.
        return any(driver.find_elements(*locator) for locator in locators)

    return _predicate
def _mean_orb_match_distance(orb, matcher, gray, mask):
    """Return the mean ORB match distance between *gray* and its masked copy."""
    masked_img = cv2.bitwise_and(gray, gray, mask=mask)
    _, des_full = orb.detectAndCompute(gray, None)
    _, des_masked = orb.detectAndCompute(masked_img, None)
    # No keypoints -> None descriptors, which BFMatcher.match cannot handle.
    if des_full is None or des_masked is None:
        return float('inf')
    matches = matcher.match(des_full, des_masked)
    if not matches:
        return float('inf')
    return sum(match.distance for match in matches) / len(matches)


def find_best_match_angle(background_img_path, rotating_img_path):
    """Find the rotation of the captcha image that best fits the background.

    The captcha is rotated in 1-degree steps over ``range(-360, 0)``, pasted
    onto the centre of the background, and each composite is scored with the
    mean ORB match distance between the whole composite and its central
    disc of radius 65. The angle with the lowest score wins; ties keep the
    earliest angle.

    Args:
        background_img_path: Path of the 320x200 background image.
        rotating_img_path: Path of the 130x130 rotatable captcha image.

    Returns:
        A tuple ``(angle, position)`` where ``angle`` is the absolute value
        of the best rotation in degrees and ``position`` is always ``None``
        (kept for interface compatibility).

    Raises:
        ValueError: If either image does not have the required size.
    """
    with Image.open(background_img_path) as background, \
            Image.open(rotating_img_path) as captcha:
        # Explicit checks instead of assert, which is stripped under -O.
        if background.size != (320, 200):
            raise ValueError("背景图尺寸必须是 320x200")
        if captcha.size != (130, 130):
            raise ValueError("验证码图尺寸必须是 130x130")

        # Top-left corner that centres the captcha on the background.
        insert_position = ((background.width - captcha.width) // 2,
                           (background.height - captcha.height) // 2)

        # Loop invariants: the composite always has the background's size,
        # so the disc mask, detector and matcher are built once.
        width, height = background.size
        mask = np.zeros((height, width), dtype=np.uint8)
        cv2.circle(mask, (width // 2, height // 2), 65, 255, thickness=-1)
        orb = cv2.ORB_create(500)
        matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

        best_score = None
        best_angle = None
        best_position = None
        best_file_name = None

        # One-degree steps, 360 candidates in total.
        for angle in range(-360, 0, 1):
            rotated_captcha = captcha.rotate(angle, expand=False)
            # Paste onto a copy, using the captcha itself as the paste mask.
            new_background = background.copy()
            new_background.paste(rotated_captcha, insert_position, rotated_captcha)
            # PIL image -> NumPy array -> BGR (OpenCV's order) -> grayscale.
            bgr_image = cv2.cvtColor(np.array(new_background), cv2.COLOR_RGB2BGR)
            gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)

            distortion_level = _mean_orb_match_distance(orb, matcher, gray, mask)
            # Seed the best candidate with the first angle so best_angle is
            # never None when the first angle wins or every score is inf.
            if best_score is None or best_score > distortion_level:
                best_score = distortion_level
                best_angle = angle
                best_file_name = f"captcha_{abs(angle)}.png"

    print(best_score)
    print(best_angle)
    print(best_position)
    print(best_file_name)
    return abs(best_angle), best_position
def get_cookie():
option = ChromeOptions()
option.binary_location = 'D:\ChromeMaYi\guge\chrome.exe'
......@@ -293,80 +76,17 @@ def get_cookie():
# option.add_argument('--headless') # 无头模式,可不启用界面显示运行
option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option)
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
"source": return_js()
})
option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(1200, 600)
# driver.maximize_window()
driver.get('https://passport.jd.com/uc/login')
# 切換账号密码登录
# driver.find_element(by=By.CLASS_NAME, value='login-tab-r').click()
# 设置账号密码
driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
time.sleep(0.5)
# 登录
driver.find_element(by=By.ID, value='loginsubmit').click()
print("睡眠0.5秒后开始滑动登录")
time.sleep(0.5)
# 一直循环直到登录成功位置,超过一百次就算失败。
count = 0
while 1:
count = count + 1
print(f"开始第{count}次尝试")
if count > 100:
print("登录京东失败!")
return
# 获取验证码图片
# 用于找到登录图片的大图
try:
background = driver.find_element(by=By.XPATH, value=r'//div/div[@class="JDJRV-bigimg"]/img')
except NoSuchElementException:
# 未查找到登陆图片则认为成功
print("京东登录成功!")
break
# 用来找到登录图片的小滑块
slide = driver.find_element(by=By.XPATH, value=r'//div/div[@class="JDJRV-smallimg"]/img')
background_url = background.get_attribute("src")
slide_url = slide.get_attribute("src")
background_img = 'background_img.png'
slide_img = 'slide_img.png'
# 下载背景大图保存到本地
request.urlretrieve(background_url, background_img)
# 下载滑块保存到本地
request.urlretrieve(slide_url, slide_img)
# 获取最佳x偏移量
x = SlideUtils.find_pic(background_img, slide_img)
# print(f'本地最佳偏移量: {x}')
# 计算缩放
# 获取下载背景图宽度
w1 = cv2.imread(background_img).shape[1]
# 获取网页背景图宽度
w2 = background.size['width']
# 计算实际页面x偏移量
x = (x * w2 / w1)
# 找到要获取位置的元素,比如通过其ID
element = driver.find_element_by_class_name("JDJRV-slide-btn")
# 获取元素的X和Y位置
x_position = element.location['x'] + 50
y_position = element.location['y'] + 100
print("元素的X位置:", x_position)
print("元素的Y位置:", y_position)
# 其中x为屏幕左上角至滑块中心的横向像素值,y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
SlideUtils.slide_by_pyautogui(x_position, y_position, x, offset_increase)
print(f"需要登录")
time.sleep(1)
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '110' order by docID desc limit 10000"
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE data_batch ='202401251550270001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 10000"
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and data_batch ='202403051815460001' order by id" \
# f" limit 100000"
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' limit 1000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' limit 30000"
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000001' limit 1000"
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE data_batch ='202404180000000001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD'"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -389,36 +109,7 @@ def get_cookie():
print("跳转首页了,睡眠10秒")
time.sleep(10)
driver.get(url)
# element_present = WebDriverWait(driver, 10).until(
# any_of_elements_present((By.ID, "J_main"), (By.CLASS_NAME, "verifyBtn"))
# )
# if element_present:
# print("至少有一个元素加载完成")
# else:
# print("元素未加载完成")
# try:
# WebDriverWait(driver, 2).until(
# EC.presence_of_element_located((By.ID, "J_main"))
# )
# except:
# try:
# WebDriverWait(driver, 5).until(
# EC.presence_of_element_located((By.CLASS_NAME, "verifyBtn"))
# )
# except NoSuchElementException:
# print("网页加载异常")
# driver.get(url)
# time.sleep(2)
# continue
# except Exception as e:
# print("网页加载异常")
# driver.get(url)
# time.sleep(2)
# continue
# finally:
# print("网页加载完成")
# finally:
# print("网页加载完成")
text = driver.page_source
while_count = 0
while (
......@@ -426,152 +117,6 @@ def get_cookie():
print(f"需要验证,睡眠1秒")
time.sleep(1)
text = driver.page_source
# while_count += 1
# if while_count > 1:
# driver.get(url)
# time.sleep(1)
# pass
# # 获取验证码图片
# try:
# print("需要验证。")
# print("开始点击“快速验证”按钮")
# try:
# driver.find_element(by=By.CLASS_NAME, value='verifyBtn').click()
# except:
# print("点击“快速验证”按钮异常")
# try:
# WebDriverWait(driver, 5).until(
# EC.presence_of_element_located((By.XPATH, '//*[@id="cpc_img"]'))
# )
# except:
# print("未查找到验证图片背景图,可能是旋转图片,换一种方式获取div")
# pass
# while_count_2 = 0
# while while_count_2 < 30 and (
# '快速验证' in text and '验证一下,购物无忧' in text) or '前方拥挤,请刷新重试' in text or 'class="title">加载中...</span></div></div><!----></div>' in text:
# while_count_2 += 1
# background = driver.find_element(by=By.XPATH, value='//*[@id="cpc_img"]')
# background_url = background.get_attribute("src")
# # 用来找到登录图片的小滑块
# slide = driver.find_element(by=By.XPATH, value='//*[@id="small_img"]')
# slide_url = slide.get_attribute("src")
# background_img = 'background_img.png'
# slide_img = 'slide_img.png'
# # 下载背景大图保存到本地
# request.urlretrieve(background_url, background_img)
# # 下载滑块保存到本地
# request.urlretrieve(slide_url, slide_img)
# # 获取最佳x偏移量
# x = SlideUtils.find_pic(background_img, slide_img)
# # print(f'本地最佳偏移量: {x}')
# # 计算缩放
# # 获取下载背景图宽度
# w1 = cv2.imread(background_img).shape[1]
# # 获取网页背景图宽度
# w2 = background.size['width']
# # 计算实际页面x偏移量
# x = (x * w2 / w1)
# # 找到要获取位置的元素,比如通过其ID
# element = driver.find_element_by_class_name("bg-blue")
#
# # 获取元素的X和Y位置
# x_position = element.location['x'] + 50
# y_position = element.location['y'] + 100
#
# print("元素的X位置:", x_position)
# print("元素的Y位置:", y_position)
# # 其中x为屏幕左上角至滑块中心的横向像素值,y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
# # time.sleep(0.5)
# SlideUtils.slide_by_pyautogui(x_position, y_position,
# x, offset_increase)
# try:
# WebDriverWait(driver, 2).until(
# EC.presence_of_element_located((By.ID, "J_main"))
# )
# text = driver.page_source
# continue
# except:
# print("没成功,重试")
# finally:
# print("网页加载完成")
# text = driver.page_source
# continue
# except NoSuchElementException:
# # print("未查找到验证图片背景图,可能是旋转图片,换一种方式获取div")
# try:
# while_count_2 = 0
# while while_count_2 < 30:
# while_count_2 += 1
# background = driver.find_element(by=By.XPATH, value='//*[@id="img-back-div"]')
# background_url = background.get_attribute("style").replace('"); height: 181px;', "").replace(
# 'background-image: url("', "")
# # 用来找到登录图片的小滑块
# slide = driver.find_element(by=By.XPATH, value='//*[@id="img-rotate-div"]/img')
# slide_url = slide.get_attribute("src")
# background_img = 'background_img.png'
# slide_img = 'slide_img.png'
# # 下载背景大图保存到本地
# request.urlretrieve(background_url, background_img)
# # 下载滑块保存到本地
# request.urlretrieve(slide_url, slide_img)
# best_angle, best_match_score = find_best_match_angle(background_img, slide_img)
# print(f"最佳匹配角度为: {best_angle} 度")
# # time.sleep(0.5)
#
# # w1 = cv2.imread(background_img).shape[1]
# # # 获取网页背景图宽度
# # w2 = background.size['width']
# x = 228 / 360 * best_angle
# x = int(x)
# print(f"移动: {x} ")
# # 找到要获取位置的元素,比如通过其ID
# element = driver.find_element_by_id("slider-div")
#
# # 获取元素的X和Y位置
# x_position = element.location['x'] + 50
# y_position = element.location['y'] + 100
#
# print("元素的X位置:", x_position)
# print("元素的Y位置:", y_position)
# # 其中x为屏幕左上角至滑块中心的横向像素值,y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
# # time.sleep(0.5)
# SlideUtils.slide_by_pyautogui2(x_position,
# y_position,
# x + 2,
# offset_increase)
# try:
# WebDriverWait(driver, 2).until(
# EC.presence_of_element_located((By.ID, "J_main"))
# )
# text = driver.page_source
# break
# except:
# try:
# WebDriverWait(driver, 2).until(
# EC.presence_of_element_located((By.CLASS_NAME, "verifyBtn"))
# )
# except NoSuchElementException:
# print("网页加载异常")
# text = driver.page_source
# break
# finally:
# print("网页加载完成")
# finally:
# print("网页加载完成")
# text = driver.page_source
#
# except NoSuchElementException:
# driver.get(url)
# text = driver.page_source
# continue
# except Exception as e:
# print(e)
# text = driver.page_source
# continue
# except Exception as e:
# print(e)
# text = driver.page_source
# continue
# 判断是否跳转了登录
if '<title>京东-欢迎登录</title>' in text:
return get_cookie()
......@@ -610,9 +155,6 @@ def get_cookie():
mysql_zdzs_proxy(insert_sql)
print("保存成功")
updateCookie(driver)
pass
# 登录成功
cookie = ''
......@@ -627,4 +169,3 @@ def get_cookie():
if __name__ == "__main__":
print(get_cookie())
# print(find_best_match_angle('background_img.png', 'slide_img.png'))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment