Commit e5a85e5f authored by huangziyu

1

parent c1f9282c
# https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
\ No newline at end of file
import pymysql
def returnDB(host, user, password, database):
    """Open a MySQL connection and return a cursor together with it.

    The connection is created with ``charset="utf8"``, ``DictCursor`` as the
    cursor class and ``autocommit=True``.

    Args:
        host: Server address handed to ``pymysql.connect``.
        user: Login name.
        password: Login password.
        database: Name of the schema to select.

    Returns:
        A ``(cursor, connection)`` tuple. Neither object is closed here.
    """
    settings = {
        "host": host,
        "user": user,
        "password": password,
        "database": database,
        "charset": "utf8",
        "cursorclass": pymysql.cursors.DictCursor,
        "autocommit": True,
    }
    connection = pymysql.connect(**settings)
    return connection.cursor(), connection
def updateCookie(Driver):
    """Store the browser's current cookies in the ``jd_cookies`` table.

    Every cookie returned by ``Driver.get_cookies()`` is rendered as
    ``name=value;`` and the concatenation is written to the row whose
    ``user`` column equals ``'ruoyu'``.

    Args:
        Driver: Object exposing ``get_cookies()``, which yields mappings with
            ``'name'`` and ``'value'`` keys (presumably a Selenium WebDriver;
            confirm against the caller).
    """
    # NOTE(review): connection credentials are hard-coded here; consider
    # loading them from configuration instead.
    Cur, Conn = returnDB("59.110.219.171", "zgcindex", "zgcprice2019", "zdzs_proxy")
    try:
        cookie = ''.join(
            f"{item['name']}={item['value']};" for item in Driver.get_cookies()
        )
        # Bind the cookie as a query parameter: cookie values may contain
        # quotes or backslashes that would corrupt an interpolated statement.
        Cur.execute(
            "UPDATE jd_cookies SET cookie = %s WHERE user = 'ruoyu'",
            (cookie,),
        )
    finally:
        # Release the cursor and connection even when the update fails.
        Cur.close()
        Conn.close()
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
import pymysql
def returnDB(host, user, password, database):
    """Open a MySQL connection and return a cursor together with it.

    The connection is created with ``charset="utf8"``, ``DictCursor`` as the
    cursor class and ``autocommit=True``.

    Args:
        host: Server address handed to ``pymysql.connect``.
        user: Login name.
        password: Login password.
        database: Name of the schema to select.

    Returns:
        A ``(cursor, connection)`` tuple. Neither object is closed here.
    """
    settings = {
        "host": host,
        "user": user,
        "password": password,
        "database": database,
        "charset": "utf8",
        "cursorclass": pymysql.cursors.DictCursor,
        "autocommit": True,
    }
    connection = pymysql.connect(**settings)
    return connection.cursor(), connection
def updateCookie(Driver):
    """Store the browser's current cookies in the ``jd_cookies`` table.

    Every cookie returned by ``Driver.get_cookies()`` is rendered as
    ``name=value;`` and the concatenation is written to the row whose
    ``user`` column equals ``'ruoyu'``.

    Args:
        Driver: Object exposing ``get_cookies()``, which yields mappings with
            ``'name'`` and ``'value'`` keys (presumably a Selenium WebDriver;
            confirm against the caller).
    """
    # NOTE(review): connection credentials are hard-coded here; consider
    # loading them from configuration instead.
    Cur, Conn = returnDB("59.110.219.171", "zgcindex", "zgcprice2019", "zdzs_proxy")
    try:
        cookie = ''.join(
            f"{item['name']}={item['value']};" for item in Driver.get_cookies()
        )
        # Bind the cookie as a query parameter: cookie values may contain
        # quotes or backslashes that would corrupt an interpolated statement.
        Cur.execute(
            "UPDATE jd_cookies SET cookie = %s WHERE user = 'ruoyu'",
            (cookie,),
        )
    finally:
        # Release the cursor and connection even when the update fails.
        Cur.close()
        Conn.close()
\ No newline at end of file
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
import time import time
from urllib import request from urllib import request
import cv2 import cv2
from putCookies import updateCookie
import numpy as np import numpy as np
import pyautogui import pyautogui
from selenium import webdriver from selenium import webdriver
...@@ -115,12 +116,12 @@ class SlideUtils: ...@@ -115,12 +116,12 @@ class SlideUtils:
pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28) pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28)
y += random.randint(-9, 0) y += random.randint(-9, 0)
pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100) pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100)
y += random.randint(2, 5) y += random.randint(-3, 3)
pyautogui.moveTo(xx, y, duration=0.8) pyautogui.moveTo(xx, y, duration=0.8)
print("睡眠1秒后松开") print("睡眠1秒后松开")
time.sleep(1) time.sleep(0.5)
import math import math
pyautogui.moveRel(0, random.randint(2, 5), duration=0.3) pyautogui.moveRel(0, random.randint(-50, 50), duration=0.5)
print("松开") print("松开")
pyautogui.mouseUp() pyautogui.mouseUp()
...@@ -132,18 +133,18 @@ class SlideUtils: ...@@ -132,18 +133,18 @@ class SlideUtils:
# print(f"睡眠2秒后点击移动") # print(f"睡眠2秒后点击移动")
# time.sleep(2) # time.sleep(2)
xx = x + offset + offset_increase xx = x + offset + offset_increase
pyautogui.moveTo(x, y, duration=0.3) pyautogui.moveTo(x, y, duration=0.5)
pyautogui.mouseDown() pyautogui.mouseDown()
y += random.randint(9, 19) y += random.randint(9, 19)
pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.5) pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.2)
y += random.randint(-9, 0) y += random.randint(-9, 0)
pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100) pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100)
y += random.randint(0, 2) y += random.randint(-3, 3)
pyautogui.moveTo(xx, y, duration=0.5) pyautogui.moveTo(xx, y, duration=0.3)
print("睡眠1秒后松开") print("睡眠1秒后松开")
time.sleep(1) time.sleep(0.5)
import math import math
pyautogui.moveRel(0, y+3, duration=0.3) pyautogui.moveRel(0, random.randint(-300, -100), duration=0.5)
print("松开") print("松开")
pyautogui.mouseUp() pyautogui.mouseUp()
# print(f"睡眠2秒后点击 # print(f"睡眠2秒后点击
...@@ -274,7 +275,7 @@ def find_best_match_angle(background_img_path, rotating_img_path): ...@@ -274,7 +275,7 @@ def find_best_match_angle(background_img_path, rotating_img_path):
distortion_level = average_distance distortion_level = average_distance
if angle == -360: if angle == -360:
best_score = distortion_level best_score = distortion_level
print(f'中心区域扭曲程度估计(数值越小表示越接近未扭曲): {distortion_level}') # print(f'中心区域扭曲程度估计(数值越小表示越接近未扭曲): {angle} {distortion_level}')
# res = cv2.matchTemplate(background, rotated_captcha, cv2.TM_CCOEFF_NORMED) # res = cv2.matchTemplate(background, rotated_captcha, cv2.TM_CCOEFF_NORMED)
# _, max_val, _, max_loc = cv2.minMaxLoc(res) # _, max_val, _, max_loc = cv2.minMaxLoc(res)
# #
...@@ -360,13 +361,20 @@ def get_cookie(): ...@@ -360,13 +361,20 @@ def get_cookie():
print("元素的X位置:", x_position) print("元素的X位置:", x_position)
print("元素的Y位置:", y_position) print("元素的Y位置:", y_position)
# 其中x为屏幕左上角至滑块中心的横向像素值,y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置 # 其中x为屏幕左上角至滑块中心的横向像素值,y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
SlideUtils.slide_by_pyautogui(x_position, y_position, x, offset_increase) SlideUtils.slide_by_pyautogui(x_position, y_position, x, offset_increase)
time.sleep(1) time.sleep(1)
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '110' order by docID desc limit 10000" # query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '110' order by docID desc limit 10000"
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE data_batch ='202401251550270001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 10000" # query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE data_batch ='202401251550270001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 10000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 10000" # query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and data_batch ='202403051815460001' order by id" \
# f" limit 100000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '100' limit 10000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
for row in query_spider: for row in query_spider:
index += 1
print(f"len={len_},index={index}")
# while True: # while True:
# row = { # row = {
# 'url':'https://search.jd.com/search?keyword=COMIX%20B3749&enc=utf-8', # 'url':'https://search.jd.com/search?keyword=COMIX%20B3749&enc=utf-8',
...@@ -489,7 +497,7 @@ def get_cookie(): ...@@ -489,7 +497,7 @@ def get_cookie():
# print("未查找到验证图片背景图,可能是旋转图片,换一种方式获取div") # print("未查找到验证图片背景图,可能是旋转图片,换一种方式获取div")
try: try:
while_count_2 = 0 while_count_2 = 0
while while_count_2 < 10: while while_count_2 < 30:
while_count_2 += 1 while_count_2 += 1
background = driver.find_element(by=By.XPATH, value='//*[@id="img-back-div"]') background = driver.find_element(by=By.XPATH, value='//*[@id="img-back-div"]')
background_url = background.get_attribute("style").replace('"); height: 181px;', "").replace( background_url = background.get_attribute("style").replace('"); height: 181px;', "").replace(
...@@ -536,7 +544,7 @@ def get_cookie(): ...@@ -536,7 +544,7 @@ def get_cookie():
break break
except: except:
try: try:
WebDriverWait(driver, 5).until( WebDriverWait(driver, 2).until(
EC.presence_of_element_located((By.CLASS_NAME, "verifyBtn")) EC.presence_of_element_located((By.CLASS_NAME, "verifyBtn"))
) )
except NoSuchElementException: except NoSuchElementException:
...@@ -595,13 +603,20 @@ def get_cookie(): ...@@ -595,13 +603,20 @@ def get_cookie():
mysql_zdzs_proxy(insert_sql) mysql_zdzs_proxy(insert_sql)
print("保存成功") print("保存成功")
updateCookie(driver)
pass pass
# 登录成功 # 登录成功
cookie = '' cookie = ''
for i in driver.get_cookies(): for i in driver.get_cookies():
cookie = cookie + i['name'] + '=' + i['value'] + ';' cookie = cookie + i['name'] + '=' + i['value'] + ';'
driver.quit() driver.quit()
return cookie
return get_cookie()
if __name__ == "__main__": if __name__ == "__main__":
......
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @Time : 2022/9/6 0006 16:05
import json
import random
import re
from DrissionPage import WebPage,ChromiumPage, ChromiumOptions
import time
import pymysql
from DrissionPage.common import ActionChains
def returnDB(host, user, password, database):
    """Open a MySQL connection and return a cursor together with it.

    The connection is created with ``charset="utf8"``, ``DictCursor`` as the
    cursor class and ``autocommit=True``.

    Args:
        host: Server address handed to ``pymysql.connect``.
        user: Login name.
        password: Login password.
        database: Name of the schema to select.

    Returns:
        A ``(cursor, connection)`` tuple. Neither object is closed here.
    """
    settings = {
        "host": host,
        "user": user,
        "password": password,
        "database": database,
        "charset": "utf8",
        "cursorclass": pymysql.cursors.DictCursor,
        "autocommit": True,
    }
    connection = pymysql.connect(**settings)
    return connection.cursor(), connection
# Return the proxy IP rows
def returnIP():
    """Fetch every row of the ``proxy`` table.

    Returns:
        The result of ``fetchall()`` for ``SELECT * FROM proxy``. With the
        ``DictCursor`` configured in ``returnDB`` each row is a mapping.
    """
    # NOTE(review): connection credentials are hard-coded here; consider
    # loading them from configuration instead.
    Cur, Conn = returnDB("59.110.219.171", "zgcindex", "zgcprice2019", "zdzs_proxy")
    try:
        Cur.execute('SELECT * FROM proxy')
        return Cur.fetchall()
    finally:
        # Release the cursor and connection even when the query fails.
        Cur.close()
        Conn.close()
# Initialise the browser object
def initDriver():
    """Create a DrissionPage ``WebPage`` from a fresh ``ChromiumOptions``.

    The only option applied is ``set_timeouts(implicit=5, pageLoad=5)``.

    Returns:
        The ``WebPage`` built from those options.
    """
    # Change this to the Chrome executable path on your machine.
    # NOTE(review): the value is assigned but never passed to the options.
    chrome_path = r'D:\ChromeMaYi\guge\chrome.exe'

    options = ChromiumOptions()
    # Optional switches that are currently left disabled:
    #   options.auto_port(True)
    #   options.set_argument('--headless')                  # headless mode
    #   options.set_no_imgs(True).set_mute(True)            # no images, muted
    #   options.set_no_js(True)                             # disable JavaScript
    #   options.set_argument('--user-agent', random.choice(MY_USER_AGENT))
    #   options.set_proxy(f"http://{returnIP()[0]['ip_port']}")  # proxy

    # Connection / page-load timeouts.
    options.set_timeouts(implicit=5, pageLoad=5)

    # Build the page object from this configuration.
    return WebPage(driver_or_options=options)
def insertDBxpath(Driver, Cur, Key):
    """Scrape the currently loaded product page and persist the result.

    When the page HTML contains neither '找不到了!' nor '下架', the price,
    title and shop name are read from the page; otherwise the price is set to
    ``-1`` and ``XJ`` to ``1``. The ``product_url`` row is then marked with
    ``status = 2`` and one row is inserted into ``product_url_data``.

    If anything in that path raises, the error is printed and, when the
    current URL has no ``'id'`` or contains ``'shop'``, the row is still
    marked done and a placeholder row (empty name/price, ``XJ = 1``) is
    inserted using ``Key['sku']``.

    Args:
        Driver: Page object exposing ``html``, ``url`` and ``s_ele(locator)``.
        Cur: DB cursor exposing ``execute(sql, params)`` with ``%s``
            placeholders.
        Key: Mapping with ``'ID'``, ``'sku'`` and ``'Url'`` entries.
    """
    update_sql = "UPDATE product_url SET status = 2 WHERE id = %s"
    insert_sql = (
        "INSERT INTO product_url_data"
        "(sku,goodsName,Price,qijian,Url,status,XJ,supName)"
        "VALUES(%s,%s,%s,%s,%s,2,%s,%s)"
    )

    def _run(sql, params):
        # Values are bound by the driver instead of being spliced into the
        # statement: scraped titles and shop names routinely contain quotes.
        print(sql, params)
        Cur.execute(sql, params)

    XJ = 0
    q = 0
    SupName = ''
    Name = ''
    Price = ''
    try:
        html = Driver.html
        if '找不到了!' not in html and '下架' not in html:
            Price = Driver.s_ele('tag:span@@class:Price--priceText').text
            Name = Driver.s_ele('tag:h1@@class:ItemHeader--mainTitle').text
            if 'chaoshi' not in Driver.url:
                SupName = Driver.s_ele('tag:div@@class:ShopHeader--title').text
            else:
                SupName = '天猫超市'
        else:
            # Listing not found or taken down.
            Price = -1
            XJ = 1
        if SupName and '旗舰' in SupName:
            q = 4
        _run(update_sql, (Key['ID'],))
        # str(...) keeps the values rendered as quoted strings, as the
        # original statement did.
        _run(
            insert_sql,
            (str(Key['ID']), str(Name), str(Price), str(q),
             str(Key['Url']), XJ, str(SupName)),
        )
    except Exception as e:
        print(e)
        if 'id' not in Driver.url or 'shop' in Driver.url:
            _run(update_sql, (Key['ID'],))
            # NOTE(review): this path stores Key['sku'] in the sku column
            # while the success path stores Key['ID'] -- confirm which is intended.
            _run(
                insert_sql,
                (str(Key['sku']), '', '', '', str(Key['Url']), 1, str(SupName)),
            )
def Main():
    """Visit pending Tmall product URLs and record what each page shows.

    Selects up to 1000 ``product_url`` rows (Tmall URLs with a null status
    whose id ends in 7), opens a Taobao search page so the operator can pass
    the slider check and log in, waits until ``1`` is entered, then loads
    each product URL and hands the page to ``insertDBxpath``.
    """
    # NOTE(review): connection credentials are hard-coded here; consider
    # loading them from configuration instead.
    Cur, Conn = returnDB("39.105.1.55", "root", "*#9U7P61R!657", "ZD_GT_pro")
    Driver = None
    try:
        Cur.execute("""SELECT ID,sku,sku_url as Url FROM `product_url` WHERE sku_url like '%tmall%' AND Status is null and (id like "%7" ) order by id desc limit 1000""")
        Driver = initDriver()
        Driver.get('https://s.taobao.com/search?page=1&q=%E6%97%A0%20CTL-350HK', timeout=5)

        # Block until the operator confirms the manual verification/login.
        while True:
            try:
                J = int(input("是否已经滑动验证并登录,完成请按:【1】\n"))
            except ValueError:
                # Non-numeric input: ask again instead of crashing.
                continue
            if J == 1:
                break

        searchList = Cur.fetchall()
        searchNum = len(searchList)
        for K in searchList:
            try:
                # Mobile links are rewritten to the desktop detail page.
                if 'm.tmal' in K['Url']:
                    Url = f"""https://detail.tmall.com/item.htm?id={K['Url'].split('id=')[-1]}"""
                else:
                    Url = K['Url']

                # If the slider captcha is showing, drag it before navigating.
                slider = Driver.ele("xpath://span[@id='nc_1_n1z']")
                if slider:
                    ActionChains(Driver).move_to(slider).hold().right(300).wait(2).release()
                Driver.get(Url, timeout=5)

                searchNum -= 1
                print(K, searchNum)
                time.sleep(2)
                insertDBxpath(Driver, Cur, K)
            except Exception as e:
                # Best effort per row: report the failure and move on.
                print(e)
    finally:
        # Always release the browser and the DB handles.
        try:
            if Driver is not None:
                Driver.quit()
        finally:
            Cur.close()
            Conn.close()
Main()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment