Commit e5a85e5f authored by huangziyu

1

parent c1f9282c
# https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
\ No newline at end of file
import pymysql
def returnDB(host, user, password, database):
    """Open a PyMySQL connection and return a cursor together with it.

    Args:
        host: Address of the MySQL server.
        user: Login name.
        password: Login password.
        database: Name of the schema to select.

    Returns:
        A ``(cursor, connection)`` tuple. The connection is opened with the
        ``utf8`` charset, autocommit enabled and ``DictCursor`` as its cursor
        class. The caller is responsible for closing both objects.
    """
    connect_kwargs = {
        "host": host,
        "user": user,
        "password": password,
        "database": database,
        "charset": "utf8",
        "cursorclass": pymysql.cursors.DictCursor,
        "autocommit": True,
    }
    connection = pymysql.connect(**connect_kwargs)
    return connection.cursor(), connection
def updateCookie(Driver):
    """Serialize the browser's cookies and store them in ``jd_cookies``.

    The cookies returned by ``Driver.get_cookies()`` are flattened into a
    single ``name=value;name=value;`` string and written to the ``cookie``
    column of the row whose ``user`` is ``'ruoyu'``.

    Args:
        Driver: Object exposing ``get_cookies()``, which yields mappings with
            ``'name'`` and ``'value'`` keys.
    """
    # NOTE(review): the connection credentials are hard-coded here; consider
    # loading them from configuration or the environment instead.
    Cur, Conn = returnDB("59.110.219.171", "zgcindex", "zgcprice2019", "zdzs_proxy")
    try:
        cookie = ''.join(
            f"{item['name']}={item['value']};" for item in Driver.get_cookies()
        )
        # Bind the cookie as a query parameter: cookie values come from the
        # remote site and may contain quotes that would break (or inject
        # into) a string-formatted statement.
        Cur.execute(
            "UPDATE jd_cookies SET cookie = %s WHERE user = 'ruoyu'",
            (cookie,),
        )
    finally:
        # Release the cursor and connection even when the update fails.
        Cur.close()
        Conn.close()
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
import pymysql
def returnDB(host, user, password, database):
    """Open a PyMySQL connection and return a cursor together with it.

    Args:
        host: Address of the MySQL server.
        user: Login name.
        password: Login password.
        database: Name of the schema to select.

    Returns:
        A ``(cursor, connection)`` tuple. The connection is opened with the
        ``utf8`` charset, autocommit enabled and ``DictCursor`` as its cursor
        class. The caller is responsible for closing both objects.
    """
    connect_kwargs = {
        "host": host,
        "user": user,
        "password": password,
        "database": database,
        "charset": "utf8",
        "cursorclass": pymysql.cursors.DictCursor,
        "autocommit": True,
    }
    connection = pymysql.connect(**connect_kwargs)
    return connection.cursor(), connection
def updateCookie(Driver):
    """Serialize the browser's cookies and store them in ``jd_cookies``.

    The cookies returned by ``Driver.get_cookies()`` are flattened into a
    single ``name=value;name=value;`` string and written to the ``cookie``
    column of the row whose ``user`` is ``'ruoyu'``.

    Args:
        Driver: Object exposing ``get_cookies()``, which yields mappings with
            ``'name'`` and ``'value'`` keys.
    """
    # NOTE(review): the connection credentials are hard-coded here; consider
    # loading them from configuration or the environment instead.
    Cur, Conn = returnDB("59.110.219.171", "zgcindex", "zgcprice2019", "zdzs_proxy")
    try:
        cookie = ''.join(
            f"{item['name']}={item['value']};" for item in Driver.get_cookies()
        )
        # Bind the cookie as a query parameter: cookie values come from the
        # remote site and may contain quotes that would break (or inject
        # into) a string-formatted statement.
        Cur.execute(
            "UPDATE jd_cookies SET cookie = %s WHERE user = 'ruoyu'",
            (cookie,),
        )
    finally:
        # Release the cursor and connection even when the update fails.
        Cur.close()
        Conn.close()
\ No newline at end of file
......@@ -4,6 +4,7 @@
import time
from urllib import request
import cv2
from putCookies import updateCookie
import numpy as np
import pyautogui
from selenium import webdriver
......@@ -115,12 +116,12 @@ class SlideUtils:
pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.28)
y += random.randint(-9, 0)
pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100)
y += random.randint(2, 5)
y += random.randint(-3, 3)
pyautogui.moveTo(xx, y, duration=0.8)
print("睡眠1秒后松开")
time.sleep(1)
time.sleep(0.5)
import math
pyautogui.moveRel(0, random.randint(2, 5), duration=0.3)
pyautogui.moveRel(0, random.randint(-50, 50), duration=0.5)
print("松开")
pyautogui.mouseUp()
......@@ -132,18 +133,18 @@ class SlideUtils:
# print(f"睡眠2秒后点击移动")
# time.sleep(2)
xx = x + offset + offset_increase
pyautogui.moveTo(x, y, duration=0.3)
pyautogui.moveTo(x, y, duration=0.5)
pyautogui.mouseDown()
y += random.randint(9, 19)
pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.5)
pyautogui.moveTo(x + int(offset * random.randint(15, 23) / 20), y, duration=0.2)
y += random.randint(-9, 0)
pyautogui.moveTo(x + int(offset * random.randint(17, 21) / 20), y, duration=random.randint(20, 31) / 100)
y += random.randint(0, 2)
pyautogui.moveTo(xx, y, duration=0.5)
y += random.randint(-3, 3)
pyautogui.moveTo(xx, y, duration=0.3)
print("睡眠1秒后松开")
time.sleep(1)
time.sleep(0.5)
import math
pyautogui.moveRel(0, y+3, duration=0.3)
pyautogui.moveRel(0, random.randint(-300, -100), duration=0.5)
print("松开")
pyautogui.mouseUp()
# print(f"睡眠2秒后点击
......@@ -274,7 +275,7 @@ def find_best_match_angle(background_img_path, rotating_img_path):
distortion_level = average_distance
if angle == -360:
best_score = distortion_level
print(f'中心区域扭曲程度估计(数值越小表示越接近未扭曲): {distortion_level}')
# print(f'中心区域扭曲程度估计(数值越小表示越接近未扭曲): {angle} {distortion_level}')
# res = cv2.matchTemplate(background, rotated_captcha, cv2.TM_CCOEFF_NORMED)
# _, max_val, _, max_loc = cv2.minMaxLoc(res)
#
......@@ -360,13 +361,20 @@ def get_cookie():
print("元素的X位置:", x_position)
print("元素的Y位置:", y_position)
# 其中x为屏幕左上角至滑块中心的横向像素值,y为屏幕左上角至滑块中心纵向像素值, 可根据自己屏幕配置
SlideUtils.slide_by_pyautogui(x_position, y_position, x, offset_increase)
time.sleep(1)
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '110' order by docID desc limit 10000"
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE data_batch ='202401251550270001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 10000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 10000"
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE data_batch ='202401251550270001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 10000"
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and data_batch ='202403051815460001' order by id" \
# f" limit 100000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '100' limit 10000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
for row in query_spider:
index += 1
print(f"len={len_},index={index}")
# while True:
# row = {
# 'url':'https://search.jd.com/search?keyword=COMIX%20B3749&enc=utf-8',
......@@ -489,7 +497,7 @@ def get_cookie():
# print("未查找到验证图片背景图,可能是旋转图片,换一种方式获取div")
try:
while_count_2 = 0
while while_count_2 < 10:
while while_count_2 < 30:
while_count_2 += 1
background = driver.find_element(by=By.XPATH, value='//*[@id="img-back-div"]')
background_url = background.get_attribute("style").replace('"); height: 181px;', "").replace(
......@@ -536,7 +544,7 @@ def get_cookie():
break
except:
try:
WebDriverWait(driver, 5).until(
WebDriverWait(driver, 2).until(
EC.presence_of_element_located((By.CLASS_NAME, "verifyBtn"))
)
except NoSuchElementException:
......@@ -595,13 +603,20 @@ def get_cookie():
mysql_zdzs_proxy(insert_sql)
print("保存成功")
updateCookie(driver)
pass
# 登录成功
cookie = ''
for i in driver.get_cookies():
cookie = cookie + i['name'] + '=' + i['value'] + ';'
driver.quit()
return cookie
return get_cookie()
if __name__ == "__main__":
......
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @Time : 2022/9/6 0006 16:05
import json
import random
import re
from DrissionPage import WebPage,ChromiumPage, ChromiumOptions
import time
import pymysql
from DrissionPage.common import ActionChains
def returnDB(host, user, password, database):
    """Open a PyMySQL connection and return a cursor together with it.

    Args:
        host: Address of the MySQL server.
        user: Login name.
        password: Login password.
        database: Name of the schema to select.

    Returns:
        A ``(cursor, connection)`` tuple. The connection is opened with the
        ``utf8`` charset, autocommit enabled and ``DictCursor`` as its cursor
        class. The caller is responsible for closing both objects.
    """
    connect_kwargs = {
        "host": host,
        "user": user,
        "password": password,
        "database": database,
        "charset": "utf8",
        "cursorclass": pymysql.cursors.DictCursor,
        "autocommit": True,
    }
    connection = pymysql.connect(**connect_kwargs)
    return connection.cursor(), connection
# Return the stored proxy IPs.
def returnIP():
    """Fetch every row of the ``proxy`` table.

    Returns:
        The result of ``fetchall()`` for ``SELECT * FROM proxy``; with the
        ``DictCursor`` configured by ``returnDB`` each row is a dict.
    """
    # NOTE(review): the connection credentials are hard-coded here; consider
    # loading them from configuration or the environment instead.
    Cur, Conn = returnDB("59.110.219.171", "zgcindex", "zgcprice2019", "zdzs_proxy")
    try:
        Cur.execute('SELECT * FROM proxy')
        return Cur.fetchall()
    finally:
        # Release the cursor and connection even when the query fails.
        Cur.close()
        Conn.close()
# Initialise the browser object.
def initDriver():
    """Build a ``WebPage`` from a ``ChromiumOptions`` with 5-second timeouts.

    Returns:
        The ``WebPage`` instance created from the configured options.
    """
    # Path of the local Chrome executable; change it to match your machine.
    # NOTE(review): this value is assigned but never handed to the options.
    chrome_path = r'D:\ChromeMaYi\guge\chrome.exe'

    options = ChromiumOptions()
    # Optional settings that are currently switched off:
    #   options.auto_port(True)
    #   options.set_argument('--headless')            # headless mode
    #   options.set_no_imgs(True).set_mute(True)      # no images, muted
    #   options.set_no_js(True)                       # disable JavaScript
    #   options.set_argument('--user-agent', random.choice(MY_USER_AGENT))
    #   options.set_proxy(f"http://{returnIP()[0]['ip_port']}")  # proxy
    # Element-lookup and page-load timeouts.
    options.set_timeouts(implicit=5, pageLoad=5)

    # Create the page object from this configuration.
    return WebPage(driver_or_options=options)
def _store_product_row(Cur, product_id, row):
    """Mark ``product_url`` row ``product_id`` as done and insert ``row``.

    ``row`` is ``(sku, goodsName, Price, qijian, Url, XJ, supName)``. All
    values are bound as query parameters so scraped text containing quotes
    cannot break or alter the statements.
    """
    update_sql = "UPDATE product_url SET status = 2 WHERE id = %s"
    update_params = (product_id,)
    print(update_sql, update_params)
    Cur.execute(update_sql, update_params)

    insert_sql = (
        "INSERT INTO product_url_data"
        "(sku,goodsName,Price,qijian,Url,status,XJ,supName)"
        "VALUES(%s,%s,%s,%s,%s,2,%s,%s)"
    )
    print(insert_sql, row)
    Cur.execute(insert_sql, row)


def insertDBxpath(Driver, Cur, Key):
    """Scrape the currently loaded product page and persist the result.

    When the page HTML contains neither '找不到了!' nor '下架', the price,
    title and shop name are read from the page; otherwise the price is set to
    -1 and the ``XJ`` flag to 1. ``qijian`` is 4 when the shop name contains
    '旗舰', else 0. The matching ``product_url`` row is then set to status 2
    and a row is inserted into ``product_url_data``.

    If anything above raises, the error is printed and, only when the current
    URL lacks 'id' or contains 'shop', a placeholder row (empty name, price
    and qijian, ``XJ`` = 1) is stored instead.

    Args:
        Driver: Page object exposing ``html``, ``url`` and ``s_ele()``.
        Cur: Database cursor whose ``execute(sql, params)`` accepts ``%s``
            placeholders.
        Key: Mapping with ``'ID'``, ``'sku'`` and ``'Url'`` entries.
    """
    # Defined before the ``try`` so the error handler can always read it.
    SupName = ''
    try:
        XJ = 0
        q = 0
        Name = ''
        Price = ''
        if '找不到了!' not in Driver.html and '下架' not in Driver.html:
            Price = Driver.s_ele('tag:span@@class:Price--priceText').text
            Name = Driver.s_ele('tag:h1@@class:ItemHeader--mainTitle').text
            if 'chaoshi' not in Driver.url:
                SupName = Driver.s_ele('tag:div@@class:ShopHeader--title').text
            else:
                SupName = '天猫超市'
        else:
            Price = -1
            XJ = 1
        if SupName and '旗舰' in SupName:
            q = 4
        _store_product_row(
            Cur, Key['ID'], (Key['ID'], Name, Price, q, Key['Url'], XJ, SupName)
        )
    except Exception as e:
        print(e)
        if 'id' not in Driver.url or 'shop' in Driver.url:
            _store_product_row(
                Cur, Key['ID'], (Key['sku'], '', '', '', Key['Url'], 1, SupName)
            )
def Main():
    """Crawl pending Tmall product URLs and store the scraped data.

    Steps:
      1. Select up to 1000 unprocessed Tmall rows from ``product_url``.
      2. Open a Taobao search page and wait until the operator confirms (by
         entering 1) that the slider check and login are done.
      3. For each row, drag the slider captcha if it is present, load the
         product page, wait 2 seconds and call ``insertDBxpath``.

    A failure on one row is printed and the loop continues with the next
    row. The browser, cursor and connection are released on exit.
    """
    # NOTE(review): the connection credentials are hard-coded here; consider
    # loading them from configuration or the environment instead.
    Cur, Conn = returnDB("39.105.1.55", "root", "*#9U7P61R!657", "ZD_GT_pro")
    Driver = None
    try:
        Cur.execute("""SELECT ID,sku,sku_url as Url FROM `product_url` WHERE sku_url like '%tmall%' AND Status is null and (id like "%7" ) order by id desc limit 1000""")
        Driver = initDriver()
        Driver.get('https://s.taobao.com/search?page=1&q=%E6%97%A0%20CTL-350HK', timeout=5)

        # Block until the operator confirms the manual login; non-numeric
        # input simply re-prompts instead of crashing the run.
        while True:
            try:
                if int(input("是否已经滑动验证并登录,完成请按:【1】\n")) == 1:
                    break
            except ValueError:
                continue

        searchList = Cur.fetchall()
        searchNum = len(searchList)
        for K in searchList:
            try:
                # Mobile links are rewritten to the desktop detail page.
                if 'm.tmal' in K['Url']:
                    Url = f"""https://detail.tmall.com/item.htm?id={K['Url'].split('id=')[-1]}"""
                else:
                    Url = K['Url']

                # Drag the slider captcha when the current page shows one.
                slider = Driver.ele("xpath://span[@id='nc_1_n1z']")
                if slider:
                    ac = ActionChains(Driver)
                    ac.move_to(slider).hold().right(300).wait(2).release()
                Driver.get(Url, timeout=5)

                searchNum -= 1
                print(K, searchNum)
                time.sleep(2)
                insertDBxpath(Driver, Cur, K)
            except Exception as e:
                # Best effort per row: report the failure and move on.
                print(e)
    finally:
        try:
            if Driver is not None:
                Driver.quit()
        finally:
            Cur.close()
            Conn.close()
# Script entry point. NOTE(review): this call is not behind an
# ``if __name__ == "__main__":`` guard, so it presumably also runs on import.
Main()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment