Commit 2a68df0e authored by huangziyu

1

parent 0ba97988
query_where_sql = f" {config.query_where_sql} "
# query_where_sql = f" and project_id = '84' "
# username = 'Li3456ab' # username = 'Li3456ab'
# #
# username = 'Tempzgc' # username = 'Tempzgc'
...@@ -23,21 +24,34 @@ password = 'hu19941018' ...@@ -23,21 +24,34 @@ password = 'hu19941018'
# password = '12 # password = '12
account_list = [ account_list = [
{ {
'username':'jack15510118048', 'username':'13801194827', #验证转图
'password':'Hry195521', 'password':'072600uddz',
},
# {
# 'username':'jack15510118048', #验证成语
# 'password':'Hry195521',
# },
{
'username':'13311252087',
'password':'Ayhzy0726',
}, },
{ {
'username':'13717659089', 'username':'13717659089',
'password':'Hry195521', 'password':'Hry195521',
}, },
# {
# 'username':'jack17778135785',
# 'password':'Hry195521',
# },
{ {
'username':'jack17778135785', 'username':'13801194827', #验证转图
'password':'Hry195521', 'password':'072600uddz',
}, },
{ {
'username':'15110163892', 'username':'13801194827', #验证转图
'password':'Hry195521', 'password':'072600uddz',
}, },
{ {
'username':'19118625837', 'username':'19118625837',
...@@ -47,25 +61,18 @@ account_list = [ ...@@ -47,25 +61,18 @@ account_list = [
'username':'15044870720', 'username':'15044870720',
'password':'1946cheN', 'password':'1946cheN',
}, { }, {
'username':'13381042425', 'username':'13381042425', #验证转图
'password':'hu19941018', 'password':'hu19941018',
}, },
{ {
'username':'13801194827', 'username': 'Liantong9690', #语音验证
'password':'072600uddz', 'password': '123456ab',
},
{
'username':'15045077987',
'password':'zt20010311jd',
},
{
'username':'13801194827',
'password':'072600uddz',
}, },
{ {
'username':'Liantong9690', 'username':'15110163892', #语音验证
'password':'123456ab', 'password':'Hry195521',
}, },
] ]
print(len(account_list)) print(len(account_list))
\ No newline at end of file
# https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855 # https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
\ No newline at end of file
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
""" """
import time import time
from urllib import request from urllib import request
import cv2 import config
import numpy as np import numpy as np
import pyautogui import pyautogui
from selenium import webdriver from selenium import webdriver
...@@ -81,14 +81,16 @@ def get_cookie(): ...@@ -81,14 +81,16 @@ def get_cookie():
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' limit 30000" # query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and docId in ('wVALFo8Bqn1v89VnfxYB','TFALFo8Bqn1v89VnCRZE') "
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' {config.query_where_sql} "
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
for row in query_spider: for row in query_spider:
if str(row['id'])[-1] not in ["0","1","2"]: # if str(row['id'])[-1] not in ["0","1","2"]:
continue # continue
index += 1 index += 1
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
# while True: # while True:
......
...@@ -93,7 +93,7 @@ def get_cookie(username=None, password=None, driver=None): ...@@ -93,7 +93,7 @@ def get_cookie(username=None, password=None, driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId,docId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%0' limit 30000" query_spider_sql = f"SELECT id,url,mallId,docId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' {config.query_where_sql} and id like '%0' "
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
......
...@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%1' limit 30000" query_spider_sql = f"SELECT id,url,mallId,docId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' {config.query_where_sql} and id like '%1' "
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
......
...@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%2' limit 30000" query_spider_sql = f"SELECT id,url,mallId,docId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' {config.query_where_sql} and id like '%2' "
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
...@@ -174,7 +174,7 @@ if __name__ == "__main__": ...@@ -174,7 +174,7 @@ if __name__ == "__main__":
driver = webdriver.Chrome(options=option) driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled") option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(1200, 600) driver.set_window_size(1200, 600)
account_start_index = 2 account_start_index = 0
get_cookie(username=config.account_list[account_start_index]['username'], get_cookie(username=config.account_list[account_start_index]['username'],
password=config.account_list[account_start_index]['password'], driver=driver) password=config.account_list[account_start_index]['password'], driver=driver)
while True: while True:
......
...@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%3' limit 30000" query_spider_sql = f"SELECT id,url,mallId,docId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' {config.query_where_sql} and id like '%3' "
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
...@@ -174,7 +174,7 @@ if __name__ == "__main__": ...@@ -174,7 +174,7 @@ if __name__ == "__main__":
driver = webdriver.Chrome(options=option) driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled") option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(1200, 600) driver.set_window_size(1200, 600)
account_start_index = 3 account_start_index = 1
get_cookie(username=config.account_list[account_start_index]['username'], get_cookie(username=config.account_list[account_start_index]['username'],
password=config.account_list[account_start_index]['password'], driver=driver) password=config.account_list[account_start_index]['password'], driver=driver)
while True: while True:
......
...@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%4' limit 30000" query_spider_sql = f"SELECT id,url,mallId,docId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' {config.query_where_sql} and id like '%4' "
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
......
...@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%5' limit 30000" query_spider_sql = f"SELECT id,url,mallId,docId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' {config.query_where_sql} and id like '%5' "
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
...@@ -131,13 +131,13 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -131,13 +131,13 @@ def get_cookie(username=None,password=None,driver=None):
dataList = [] dataList = []
try: try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"): for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = '' is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']") and x.xpath(".//div[@class='p-price']//i//text()"): if x.xpath(".//div[@class='p-name p-name-type-2']") and x.xpath(".//div[@class='p-price']//i//text()"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath( shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else '' ".//span[@class='J_im_icon']/a/@title") else ''
dataList.append( dataList.append(
{ {
'is_self_operate_div': is_self_operate_div, 'is_self_operate_div': is_self_operate_div,
...@@ -149,7 +149,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -149,7 +149,7 @@ def get_cookie(username=None,password=None,driver=None):
"'")).strip(), "'")).strip(),
'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0] 'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0]
} }
) )
insert_sql = f""" insert_sql = f"""
INSERT INTO DMP_SEARCH_DATA(id,result,mallId) INSERT INTO DMP_SEARCH_DATA(id,result,mallId)
...@@ -174,7 +174,7 @@ if __name__ == "__main__": ...@@ -174,7 +174,7 @@ if __name__ == "__main__":
driver = webdriver.Chrome(options=option) driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled") option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(1200, 600) driver.set_window_size(1200, 600)
account_start_index = 5 account_start_index = 4
get_cookie(username=config.account_list[account_start_index]['username'], get_cookie(username=config.account_list[account_start_index]['username'],
password=config.account_list[account_start_index]['password'], driver=driver) password=config.account_list[account_start_index]['password'], driver=driver)
while True: while True:
......
...@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%6' limit 30000" query_spider_sql = f"SELECT id,url,mallId,docId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' {config.query_where_sql} and id like '%6' "
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
......
...@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%7' limit 30000" query_spider_sql = f"SELECT id,url,mallId,docId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' {config.query_where_sql} and id like '%7' "
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
......
...@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%8' limit 30000" query_spider_sql = f"SELECT id,url,mallId,docId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' {config.query_where_sql} and id like '%8' "
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
......
...@@ -89,10 +89,11 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -89,10 +89,11 @@ def get_cookie(username=None,password=None,driver=None):
# 一直循环直到登录成功位置,超过一百次就算失败。 # 一直循环直到登录成功位置,超过一百次就算失败。
count = 0 count = 0
while driver.title != '京东(JD.COM)-正品低价、品质保障、配送及时、轻松购物!': while driver.title != '京东(JD.COM)-正品低价、品质保障、配送及时、轻松购物!':
count = count + 1
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%9' limit 30000" query_spider_sql = f"SELECT id,url,mallId,docId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' {config.query_where_sql} and id like '%9' "
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
......
...@@ -361,8 +361,8 @@ def get_cookie(): ...@@ -361,8 +361,8 @@ def get_cookie():
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE data_batch ='202401251550270001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 10000" # query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE data_batch ='202401251550270001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 10000"
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and data_batch ='202403051815460001' order by id" \ # query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and data_batch ='202403051815460001' order by id" \
# f" limit 100000" # f" limit 100000"
# query_spider_sql = f"-- SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and (id like '%5' or id like '%6' or id like '%7' or id like '%8' or id like '%9' ) limit 10000" # query_spider_sql = f"-- SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' {config.query_where_sql} and (id like '%5' or id like '%6' or id like '%7' or id like '%8' or id like '%9' ) limit 10000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' " query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' {config.query_where_sql} "
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment