Commit 56457489 authored by huangziyu

1

parent b74f6228
...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
time.sleep(0.5)
# 登录 # 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
# 一直循环直到登录成功位置,超过一百次就算失败。 # 一直循环直到登录成功位置,超过一百次就算失败。
count = 0 count = 0
...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%0' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
query_spider_list = []
for row in query_spider:
if str(row['id'])[-1] not in ["0"]:
continue
query_spider_list.append(row)
index = 0 index = 0
len_ = len(query_spider_list) len_ = len(query_spider)
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
if len_ == 0: if len_ == 0:
print("查询不到需要搜索的链接,结束程序。") print("查询不到需要搜索的链接,结束程序。")
driver.quit() driver.quit()
exit() exit()
for row in query_spider_list: for row in query_spider:
index += 1 index += 1
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
url = row['url'] url = row['url']
......
...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
time.sleep(0.5)
# 登录 # 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
# 一直循环直到登录成功位置,超过一百次就算失败。 # 一直循环直到登录成功位置,超过一百次就算失败。
count = 0 count = 0
...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%1' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
query_spider_list = []
for row in query_spider:
if str(row['id'])[-1] not in ["1"]:
continue
query_spider_list.append(row)
index = 0 index = 0
len_ = len(query_spider_list) len_ = len(query_spider)
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
if len_ == 0: if len_ == 0:
print("查询不到需要搜索的链接,结束程序。") print("查询不到需要搜索的链接,结束程序。")
driver.quit() driver.quit()
exit() exit()
for row in query_spider_list: for row in query_spider:
index += 1 index += 1
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
url = row['url'] url = row['url']
......
...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
time.sleep(0.5)
# 登录 # 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
# 一直循环直到登录成功位置,超过一百次就算失败。 # 一直循环直到登录成功位置,超过一百次就算失败。
count = 0 count = 0
...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%2' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
query_spider_list = []
for row in query_spider:
if str(row['id'])[-1] not in ["2"]:
continue
query_spider_list.append(row)
index = 0 index = 0
len_ = len(query_spider_list) len_ = len(query_spider)
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
if len_ == 0: if len_ == 0:
print("查询不到需要搜索的链接,结束程序。") print("查询不到需要搜索的链接,结束程序。")
driver.quit() driver.quit()
exit() exit()
for row in query_spider_list: for row in query_spider:
index += 1 index += 1
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
url = row['url'] url = row['url']
......
...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
time.sleep(0.5)
# 登录 # 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
# 一直循环直到登录成功位置,超过一百次就算失败。 # 一直循环直到登录成功位置,超过一百次就算失败。
count = 0 count = 0
...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%3' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
query_spider_list = []
for row in query_spider:
if str(row['id'])[-1] not in ["3"]:
continue
query_spider_list.append(row)
index = 0 index = 0
len_ = len(query_spider_list) len_ = len(query_spider)
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
if len_ == 0: if len_ == 0:
print("查询不到需要搜索的链接,结束程序。") print("查询不到需要搜索的链接,结束程序。")
driver.quit() driver.quit()
exit() exit()
for row in query_spider_list: for row in query_spider:
index += 1 index += 1
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
url = row['url'] url = row['url']
......
...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
time.sleep(0.5)
# 登录 # 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
# 一直循环直到登录成功位置,超过一百次就算失败。 # 一直循环直到登录成功位置,超过一百次就算失败。
count = 0 count = 0
...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%4' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
query_spider_list = []
for row in query_spider:
if str(row['id'])[-1] not in ["4"]:
continue
query_spider_list.append(row)
index = 0 index = 0
len_ = len(query_spider_list) len_ = len(query_spider)
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
if len_ == 0: if len_ == 0:
print("查询不到需要搜索的链接,结束程序。") print("查询不到需要搜索的链接,结束程序。")
driver.quit() driver.quit()
exit() exit()
for row in query_spider_list: for row in query_spider:
index += 1 index += 1
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
url = row['url'] url = row['url']
......
...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
time.sleep(0.5)
# 登录 # 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
# 一直循环直到登录成功位置,超过一百次就算失败。 # 一直循环直到登录成功位置,超过一百次就算失败。
count = 0 count = 0
...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%5' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
query_spider_list = []
for row in query_spider:
if str(row['id'])[-1] not in ["5"]:
continue
query_spider_list.append(row)
index = 0 index = 0
len_ = len(query_spider_list) len_ = len(query_spider)
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
if len_ == 0: if len_ == 0:
print("查询不到需要搜索的链接,结束程序。") print("查询不到需要搜索的链接,结束程序。")
driver.quit() driver.quit()
exit() exit()
for row in query_spider_list: for row in query_spider:
index += 1 index += 1
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
url = row['url'] url = row['url']
......
...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
time.sleep(0.5)
# 登录 # 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
# 一直循环直到登录成功位置,超过一百次就算失败。 # 一直循环直到登录成功位置,超过一百次就算失败。
count = 0 count = 0
...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%6' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
query_spider_list = []
for row in query_spider:
if str(row['id'])[-1] not in ["6"]:
continue
query_spider_list.append(row)
index = 0 index = 0
len_ = len(query_spider_list) len_ = len(query_spider)
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
if len_ == 0: if len_ == 0:
print("查询不到需要搜索的链接,结束程序。") print("查询不到需要搜索的链接,结束程序。")
driver.quit() driver.quit()
exit() exit()
for row in query_spider_list: for row in query_spider:
index += 1 index += 1
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
url = row['url'] url = row['url']
......
...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
time.sleep(0.5)
# 登录 # 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
# 一直循环直到登录成功位置,超过一百次就算失败。 # 一直循环直到登录成功位置,超过一百次就算失败。
count = 0 count = 0
...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%7' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
query_spider_list = []
for row in query_spider:
if str(row['id'])[-1] not in ["7"]:
continue
query_spider_list.append(row)
index = 0 index = 0
len_ = len(query_spider_list) len_ = len(query_spider)
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
if len_ == 0: if len_ == 0:
print("查询不到需要搜索的链接,结束程序。") print("查询不到需要搜索的链接,结束程序。")
driver.quit() driver.quit()
exit() exit()
for row in query_spider_list: for row in query_spider:
index += 1 index += 1
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
url = row['url'] url = row['url']
......
...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
time.sleep(0.5)
# 登录 # 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
# 一直循环直到登录成功位置,超过一百次就算失败。 # 一直循环直到登录成功位置,超过一百次就算失败。
count = 0 count = 0
...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%8' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
query_spider_list = []
for row in query_spider:
if str(row['id'])[-1] not in ["8"]:
continue
query_spider_list.append(row)
index = 0 index = 0
len_ = len(query_spider_list) len_ = len(query_spider)
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
if len_ == 0: if len_ == 0:
print("查询不到需要搜索的链接,结束程序。") print("查询不到需要搜索的链接,结束程序。")
driver.quit() driver.quit()
exit() exit()
for row in query_spider_list: for row in query_spider:
index += 1 index += 1
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
url = row['url'] url = row['url']
......
...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,10 +68,18 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
time.sleep(0.5)
# 登录 # 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
# 一直循环直到登录成功位置,超过一百次就算失败。 # 一直循环直到登录成功位置,超过一百次就算失败。
count = 0 count = 0
...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -79,21 +87,16 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%9' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
query_spider_list = []
for row in query_spider:
if str(row['id'])[-1] not in ["9"]:
continue
query_spider_list.append(row)
index = 0 index = 0
len_ = len(query_spider_list) len_ = len(query_spider)
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
if len_ == 0: if len_ == 0:
print("查询不到需要搜索的链接,结束程序。") print("查询不到需要搜索的链接,结束程序。")
driver.quit() driver.quit()
exit() exit()
for row in query_spider_list: for row in query_spider:
index += 1 index += 1
print(f"len={len_},index={index}") print(f"len={len_},index={index}")
url = row['url'] url = row['url']
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment