Commit 4ea23759 authored by huangziyu

1

parent 56457489
......@@ -80,7 +80,7 @@ def get_cookie():
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -121,6 +121,7 @@ def get_cookie():
Response = etree.HTML(text)
dataList = []
try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
......@@ -147,7 +148,10 @@ def get_cookie():
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
"""
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功")
pass
# 登录成功
......
......@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
"""
import time
from selenium.webdriver.common.keys import Keys
from urllib import request
import cv2
import numpy as np
......@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window()
driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
# 获取输入框中的现有内容的长度
content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click()
......@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%0' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%0' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -124,6 +128,7 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text)
dataList = []
try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
......@@ -150,7 +155,10 @@ def get_cookie(username=None,password=None,driver=None):
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
"""
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功")
pass
return
......@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600)
driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True:
for account in config.account_list:
username = account['username']
......
......@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
"""
import time
from selenium.webdriver.common.keys import Keys
from urllib import request
import cv2
import numpy as np
......@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window()
driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
# 获取输入框中的现有内容的长度
content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click()
......@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%1' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%1' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -124,6 +128,7 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text)
dataList = []
try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
......@@ -150,7 +155,10 @@ def get_cookie(username=None,password=None,driver=None):
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
"""
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功")
pass
return
......@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600)
driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True:
for account in config.account_list:
username = account['username']
......
......@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
"""
import time
from selenium.webdriver.common.keys import Keys
from urllib import request
import cv2
import numpy as np
......@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window()
driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
# 获取输入框中的现有内容的长度
content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click()
......@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%2' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%2' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -124,6 +128,7 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text)
dataList = []
try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
......@@ -150,7 +155,10 @@ def get_cookie(username=None,password=None,driver=None):
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
"""
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功")
pass
return
......@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600)
driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True:
for account in config.account_list:
username = account['username']
......
......@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
"""
import time
from selenium.webdriver.common.keys import Keys
from urllib import request
import cv2
import numpy as np
......@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window()
driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
# 获取输入框中的现有内容的长度
content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click()
......@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%3' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%3' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -124,6 +128,7 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text)
dataList = []
try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
......@@ -150,7 +155,10 @@ def get_cookie(username=None,password=None,driver=None):
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
"""
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功")
pass
return
......@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600)
driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True:
for account in config.account_list:
username = account['username']
......
......@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
"""
import time
from selenium.webdriver.common.keys import Keys
from urllib import request
import cv2
import numpy as np
......@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window()
driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
# 获取输入框中的现有内容的长度
content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click()
......@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%4' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%4' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -124,6 +128,7 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text)
dataList = []
try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
......@@ -150,7 +155,10 @@ def get_cookie(username=None,password=None,driver=None):
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
"""
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功")
pass
return
......@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600)
driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True:
for account in config.account_list:
username = account['username']
......
......@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
"""
import time
from selenium.webdriver.common.keys import Keys
from urllib import request
import cv2
import numpy as np
......@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window()
driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
# 获取输入框中的现有内容的长度
content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click()
......@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%5' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%5' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -124,6 +128,7 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text)
dataList = []
try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
......@@ -150,7 +155,10 @@ def get_cookie(username=None,password=None,driver=None):
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
"""
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功")
pass
return
......@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600)
driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True:
for account in config.account_list:
username = account['username']
......
......@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
"""
import time
from selenium.webdriver.common.keys import Keys
from urllib import request
import cv2
import numpy as np
......@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window()
driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
# 获取输入框中的现有内容的长度
content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click()
......@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%6' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%6' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -124,6 +128,7 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text)
dataList = []
try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
......@@ -150,7 +155,10 @@ def get_cookie(username=None,password=None,driver=None):
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
"""
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功")
pass
return
......@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600)
driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True:
for account in config.account_list:
username = account['username']
......
......@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
"""
import time
from selenium.webdriver.common.keys import Keys
from urllib import request
import cv2
import numpy as np
......@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window()
driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
# 获取输入框中的现有内容的长度
content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click()
......@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%7' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%7' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -124,6 +128,7 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text)
dataList = []
try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
......@@ -150,7 +155,10 @@ def get_cookie(username=None,password=None,driver=None):
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
"""
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功")
pass
return
......@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600)
driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True:
for account in config.account_list:
username = account['username']
......
......@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
"""
import time
from selenium.webdriver.common.keys import Keys
from urllib import request
import cv2
import numpy as np
......@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window()
driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
# 获取输入框中的现有内容的长度
content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click()
......@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%8' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%8' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -124,6 +128,7 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text)
dataList = []
try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
......@@ -150,7 +155,10 @@ def get_cookie(username=None,password=None,driver=None):
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
"""
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功")
pass
return
......@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600)
driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True:
for account in config.account_list:
username = account['username']
......
......@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
"""
import time
from selenium.webdriver.common.keys import Keys
from urllib import request
import cv2
import numpy as np
......@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window()
driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear()
time.sleep(0.7)
driver.find_element(by=By.ID, value='nloginpwd').clear()
time.sleep(1.7)
print(f"清空文本框结束")
# 获取输入框中的现有内容的长度
content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录
print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click()
......@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%9' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%9' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -124,6 +128,7 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text)
dataList = []
try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
......@@ -150,7 +155,10 @@ def get_cookie(username=None,password=None,driver=None):
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
"""
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功")
pass
return
......@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600)
driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True:
for account in config.account_list:
username = account['username']
......
......@@ -361,7 +361,7 @@ def get_cookie():
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and data_batch ='202403051815460001' order by id" \
# f" limit 100000"
# query_spider_sql = f"-- SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '41' and (id like '%5' or id like '%6' or id like '%7' or id like '%8' or id like '%9' ) limit 10000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE data_batch ='202404180000000001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD'"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' "
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -573,6 +573,7 @@ def get_cookie():
Response = etree.HTML(text)
dataList = []
try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
......@@ -599,7 +600,10 @@ def get_cookie():
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
"""
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功")
updateCookie(driver)
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment