Commit 4ea23759 authored by huangziyu

1

parent 56457489
...@@ -80,7 +80,7 @@ def get_cookie(): ...@@ -80,7 +80,7 @@ def get_cookie():
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
...@@ -121,33 +121,37 @@ def get_cookie(): ...@@ -121,33 +121,37 @@ def get_cookie():
Response = etree.HTML(text) Response = etree.HTML(text)
dataList = [] dataList = []
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"): try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath( if x.xpath(".//div[@class='p-name p-name-type-2']"):
".//span[@class='J_im_icon']/a/@title") else '' shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
dataList.append(
{ dataList.append(
'is_self_operate_div': is_self_operate_div, {
'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html', 'is_self_operate_div': is_self_operate_div,
'sku_id': str(x.xpath("./@data-sku")[0]), 'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html',
'shop_name': shop_name, 'sku_id': str(x.xpath("./@data-sku")[0]),
'sku_name': '‘'.join( 'shop_name': shop_name,
''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split( 'sku_name': '‘'.join(
"'")).strip(), ''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split(
'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0] "'")).strip(),
} 'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0]
}
)
insert_sql = f""" )
INSERT INTO DMP_SEARCH_DATA(id,result,mallId) insert_sql = f"""
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}') INSERT INTO DMP_SEARCH_DATA(id,result,mallId)
""" VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
mysql_zdzs_proxy(insert_sql) """
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功") print("保存成功")
pass pass
# 登录成功 # 登录成功
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855 【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
""" """
import time import time
from selenium.webdriver.common.keys import Keys
from urllib import request from urllib import request
import cv2 import cv2
import numpy as np import numpy as np
...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始") print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear() # 获取输入框中的现有内容的长度
time.sleep(0.7) content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
driver.find_element(by=By.ID, value='nloginpwd').clear() # 逐个删除所有字符
time.sleep(1.7) for _ in range(content_length):
print(f"清空文本框结束") driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录 # 登录
print(f"点击登录开始") print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%0' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%0' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text) Response = etree.HTML(text)
dataList = [] dataList = []
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"): try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath( if x.xpath(".//div[@class='p-name p-name-type-2']"):
".//span[@class='J_im_icon']/a/@title") else '' shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
dataList.append(
{ dataList.append(
'is_self_operate_div': is_self_operate_div, {
'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html', 'is_self_operate_div': is_self_operate_div,
'sku_id': str(x.xpath("./@data-sku")[0]), 'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html',
'shop_name': shop_name, 'sku_id': str(x.xpath("./@data-sku")[0]),
'sku_name': '‘'.join( 'shop_name': shop_name,
''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split( 'sku_name': '‘'.join(
"'")).strip(), ''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split(
'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0] "'")).strip(),
} 'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0]
}
)
insert_sql = f""" )
INSERT INTO DMP_SEARCH_DATA(id,result,mallId) insert_sql = f"""
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}') INSERT INTO DMP_SEARCH_DATA(id,result,mallId)
""" VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
mysql_zdzs_proxy(insert_sql) """
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功") print("保存成功")
pass pass
return return
...@@ -164,7 +172,8 @@ if __name__ == "__main__": ...@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation']) option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option) driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled") option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600) driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True: while True:
for account in config.account_list: for account in config.account_list:
username = account['username'] username = account['username']
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855 【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
""" """
import time import time
from selenium.webdriver.common.keys import Keys
from urllib import request from urllib import request
import cv2 import cv2
import numpy as np import numpy as np
...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始") print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear() # 获取输入框中的现有内容的长度
time.sleep(0.7) content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
driver.find_element(by=By.ID, value='nloginpwd').clear() # 逐个删除所有字符
time.sleep(1.7) for _ in range(content_length):
print(f"清空文本框结束") driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录 # 登录
print(f"点击登录开始") print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%1' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%1' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text) Response = etree.HTML(text)
dataList = [] dataList = []
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"): try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath( if x.xpath(".//div[@class='p-name p-name-type-2']"):
".//span[@class='J_im_icon']/a/@title") else '' shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
dataList.append(
{ dataList.append(
'is_self_operate_div': is_self_operate_div, {
'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html', 'is_self_operate_div': is_self_operate_div,
'sku_id': str(x.xpath("./@data-sku")[0]), 'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html',
'shop_name': shop_name, 'sku_id': str(x.xpath("./@data-sku")[0]),
'sku_name': '‘'.join( 'shop_name': shop_name,
''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split( 'sku_name': '‘'.join(
"'")).strip(), ''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split(
'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0] "'")).strip(),
} 'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0]
}
)
insert_sql = f""" )
INSERT INTO DMP_SEARCH_DATA(id,result,mallId) insert_sql = f"""
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}') INSERT INTO DMP_SEARCH_DATA(id,result,mallId)
""" VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
mysql_zdzs_proxy(insert_sql) """
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功") print("保存成功")
pass pass
return return
...@@ -164,7 +172,8 @@ if __name__ == "__main__": ...@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation']) option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option) driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled") option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600) driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True: while True:
for account in config.account_list: for account in config.account_list:
username = account['username'] username = account['username']
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855 【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
""" """
import time import time
from selenium.webdriver.common.keys import Keys
from urllib import request from urllib import request
import cv2 import cv2
import numpy as np import numpy as np
...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始") print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear() # 获取输入框中的现有内容的长度
time.sleep(0.7) content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
driver.find_element(by=By.ID, value='nloginpwd').clear() # 逐个删除所有字符
time.sleep(1.7) for _ in range(content_length):
print(f"清空文本框结束") driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录 # 登录
print(f"点击登录开始") print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%2' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%2' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text) Response = etree.HTML(text)
dataList = [] dataList = []
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"): try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath( if x.xpath(".//div[@class='p-name p-name-type-2']"):
".//span[@class='J_im_icon']/a/@title") else '' shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
dataList.append(
{ dataList.append(
'is_self_operate_div': is_self_operate_div, {
'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html', 'is_self_operate_div': is_self_operate_div,
'sku_id': str(x.xpath("./@data-sku")[0]), 'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html',
'shop_name': shop_name, 'sku_id': str(x.xpath("./@data-sku")[0]),
'sku_name': '‘'.join( 'shop_name': shop_name,
''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split( 'sku_name': '‘'.join(
"'")).strip(), ''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split(
'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0] "'")).strip(),
} 'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0]
}
)
insert_sql = f""" )
INSERT INTO DMP_SEARCH_DATA(id,result,mallId) insert_sql = f"""
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}') INSERT INTO DMP_SEARCH_DATA(id,result,mallId)
""" VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
mysql_zdzs_proxy(insert_sql) """
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功") print("保存成功")
pass pass
return return
...@@ -164,7 +172,8 @@ if __name__ == "__main__": ...@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation']) option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option) driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled") option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600) driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True: while True:
for account in config.account_list: for account in config.account_list:
username = account['username'] username = account['username']
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855 【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
""" """
import time import time
from selenium.webdriver.common.keys import Keys
from urllib import request from urllib import request
import cv2 import cv2
import numpy as np import numpy as np
...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始") print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear() # 获取输入框中的现有内容的长度
time.sleep(0.7) content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
driver.find_element(by=By.ID, value='nloginpwd').clear() # 逐个删除所有字符
time.sleep(1.7) for _ in range(content_length):
print(f"清空文本框结束") driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录 # 登录
print(f"点击登录开始") print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%3' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%3' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text) Response = etree.HTML(text)
dataList = [] dataList = []
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"): try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath( if x.xpath(".//div[@class='p-name p-name-type-2']"):
".//span[@class='J_im_icon']/a/@title") else '' shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
dataList.append(
{ dataList.append(
'is_self_operate_div': is_self_operate_div, {
'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html', 'is_self_operate_div': is_self_operate_div,
'sku_id': str(x.xpath("./@data-sku")[0]), 'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html',
'shop_name': shop_name, 'sku_id': str(x.xpath("./@data-sku")[0]),
'sku_name': '‘'.join( 'shop_name': shop_name,
''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split( 'sku_name': '‘'.join(
"'")).strip(), ''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split(
'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0] "'")).strip(),
} 'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0]
}
)
insert_sql = f""" )
INSERT INTO DMP_SEARCH_DATA(id,result,mallId) insert_sql = f"""
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}') INSERT INTO DMP_SEARCH_DATA(id,result,mallId)
""" VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
mysql_zdzs_proxy(insert_sql) """
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功") print("保存成功")
pass pass
return return
...@@ -164,7 +172,8 @@ if __name__ == "__main__": ...@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation']) option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option) driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled") option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600) driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True: while True:
for account in config.account_list: for account in config.account_list:
username = account['username'] username = account['username']
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855 【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
""" """
import time import time
from selenium.webdriver.common.keys import Keys
from urllib import request from urllib import request
import cv2 import cv2
import numpy as np import numpy as np
...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始") print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear() # 获取输入框中的现有内容的长度
time.sleep(0.7) content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
driver.find_element(by=By.ID, value='nloginpwd').clear() # 逐个删除所有字符
time.sleep(1.7) for _ in range(content_length):
print(f"清空文本框结束") driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录 # 登录
print(f"点击登录开始") print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%4' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%4' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text) Response = etree.HTML(text)
dataList = [] dataList = []
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"): try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath( if x.xpath(".//div[@class='p-name p-name-type-2']"):
".//span[@class='J_im_icon']/a/@title") else '' shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
dataList.append(
{ dataList.append(
'is_self_operate_div': is_self_operate_div, {
'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html', 'is_self_operate_div': is_self_operate_div,
'sku_id': str(x.xpath("./@data-sku")[0]), 'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html',
'shop_name': shop_name, 'sku_id': str(x.xpath("./@data-sku")[0]),
'sku_name': '‘'.join( 'shop_name': shop_name,
''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split( 'sku_name': '‘'.join(
"'")).strip(), ''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split(
'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0] "'")).strip(),
} 'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0]
}
)
insert_sql = f""" )
INSERT INTO DMP_SEARCH_DATA(id,result,mallId) insert_sql = f"""
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}') INSERT INTO DMP_SEARCH_DATA(id,result,mallId)
""" VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
mysql_zdzs_proxy(insert_sql) """
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功") print("保存成功")
pass pass
return return
...@@ -164,7 +172,8 @@ if __name__ == "__main__": ...@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation']) option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option) driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled") option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600) driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True: while True:
for account in config.account_list: for account in config.account_list:
username = account['username'] username = account['username']
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855 【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
""" """
import time import time
from selenium.webdriver.common.keys import Keys
from urllib import request from urllib import request
import cv2 import cv2
import numpy as np import numpy as np
...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始") print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear() # 获取输入框中的现有内容的长度
time.sleep(0.7) content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
driver.find_element(by=By.ID, value='nloginpwd').clear() # 逐个删除所有字符
time.sleep(1.7) for _ in range(content_length):
print(f"清空文本框结束") driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录 # 登录
print(f"点击登录开始") print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%5' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%5' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text) Response = etree.HTML(text)
dataList = [] dataList = []
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"): try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath( if x.xpath(".//div[@class='p-name p-name-type-2']"):
".//span[@class='J_im_icon']/a/@title") else '' shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
dataList.append(
{ dataList.append(
'is_self_operate_div': is_self_operate_div, {
'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html', 'is_self_operate_div': is_self_operate_div,
'sku_id': str(x.xpath("./@data-sku")[0]), 'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html',
'shop_name': shop_name, 'sku_id': str(x.xpath("./@data-sku")[0]),
'sku_name': '‘'.join( 'shop_name': shop_name,
''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split( 'sku_name': '‘'.join(
"'")).strip(), ''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split(
'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0] "'")).strip(),
} 'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0]
}
)
insert_sql = f""" )
INSERT INTO DMP_SEARCH_DATA(id,result,mallId) insert_sql = f"""
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}') INSERT INTO DMP_SEARCH_DATA(id,result,mallId)
""" VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
mysql_zdzs_proxy(insert_sql) """
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功") print("保存成功")
pass pass
return return
...@@ -164,7 +172,8 @@ if __name__ == "__main__": ...@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation']) option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option) driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled") option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600) driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True: while True:
for account in config.account_list: for account in config.account_list:
username = account['username'] username = account['username']
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855 【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
""" """
import time import time
from selenium.webdriver.common.keys import Keys
from urllib import request from urllib import request
import cv2 import cv2
import numpy as np import numpy as np
...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始") print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear() # 获取输入框中的现有内容的长度
time.sleep(0.7) content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
driver.find_element(by=By.ID, value='nloginpwd').clear() # 逐个删除所有字符
time.sleep(1.7) for _ in range(content_length):
print(f"清空文本框结束") driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录 # 登录
print(f"点击登录开始") print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%6' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%6' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text) Response = etree.HTML(text)
dataList = [] dataList = []
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"): try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath( if x.xpath(".//div[@class='p-name p-name-type-2']"):
".//span[@class='J_im_icon']/a/@title") else '' shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
dataList.append(
{ dataList.append(
'is_self_operate_div': is_self_operate_div, {
'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html', 'is_self_operate_div': is_self_operate_div,
'sku_id': str(x.xpath("./@data-sku")[0]), 'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html',
'shop_name': shop_name, 'sku_id': str(x.xpath("./@data-sku")[0]),
'sku_name': '‘'.join( 'shop_name': shop_name,
''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split( 'sku_name': '‘'.join(
"'")).strip(), ''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split(
'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0] "'")).strip(),
} 'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0]
}
)
insert_sql = f""" )
INSERT INTO DMP_SEARCH_DATA(id,result,mallId) insert_sql = f"""
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}') INSERT INTO DMP_SEARCH_DATA(id,result,mallId)
""" VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
mysql_zdzs_proxy(insert_sql) """
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功") print("保存成功")
pass pass
return return
...@@ -164,7 +172,8 @@ if __name__ == "__main__": ...@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation']) option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option) driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled") option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600) driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True: while True:
for account in config.account_list: for account in config.account_list:
username = account['username'] username = account['username']
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855 【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
""" """
import time import time
from selenium.webdriver.common.keys import Keys
from urllib import request from urllib import request
import cv2 import cv2
import numpy as np import numpy as np
...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始") print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear() # 获取输入框中的现有内容的长度
time.sleep(0.7) content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
driver.find_element(by=By.ID, value='nloginpwd').clear() # 逐个删除所有字符
time.sleep(1.7) for _ in range(content_length):
print(f"清空文本框结束") driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录 # 登录
print(f"点击登录开始") print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%7' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%7' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text) Response = etree.HTML(text)
dataList = [] dataList = []
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"): try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath( if x.xpath(".//div[@class='p-name p-name-type-2']"):
".//span[@class='J_im_icon']/a/@title") else '' shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
dataList.append(
{ dataList.append(
'is_self_operate_div': is_self_operate_div, {
'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html', 'is_self_operate_div': is_self_operate_div,
'sku_id': str(x.xpath("./@data-sku")[0]), 'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html',
'shop_name': shop_name, 'sku_id': str(x.xpath("./@data-sku")[0]),
'sku_name': '‘'.join( 'shop_name': shop_name,
''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split( 'sku_name': '‘'.join(
"'")).strip(), ''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split(
'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0] "'")).strip(),
} 'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0]
}
)
insert_sql = f""" )
INSERT INTO DMP_SEARCH_DATA(id,result,mallId) insert_sql = f"""
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}') INSERT INTO DMP_SEARCH_DATA(id,result,mallId)
""" VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
mysql_zdzs_proxy(insert_sql) """
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功") print("保存成功")
pass pass
return return
...@@ -164,7 +172,8 @@ if __name__ == "__main__": ...@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation']) option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option) driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled") option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600) driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True: while True:
for account in config.account_list: for account in config.account_list:
username = account['username'] username = account['username']
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855 【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
""" """
import time import time
from selenium.webdriver.common.keys import Keys
from urllib import request from urllib import request
import cv2 import cv2
import numpy as np import numpy as np
...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始") print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear() # 获取输入框中的现有内容的长度
time.sleep(0.7) content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
driver.find_element(by=By.ID, value='nloginpwd').clear() # 逐个删除所有字符
time.sleep(1.7) for _ in range(content_length):
print(f"清空文本框结束") driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录 # 登录
print(f"点击登录开始") print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%8' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%8' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text) Response = etree.HTML(text)
dataList = [] dataList = []
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"): try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath( if x.xpath(".//div[@class='p-name p-name-type-2']"):
".//span[@class='J_im_icon']/a/@title") else '' shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
dataList.append(
{ dataList.append(
'is_self_operate_div': is_self_operate_div, {
'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html', 'is_self_operate_div': is_self_operate_div,
'sku_id': str(x.xpath("./@data-sku")[0]), 'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html',
'shop_name': shop_name, 'sku_id': str(x.xpath("./@data-sku")[0]),
'sku_name': '‘'.join( 'shop_name': shop_name,
''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split( 'sku_name': '‘'.join(
"'")).strip(), ''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split(
'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0] "'")).strip(),
} 'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0]
}
)
insert_sql = f""" )
INSERT INTO DMP_SEARCH_DATA(id,result,mallId) insert_sql = f"""
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}') INSERT INTO DMP_SEARCH_DATA(id,result,mallId)
""" VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
mysql_zdzs_proxy(insert_sql) """
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功") print("保存成功")
pass pass
return return
...@@ -164,7 +172,8 @@ if __name__ == "__main__": ...@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation']) option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option) driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled") option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600) driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True: while True:
for account in config.account_list: for account in config.account_list:
username = account['username'] username = account['username']
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855 【京东搜索采集自动验证版Windows10部署文档】https://www.tapd.cn/36769433/prong/stories/view/1136769433001003855
""" """
import time import time
from selenium.webdriver.common.keys import Keys
from urllib import request from urllib import request
import cv2 import cv2
import numpy as np import numpy as np
...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None): ...@@ -68,16 +69,19 @@ def mysql_zdzs_proxy(sql=None):
def get_cookie(username=None,password=None,driver=None): def get_cookie(username=None,password=None,driver=None):
# driver.maximize_window() # driver.maximize_window()
driver.get('https://passport.jd.com/uc/login') driver.get('https://passport.jd.com/uc/login')
time.sleep(0.7)
print(f"清空文本框开始") print(f"清空文本框开始")
driver.find_element(by=By.ID, value='loginname').clear() # 获取输入框中的现有内容的长度
time.sleep(0.7) content_length = len(driver.find_element(by=By.ID, value='loginname').get_attribute('value'))
driver.find_element(by=By.ID, value='nloginpwd').clear() # 逐个删除所有字符
time.sleep(1.7) for _ in range(content_length):
print(f"清空文本框结束") driver.find_element(by=By.ID, value='loginname').send_keys(Keys.BACK_SPACE)
content_length = len(driver.find_element(by=By.ID, value='nloginpwd').get_attribute('value'))
# 逐个删除所有字符
for _ in range(content_length):
driver.find_element(by=By.ID, value='nloginpwd').send_keys(Keys.BACK_SPACE)
print(f"清空文本框成功")
driver.find_element(by=By.ID, value='loginname').send_keys(username) driver.find_element(by=By.ID, value='loginname').send_keys(username)
driver.find_element(by=By.ID, value='nloginpwd').send_keys(password) driver.find_element(by=By.ID, value='nloginpwd').send_keys(password)
# 登录 # 登录
print(f"点击登录开始") print(f"点击登录开始")
driver.find_element(by=By.ID, value='loginsubmit').click() driver.find_element(by=By.ID, value='loginsubmit').click()
...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -87,7 +91,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1 count = count + 1
print(f"需要登录") print(f"需要登录")
time.sleep(1) time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000004' and id like '%9' limit 30000" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' and id like '%9' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None): ...@@ -124,33 +128,37 @@ def get_cookie(username=None,password=None,driver=None):
Response = etree.HTML(text) Response = etree.HTML(text)
dataList = [] dataList = []
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"): try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath( if x.xpath(".//div[@class='p-name p-name-type-2']"):
".//span[@class='J_im_icon']/a/@title") else '' shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
dataList.append(
{ dataList.append(
'is_self_operate_div': is_self_operate_div, {
'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html', 'is_self_operate_div': is_self_operate_div,
'sku_id': str(x.xpath("./@data-sku")[0]), 'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html',
'shop_name': shop_name, 'sku_id': str(x.xpath("./@data-sku")[0]),
'sku_name': '‘'.join( 'shop_name': shop_name,
''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split( 'sku_name': '‘'.join(
"'")).strip(), ''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split(
'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0] "'")).strip(),
} 'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0]
}
)
insert_sql = f""" )
INSERT INTO DMP_SEARCH_DATA(id,result,mallId) insert_sql = f"""
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}') INSERT INTO DMP_SEARCH_DATA(id,result,mallId)
""" VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
mysql_zdzs_proxy(insert_sql) """
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功") print("保存成功")
pass pass
return return
...@@ -164,7 +172,8 @@ if __name__ == "__main__": ...@@ -164,7 +172,8 @@ if __name__ == "__main__":
option.add_experimental_option('excludeSwitches', ['enable-automation']) option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(options=option) driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled") option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(700, 600) driver.set_window_size(1200, 600)
# get_cookie(username=config.account_list[0]['username'],password=config.account_list[0]['password'],driver=driver)
while True: while True:
for account in config.account_list: for account in config.account_list:
username = account['username'] username = account['username']
......
...@@ -361,7 +361,7 @@ def get_cookie(): ...@@ -361,7 +361,7 @@ def get_cookie():
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and data_batch ='202403051815460001' order by id" \ # query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and data_batch ='202403051815460001' order by id" \
# f" limit 100000" # f" limit 100000"
# query_spider_sql = f"-- SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '41' and (id like '%5' or id like '%6' or id like '%7' or id like '%8' or id like '%9' ) limit 10000" # query_spider_sql = f"-- SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '41' and (id like '%5' or id like '%6' or id like '%7' or id like '%8' or id like '%9' ) limit 10000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE data_batch ='202404180000000001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD'" query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '84' and data_batch ='202404180000000002' "
query_spider = mysql_zdzs_proxy(query_spider_sql) query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0 index = 0
len_ = len(query_spider) len_ = len(query_spider)
...@@ -573,33 +573,37 @@ def get_cookie(): ...@@ -573,33 +573,37 @@ def get_cookie():
Response = etree.HTML(text) Response = etree.HTML(text)
dataList = [] dataList = []
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"): try:
for x in Response.xpath("//div[@id='J_goodsList']/ul/li"):
is_self_operate_div = ''
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath( if x.xpath(".//div[@class='p-name p-name-type-2']"):
".//span[@class='J_im_icon']/a/@title") else '' shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
dataList.append(
{ dataList.append(
'is_self_operate_div': is_self_operate_div, {
'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html', 'is_self_operate_div': is_self_operate_div,
'sku_id': str(x.xpath("./@data-sku")[0]), 'item_url': 'https://item.jd.com/' + str(x.xpath("./@data-sku")[0]) + '.html',
'shop_name': shop_name, 'sku_id': str(x.xpath("./@data-sku")[0]),
'sku_name': '‘'.join( 'shop_name': shop_name,
''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split( 'sku_name': '‘'.join(
"'")).strip(), ''.join(x.xpath(".//div[@class='p-name p-name-type-2']//em//text()")).split(
'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0] "'")).strip(),
} 'sku_price': x.xpath(".//div[@class='p-price']//i//text()")[0]
}
)
insert_sql = f""" )
INSERT INTO DMP_SEARCH_DATA(id,result,mallId) insert_sql = f"""
VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}') INSERT INTO DMP_SEARCH_DATA(id,result,mallId)
""" VALUES ({row['id']},'{json.dumps(dataList, ensure_ascii=False).replace("'", "''")}','{row['mallId']}')
mysql_zdzs_proxy(insert_sql) """
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
continue
print("保存成功") print("保存成功")
updateCookie(driver) updateCookie(driver)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment