Commit 0ba97988 authored by huangziyu

1

parent 87c61369
......@@ -81,7 +81,7 @@ def get_cookie():
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '41' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -127,7 +127,7 @@ def get_cookie():
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
if x.xpath(".//div[@class='p-name p-name-type-2']") and x.xpath(".//div[@class='p-price']//i//text()"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
......
......@@ -12,6 +12,7 @@ from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import ChromeOptions
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import traceback
import pymysql
from lxml import etree
# 登陆账号
......@@ -92,7 +93,7 @@ def get_cookie(username=None, password=None, driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId,docId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '41' and id like '%0' limit 30000"
query_spider_sql = f"SELECT id,url,mallId,docId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%0' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -135,7 +136,7 @@ def get_cookie(username=None, password=None, driver=None):
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
if x.xpath(".//div[@class='p-name p-name-type-2']") and x.xpath(".//div[@class='p-price']//i//text()"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
......@@ -158,8 +159,9 @@ def get_cookie(username=None, password=None, driver=None):
"""
mysql_zdzs_proxy(insert_sql)
except Exception as e:
print(e)
print("保存代码异常")
print(e)
print(traceback.format_exc())
continue
print("保存成功")
pass
......
......@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '41' and id like '%1' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%1' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -134,7 +134,7 @@ def get_cookie(username=None,password=None,driver=None):
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
if x.xpath(".//div[@class='p-name p-name-type-2']") and x.xpath(".//div[@class='p-price']//i//text()"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
......
......@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '41' and id like '%2' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%2' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -134,7 +134,7 @@ def get_cookie(username=None,password=None,driver=None):
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
if x.xpath(".//div[@class='p-name p-name-type-2']") and x.xpath(".//div[@class='p-price']//i//text()"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
......
......@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '41' and id like '%3' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%3' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -134,7 +134,7 @@ def get_cookie(username=None,password=None,driver=None):
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
if x.xpath(".//div[@class='p-name p-name-type-2']") and x.xpath(".//div[@class='p-price']//i//text()"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
......
......@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '41' and id like '%4' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%4' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -134,7 +134,7 @@ def get_cookie(username=None,password=None,driver=None):
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
if x.xpath(".//div[@class='p-name p-name-type-2']") and x.xpath(".//div[@class='p-price']//i//text()"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
......
......@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '41' and id like '%5' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%5' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -134,7 +134,7 @@ def get_cookie(username=None,password=None,driver=None):
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
if x.xpath(".//div[@class='p-name p-name-type-2']") and x.xpath(".//div[@class='p-price']//i//text()"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
......
......@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '41' and id like '%6' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%6' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -134,7 +134,7 @@ def get_cookie(username=None,password=None,driver=None):
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
if x.xpath(".//div[@class='p-name p-name-type-2']") and x.xpath(".//div[@class='p-price']//i//text()"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
......
......@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '41' and id like '%7' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%7' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -134,7 +134,7 @@ def get_cookie(username=None,password=None,driver=None):
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
if x.xpath(".//div[@class='p-name p-name-type-2']") and x.xpath(".//div[@class='p-price']//i//text()"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
......
......@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '41' and id like '%8' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%8' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -134,7 +134,7 @@ def get_cookie(username=None,password=None,driver=None):
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
if x.xpath(".//div[@class='p-name p-name-type-2']") and x.xpath(".//div[@class='p-price']//i//text()"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
......
......@@ -92,7 +92,7 @@ def get_cookie(username=None,password=None,driver=None):
count = count + 1
print(f"需要登录")
time.sleep(1)
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '41' and id like '%9' limit 30000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and id like '%9' limit 30000"
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -134,7 +134,7 @@ def get_cookie(username=None,password=None,driver=None):
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
if x.xpath(".//div[@class='p-name p-name-type-2']") and x.xpath(".//div[@class='p-price']//i//text()"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
......@@ -174,7 +174,7 @@ if __name__ == "__main__":
driver = webdriver.Chrome(options=option)
option.add_argument("--disable-blink-features=AutomationControlled")
driver.set_window_size(1200, 600)
account_start_index = 9
account_start_index = 8
get_cookie(username=config.account_list[account_start_index]['username'],
password=config.account_list[account_start_index]['password'], driver=driver)
while True:
......
......@@ -361,8 +361,8 @@ def get_cookie():
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE data_batch ='202401251550270001' and ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' order by docID desc limit 10000"
# query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and data_batch ='202403051815460001' order by id" \
# f" limit 100000"
# query_spider_sql = f"-- SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '41' and (id like '%5' or id like '%6' or id like '%7' or id like '%8' or id like '%9' ) limit 10000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '41' "
# query_spider_sql = f"-- SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' and (id like '%5' or id like '%6' or id like '%7' or id like '%8' or id like '%9' ) limit 10000"
query_spider_sql = f"SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '115' "
query_spider = mysql_zdzs_proxy(query_spider_sql)
index = 0
len_ = len(query_spider)
......@@ -579,7 +579,7 @@ def get_cookie():
is_self_operate_div = ''
if x.xpath(".//div[@class='p-name p-name-type-2']"):
if x.xpath(".//div[@class='p-name p-name-type-2']") and x.xpath(".//div[@class='p-price']//i//text()"):
shop_name = x.xpath(".//span[@class='J_im_icon']/a/@title")[0] if x.xpath(
".//span[@class='J_im_icon']/a/@title") else ''
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment