Commit 8aa6d00e authored by rico.liu

update whiteword

parent 4e536334
......@@ -11,6 +11,7 @@ import re
import requests
import json
import pymssql
import pandas as pd
def get_response(session,url,headers):
'''
......@@ -40,9 +41,9 @@ def checkData(check_data):
conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='admin@2018@)!*',database= 'zi_zh',autocommit=True)
cursor = conn.cursor()
cursor.execute('select stop_word from Stopwords')
cursor.execute('select stop_word,white_word from Stopwords')
data = (cursor.fetchall())
stopword_list = pd.DataFrame(data,columns=['stopword'])['stopword'].tolist()
word_df = pd.DataFrame(data,columns=['stopword','whiteword'])
headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'}
session = requests.Session()
......@@ -101,9 +102,16 @@ def checkData(check_data):
if "自营" in str(ziying):
name = html.xpath(
"//div[@class='sku-name']/text()")
flag = False
for stopword in stopword_list:
if stopword in str(name):
for stopword,whitewords in zip(list(word_df['stopword']),list(word_df['whiteword'])):
try:
whiteword_list = whitewords.split('/')
print(whiteword_list)
except:
whiteword_list = []
for whiteword in whiteword_list:
if stopword in str(name) and whiteword not in str(name):
flag = True
break
if flag:
......@@ -198,8 +206,14 @@ def checkData(check_data):
name = html.xpath(
"//*[@id='gm-prd-main']/div[1]/h1/text()")
flag = False
for stopword in stopword_list:
if stopword in str(name):
for stopword,whitewords in zip(list(word_df['stopword']),list(word_df['whiteword'])):
try:
whiteword_list = whitewords.split('/')
print(whiteword_list)
except:
whiteword_list = []
for whiteword in whiteword_list:
if stopword in str(name) and whiteword not in str(name):
flag = True
break
if flag:
......@@ -280,8 +294,14 @@ def checkData(check_data):
if isPublished == '1':
if '此款有货' in str(youhuo_) :
flag = False
for stopword in stopword_list:
if stopword in str(name):
for stopword,whitewords in zip(list(word_df['stopword']),list(word_df['whiteword'])):
try:
whiteword_list = whitewords.split('/')
print(whiteword_list)
except:
whiteword_list = []
for whiteword in whiteword_list:
if stopword in str(name) and whiteword not in str(name):
flag = True
break
if flag:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment