Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
C
checkData
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ZGC_INDEX
checkData
Commits
4e536334
Commit
4e536334
authored
Mar 11, 2020
by
rico.liu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update stopword
parent
e8970176
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
28 additions
and
11 deletions
+28
-11
checkData.py
checkData.py
+28
-11
No files found.
checkData.py
View file @
4e536334
...
...
@@ -10,6 +10,7 @@ from lxml import etree
import
re
import
requests
import
json
import
pymssql
def
get_response
(
session
,
url
,
headers
):
'''
...
...
@@ -37,6 +38,12 @@ def get_response(session,url,headers):
def
checkData
(
check_data
):
conn
=
pymssql
.
connect
(
host
=
'123.56.115.207'
,
user
=
'zgcprice3311'
,
password
=
'admin@2018@)!*'
,
database
=
'zi_zh'
,
autocommit
=
True
)
cursor
=
conn
.
cursor
()
cursor
.
execute
(
'select stop_word from Stopwords'
)
data
=
(
cursor
.
fetchall
())
stopword_list
=
pd
.
DataFrame
(
data
,
columns
=
[
'stopword'
])[
'stopword'
]
.
tolist
()
headers
=
{
'User-Agent'
:
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
}
session
=
requests
.
Session
()
if
check_data
.
empty
:
...
...
@@ -94,7 +101,13 @@ def checkData(check_data):
if
"自营"
in
str
(
ziying
):
name
=
html
.
xpath
(
"//div[@class='sku-name']/text()"
)
if
(
"定制"
in
str
(
name
))
or
(
"防弹"
in
str
(
name
))
or
(
"射击"
in
str
(
name
))
or
(
"订制"
in
str
(
name
))
or
(
"卫星"
in
str
(
name
))
or
(
"靶"
in
str
(
name
))
or
(
"企业定制"
in
str
(
name
))
or
(
"军迷"
in
str
(
name
))
or
(
"携行具"
in
str
(
name
))
or
(
"出版社"
in
str
(
name
))
or
(
"书籍"
in
str
(
name
))
or
(
"出版社"
in
str
(
brand
))
or
(
"书籍"
in
str
(
subcategory
))
or
(
"酒"
in
str
(
name
)
and
"酒精"
not
in
str
(
name
)):
flag
=
False
for
stopword
in
stopword_list
:
if
stopword
in
str
(
name
):
flag
=
True
break
if
flag
:
print
(
"定制/专用/书籍类产品暂不通过"
)
result
.
append
(
"定制/专用/书籍类产品暂不通过"
)
id_all
.
append
(
date_id
)
...
...
@@ -184,7 +197,12 @@ def checkData(check_data):
if
len
(
ziying
)
==
1
:
name
=
html
.
xpath
(
"//*[@id='gm-prd-main']/div[1]/h1/text()"
)
if
(
"定制"
in
str
(
name
))
or
(
"防弹"
in
str
(
name
))
or
(
"射击"
in
str
(
name
))
or
(
"订制"
in
str
(
name
))
or
(
"卫星"
in
str
(
name
))
or
(
"靶"
in
str
(
name
))
or
(
"企业定制"
in
str
(
name
))
or
(
"出版社"
in
str
(
name
))
or
(
"书籍"
in
str
(
name
))
or
(
"出版社"
in
str
(
brand
))
or
(
"书籍"
in
str
(
subcategory
))
or
(
"酒"
in
str
(
name
)
and
"酒精"
not
in
str
(
name
)):
flag
=
False
for
stopword
in
stopword_list
:
if
stopword
in
str
(
name
):
flag
=
True
break
if
flag
:
print
(
"定制/专用/书籍类产品暂不通过"
)
result
.
append
(
"定制/专用/书籍类产品暂不通过"
)
id_all
.
append
(
date_id
)
...
...
@@ -237,7 +255,7 @@ def checkData(check_data):
elif
"suning"
in
str
(
main_url
):
try
:
#main_url = 'http://product.suning.com/0000000000/1
1673575307
.html'
#main_url = 'http://product.suning.com/0000000000/1
0643583782
.html'
sku
=
re
.
findall
(
".com/(.*?).html"
,
main_url
)[
0
]
main_url_
=
'https://product.suning.com/'
+
sku
+
'.html'
r
=
get_response
(
session
,
main_url_
,
headers
)
...
...
@@ -258,16 +276,15 @@ def checkData(check_data):
isPublished
=
itemDetail
[
"isPublished"
]
except
:
isPublished
=
'0'
product_
name
=
itemDetail
[
"cmmdtyTitle"
]
name
=
itemDetail
[
"cmmdtyTitle"
]
if
isPublished
==
'1'
:
if
'此款有货'
in
str
(
youhuo_
)
:
if
(
"定制"
in
str
(
product_name
))
or
(
"防弹"
in
str
(
product_name
))
or
(
"射击"
in
str
(
product_name
))
\
or
(
"订制"
in
str
(
product_name
))
or
(
"卫星"
in
str
(
product_name
))
\
or
(
"靶"
in
str
(
product_name
))
or
(
"企业定制"
in
str
(
product_name
))
\
or
(
"军迷"
in
str
(
product_name
))
or
(
"携行具"
in
str
(
product_name
))
\
or
(
"出版社"
in
str
(
name
))
or
(
"书籍"
in
str
(
name
))
\
or
(
"出版社"
in
str
(
brand
))
or
(
"书籍"
in
str
(
subcategory
))
\
or
(
"酒"
in
str
(
name
)
and
"酒精"
not
in
str
(
name
)):
flag
=
False
for
stopword
in
stopword_list
:
if
stopword
in
str
(
name
):
flag
=
True
break
if
flag
:
print
(
"定制/专用/书籍类产品暂不通过"
)
result
.
append
(
"定制/专用/书籍类产品暂不通过"
)
price_list
.
append
(
sn_price
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment