ZGC_INDEX / checkData

Commit 6f9d10e5, authored Jan 08, 2020 by rico.liu
Commit message: init
Parent: 0e3b9fd7
Pipeline #130 failed with stages

Showing 1 changed file with 236 additions and 0 deletions:
checkData.py (new file, mode 100644, +236 -0)

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 8 11:00:57 2020
@author: rico
"""
from lxml import etree
import re
import requests
import json


def get_reponse(session, url, headers):
    '''
    Fetch a URL with the shared session, handling timed-out requests.
    Returns the response on HTTP 200, otherwise -1 once the retries are exhausted.
    '''
    network_status = True
    try:
        response = session.get(url, headers=headers, timeout=5)
        if response.status_code == 200:
            return response
    except Exception:
        network_status = False
    if not network_status:
        # The first request timed out: retry up to nine more times.
        for i in range(1, 10):
            print('请求超时,第%s次重复请求' % i)  # request timed out, retry attempt %s
            try:
                response = session.get(url, headers=headers, timeout=5)
                if response.status_code == 200:
                    return response
            except Exception:
                continue
    return -1


def checkData(check_data):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
    }
    session = requests.Session()
    print('共' + str(len(check_data)) + '条数据待审核')  # "<n> records awaiting review"
    result = []
    id_all = []
    sku_list = []
    name_list = []
    url_list = []
    source_list = []
    for i in range(len(check_data)):
        df = check_data.loc[i]
        date_id = df['id']
        sku = df['sku']
        name = df['name']
        main_url = df['url'].strip()
        source = df['source']
        print(main_url)
        if "jd" in str(main_url):
            r = get_reponse(session, main_url, headers)
            html = etree.HTML(r.text)
            # JD self-operated ("自营") listings carry the shop badge in this element.
            ziying = html.xpath("//div[@class='name goodshop EDropdown']/em/text()")
            if "自营" in str(ziying):
                name = html.xpath("//div[@class='sku-name']/text()")
                # Reject customised / restricted product categories by keyword match on the title.
                if ("定制" in str(name)) or ("防弹" in str(name)) or ("射击" in str(name)) \
                        or ("订制" in str(name)) or ("卫星" in str(name)) or ("靶" in str(name)) \
                        or ("企业定制" in str(name)) or ("军迷" in str(name)) or ("携行具" in str(name)):
                    print("定制/专用类产品暂不通过")  # customised/special-purpose products are not approved for now
                    result.append("定制/专用类产品暂不通过")
                    id_all.append(date_id)
                    sku_list.append(sku)
                    name_list.append(name)
                    url_list.append(main_url)
                    source_list.append(source)
                else:
                    print("通过")  # approved
                    result.append("通过")
                    id_all.append(date_id)
                    sku_list.append(sku)
                    name_list.append(name)
                    url_list.append(main_url)
                    source_list.append(source)
            else:
                print("非自营,请按要求提供在销渠道证明")  # not self-operated; provide proof of an active sales channel
                result.append("非自营,请按要求提供在销渠道证明")
                id_all.append(date_id)
                sku_list.append(sku)
                name_list.append(name)
                url_list.append(main_url)
                source_list.append(source)
        elif "gome" in str(main_url):
            #try_ = session.get(main_url,headers=headers)
            # Rebuild the canonical Gome item URL from the product id in the original link.
            main_url_1 = re.findall(".cn/(.*?).html", main_url)[0]
            main_url_ = 'https://item.gome.com.cn/' + main_url_1 + '.html'
            r = get_reponse(session, main_url_, headers)
            html = etree.HTML(r.text)
            #content = html.xpath("//script[contains(text(),'gomePrice')]/text()")[0]
            # The "identify" badge only appears on self-operated listings.
            ziying = html.xpath("//span[@class='identify']/text()")
            if len(ziying) == 1:
                name = html.xpath("//*[@id='gm-prd-main']/div[1]/h1/text()")
                if ("定制" in str(name)) or ("防弹" in str(name)) or ("射击" in str(name)) \
                        or ("订制" in str(name)) or ("卫星" in str(name)) or ("靶" in str(name)) \
                        or ("企业定制" in str(name)):
                    print("定制/专用类产品暂不通过")
                    result.append("定制/专用类产品暂不通过")
                    id_all.append(date_id)
                    sku_list.append(sku)
                    name_list.append(name)
                    url_list.append(main_url)
                    source_list.append(source)
                else:
                    print("通过")
                    result.append("通过")
                    id_all.append(date_id)
                    sku_list.append(sku)
                    name_list.append(name)
                    url_list.append(main_url)
                    source_list.append(source)
            else:
                print("非自营,请按要求提供在销渠道证明")
                result.append("非自营,请按要求提供在销渠道证明")
                id_all.append(date_id)
                sku_list.append(sku)
                name_list.append(name)
                url_list.append(main_url)
                source_list.append(source)
        elif "suning" in str(main_url):
            #sku = main_url.split('.html')[0].split('/')[-1].replace('-','/')
            #main_url_ = 'http://product.suning.com/0000000000/10115687173.html'
            #main_url = 'http://product.suning.com/0000000000/10530903341.html'
            r = get_reponse(session, main_url, headers)
            html = etree.HTML(r.text)
            #daaa = r.text
            #sn_price = df.价格
            #sn_price = '58.00-558.00'
            str2 = html.xpath("//input[@id='curPartNumber']/@value")[0]
            ziying1 = html.xpath("//div[@class='proinfo-title']/h1/span/i/text()")
            ziying2 = html.xpath("//h1[@id='itemDisplayName']/span/text()")
            #youhuo_ = re.findall("id=\"ie7_onsale\" >(.*?)<i",daaa)
            if "自营" in ziying1 or "自营" in ziying2:
                #daohuo = html.xpath("//a[@id='tellMe']/span/text()")
                # Pull the item detail JSON to get the published product title.
                url_json = f'https://product.suning.com/pds-web/ajax/itemUniqueInfo_{str(str2)}_0000000000.html'
                response_json = get_reponse(session, url_json, headers)
                json_data = json.loads(response_json.text)
                itemDetail = json_data["itemDetail"]
                try:
                    isPublished = itemDetail["isPublished"]
                except Exception:
                    isPublished = '0'
                try:
                    product_name = itemDetail["cmmdtyTitle"]
                except Exception:
                    product_name = '满足要求名称'  # fallback title that passes the keyword check
                if ("定制" in str(product_name)) or ("防弹" in str(product_name)) or ("射击" in str(product_name)) \
                        or ("订制" in str(product_name)) or ("卫星" in str(product_name)) \
                        or ("靶" in str(product_name)) or ("企业定制" in str(product_name)) \
                        or ("军迷" in str(product_name)) or ("携行具" in str(product_name)):
                    result.append("定制/专用类产品暂不通过")
                    sku_list.append(sku)
                    name_list.append(name)
                    url_list.append(main_url)
                    source_list.append(source)
                    id_all.append(date_id)
                else:
                    str11 = html.xpath("//input[@id='curPartNumber']/@value")[0]
                    str22 = html.xpath("//input[@id='shop_code']/@value")[0]
                    str33 = html.xpath("//input[@name='procateCode']/@value")[0]
                    # Default delivery address is Fengtai District, Beijing.
                    real_url = f'https://pas.suning.com/nspcsale_0_{str11}_{str11}_{str22}_10_010_0100100_157122_1000000_9017_10106_Z001___{str33}.html?callback=pcData'
                    price_response = requests.get(real_url)
                    sn_price = re.findall('"promotionPrice":"(.*?)",', price_response.text)[0]
                    if len(sn_price) != 0:
                        try:
                            print('通过')
                            sn_price = float(sn_price)
                            #price.append(sn_price)
                            result.append('通过')
                            id_all.append(date_id)
                            sku_list.append(sku)
                            name_list.append(name)
                            url_list.append(main_url)
                            source_list.append(source)
                        except Exception:
                            print('该链接无法定位到唯一商品')  # the link does not resolve to a single product
                            result.append('该链接无法定位到唯一商品')
                            id_all.append(date_id)
                            sku_list.append(sku)
                            name_list.append(name)
                            url_list.append(main_url)
                            source_list.append(source)
                    else:
                        print('无货,请按要求提供在销渠道证明')  # out of stock; provide proof of an active sales channel
                        result.append('无货,请按要求提供在销渠道证明')
                        id_all.append(date_id)
                        sku_list.append(sku)
                        name_list.append(name)
                        url_list.append(main_url)
                        source_list.append(source)
            else:
                print('非自营,请按要求提供在销渠道证明')
                result.append('非自营,请按要求提供在销渠道证明')
                id_all.append(date_id)
                sku_list.append(sku)
                name_list.append(name)
                url_list.append(main_url)
                source_list.append(source)
        else:
            # The URL matches none of the supported platforms (JD / Gome / Suning).
            print("非自营,请按要求提供在销渠道证明")
            result.append("非自营,请按要求提供在销渠道证明")
            id_all.append(date_id)
            sku_list.append(sku)
            name_list.append(name)
            url_list.append(main_url)
            source_list.append(source)
        print(f"已经处理{i+1}条数据")  # progress: processed i+1 records so far
    # Write the review results back onto the input frame and return it.
    check_data['审核意见'] = result  # review verdict column
    check_data['update_id'] = id_all
    check_data['sku'] = sku_list
    check_data['name'] = name_list
    check_data['url'] = url_list
    check_data['source'] = source_list
    return check_data
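
Usage note: checkData expects a pandas-style DataFrame with id, sku, name, url and source columns and a 0..n-1 integer index (rows are read with check_data.loc[i]). Below is a minimal, hypothetical calling sketch; the CSV file names are assumptions for illustration, not part of this commit.

# Hypothetical usage sketch (file names assumed, not part of this commit).
import pandas as pd
from checkData import checkData

pending = pd.read_csv('pending_review.csv')   # expected columns: id, sku, name, url, source
pending = pending.reset_index(drop=True)      # checkData reads rows via check_data.loc[i]
reviewed = checkData(pending)                 # adds the 审核意见 (review verdict) column
reviewed.to_csv('reviewed.csv', index=False)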