Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
C
checkData
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ZGC_INDEX
checkData
Commits
36e2d265
Commit
36e2d265
authored
Jan 08, 2020
by
rico.liu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
6f9d10e5
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
232 additions
and
118 deletions
+232
-118
checkData.py
checkData.py
+232
-118
No files found.
checkData.py
View file @
36e2d265
...
...
@@ -11,7 +11,7 @@ import re
import
requests
import
json
def
get_reponse
(
session
,
url
,
headers
):
def
get_re
s
ponse
(
session
,
url
,
headers
):
'''
deal timeout request
'''
...
...
@@ -47,6 +47,7 @@ def checkData(check_data):
name_list
=
[]
url_list
=
[]
source_list
=
[]
price_list
=
[]
for
i
in
range
(
len
(
check_data
)):
df
=
check_data
.
loc
[
i
]
...
...
@@ -55,10 +56,32 @@ def checkData(check_data):
name
=
df
[
'name'
]
main_url
=
df
[
'url'
]
.
strip
()
source
=
df
[
'source'
]
price
=
df
[
'price'
]
print
(
main_url
)
if
"jd"
in
str
(
main_url
):
r
=
get_reponse
(
session
,
main_url
,
headers
)
try
:
sku
=
main_url
.
split
(
'/'
)[
-
1
]
.
split
(
'.'
)[
0
]
#获取价格
url
=
"https://p.3.cn/prices/mgets?skuIds="
+
str
(
sku
)
r
=
get_response
(
session
,
url
,
headers
)
.
json
()
jd_price
=
r
[
0
][
'p'
]
if
len
(
jd_price
)
==
0
:
jd_price
=
price
if
jd_price
==
'-1.00'
:
jd_price
=
price
price_list
.
append
(
jd_price
)
result
.
append
(
"无货,请按要求提供在销渠道证明"
)
id_all
.
append
(
date_id
)
sku_list
.
append
(
sku
)
name_list
.
append
(
name
)
url_list
.
append
(
main_url
)
source_list
.
append
(
source
)
else
:
price_list
.
append
(
jd_price
)
#获取其他信息
main_url_
=
"https://item.jd.com/"
+
sku
+
".html"
r
=
get_response
(
session
,
main_url_
,
headers
)
html
=
etree
.
HTML
(
r
.
text
)
ziying
=
html
.
xpath
(
"//div[@class='name goodshop EDropdown']/em/text()"
)
...
...
@@ -73,6 +96,47 @@ def checkData(check_data):
name_list
.
append
(
name
)
url_list
.
append
(
main_url
)
source_list
.
append
(
source
)
else
:
#获取库存信息
url
=
"https://c0.3.cn/stock?skuId="
+
str
(
sku
)
+
"&area=1_2901_2906_0&cat=9987,653,655"
r
=
get_response
(
session
,
url
,
headers
)
if
r
==
-
1
:
print
(
"通过"
)
result
.
append
(
"通过"
)
id_all
.
append
(
date_id
)
sku_list
.
append
(
sku
)
name_list
.
append
(
name
)
url_list
.
append
(
main_url
)
source_list
.
append
(
source
)
else
:
r
.
encoding
=
'gbk'
is_purchase
=
json
.
loads
(
r
.
text
)
try
:
if
"无货"
in
is_purchase
[
'stock'
][
'stockDesc'
]
or
"无货"
in
is_purchase
[
'stock'
][
'StockStateName'
]:
print
(
"无货,请按要求提供在销渠道证明"
)
result
.
append
(
"无货,请按要求提供在销渠道证明"
)
id_all
.
append
(
date_id
)
sku_list
.
append
(
sku
)
name_list
.
append
(
name
)
url_list
.
append
(
main_url
)
source_list
.
append
(
source
)
else
:
print
(
"通过"
)
result
.
append
(
"通过"
)
id_all
.
append
(
date_id
)
sku_list
.
append
(
sku
)
name_list
.
append
(
name
)
url_list
.
append
(
main_url
)
source_list
.
append
(
source
)
except
:
if
"无货"
in
is_purchase
[
'StockStateName'
]:
print
(
"无货,请按要求提供在销渠道证明"
)
result
.
append
(
"无货,请按要求提供在销渠道证明"
)
id_all
.
append
(
date_id
)
sku_list
.
append
(
sku
)
name_list
.
append
(
name
)
url_list
.
append
(
main_url
)
source_list
.
append
(
source
)
else
:
print
(
"通过"
)
result
.
append
(
"通过"
)
...
...
@@ -89,15 +153,26 @@ def checkData(check_data):
name_list
.
append
(
name
)
url_list
.
append
(
main_url
)
source_list
.
append
(
source
)
except
:
print
(
"链接有误,请按要求提供在销渠道证明"
)
result
.
append
(
"链接有误,请按要求提供在销渠道证明"
)
price_list
.
append
(
price
)
id_all
.
append
(
date_id
)
sku_list
.
append
(
sku
)
name_list
.
append
(
name
)
url_list
.
append
(
main_url
)
source_list
.
append
(
source
)
elif
"gome"
in
str
(
main_url
):
#try_ = session.get(main_url,headers=headers)
try
:
main_url_1
=
re
.
findall
(
".cn/(.*?).html"
,
main_url
)[
0
]
main_url_
=
'https://item.gome.com.cn/'
+
main_url_1
+
'.html'
r
=
get_re
ponse
(
session
,
main_url_
,
headers
)
r
=
get_res
ponse
(
session
,
main_url_
,
headers
)
html
=
etree
.
HTML
(
r
.
text
)
#content = html.xpath("//script[contains(text(),'gomePrice')]/text()")[0]
content
=
html
.
xpath
(
"//script[contains(text(),'gomePrice')]/text()"
)[
0
]
gm_price
=
content
.
split
(
'gomePrice:"'
)[
1
:][
0
]
.
split
(
'"'
)[
0
]
price_list
.
append
(
gm_price
)
ziying
=
html
.
xpath
(
"//span[@class='identify']/text()"
)
if
len
(
ziying
)
==
1
:
...
...
@@ -111,7 +186,23 @@ def checkData(check_data):
name_list
.
append
(
name
)
url_list
.
append
(
main_url
)
source_list
.
append
(
source
)
else
:
#获取库存信息
sku
=
main_url_
.
split
(
'.html'
)[
0
]
.
split
(
'/'
)[
-
1
]
.
replace
(
'-'
,
'/'
)
url
=
"https://ss.gome.com.cn/item/v1/d/m/store/unite/"
+
str
(
sku
)
+
"/N/11010200/110102002/1/null/flag/item/allStores?callback=allStores"
r
=
get_response
(
session
,
url
,
headers
)
content
=
r
.
text
.
replace
(
'allStores('
,
''
)
content
=
content
.
replace
(
')'
,
''
)
content
=
json
.
loads
(
content
)
wuhuo
=
content
[
'result'
][
'stock'
][
'status'
]
if
wuhuo
==
False
:
print
(
"无货,请按要求提供在销渠道证明"
)
result
.
append
(
"无货,请按要求提供在销渠道证明"
)
id_all
.
append
(
date_id
)
sku_list
.
append
(
sku
)
name_list
.
append
(
name
)
url_list
.
append
(
main_url
)
source_list
.
append
(
source
)
else
:
print
(
"通过"
)
result
.
append
(
"通过"
)
...
...
@@ -128,94 +219,117 @@ def checkData(check_data):
name_list
.
append
(
name
)
url_list
.
append
(
main_url
)
source_list
.
append
(
source
)
except
:
print
(
"链接有误,请按要求提供在销渠道证明"
)
result
.
append
(
"链接有误,请按要求提供在销渠道证明"
)
price_list
.
append
(
price
)
id_all
.
append
(
date_id
)
sku_list
.
append
(
sku
)
name_list
.
append
(
name
)
url_list
.
append
(
main_url
)
source_list
.
append
(
source
)
elif
"suning"
in
str
(
main_url
):
#sku = main_url.split('.html')[0].split('/')[-1].replace('-','/')
#main_url_ = 'http://product.suning.com/0000000000/10115687173.html'
#main_url = 'http://product.suning.com/0000000000/10530903341.html'
r
=
get_reponse
(
session
,
main_url
,
headers
)
try
:
sku
=
re
.
findall
(
".com/(.*?).html"
,
main_url
)[
0
]
main_url_
=
'https://product.suning.com/'
+
sku
+
'.html'
r
=
get_response
(
session
,
main_url_
,
headers
)
html
=
etree
.
HTML
(
r
.
text
)
#daaa = r.text
#sn_price = df.价格
#sn_price = '58.00-558.00'
daaa
=
r
.
text
sn_price
=
price
str2
=
html
.
xpath
(
"//input[@id='curPartNumber']/@value"
)[
0
]
ziying1
=
html
.
xpath
(
"//div[@class='proinfo-title']/h1/span/i/text()"
)
ziying2
=
html
.
xpath
(
"//h1[@id='itemDisplayName']/span/text()"
)
#youhuo_ = re.findall("id=\"ie7_onsale\" >(.*?)<i",daaa)
youhuo_
=
re
.
findall
(
"id=
\"
ie7_onsale
\"
>(.*?)<i"
,
daaa
)
if
"自营"
in
ziying1
or
"自营"
in
ziying2
:
#daohuo = html.xpath("//a[@id='tellMe']/span/text()")
url_json
=
f
'https://product.suning.com/pds-web/ajax/itemUniqueInfo_{str(str2)}_0000000000.html'
response_json
=
get_re
ponse
(
session
,
url_json
,
headers
)
response_json
=
get_res
ponse
(
session
,
url_json
,
headers
)
json_data
=
json
.
loads
(
response_json
.
text
)
itemDetail
=
json_data
[
"itemDetail"
]
try
:
isPublished
=
itemDetail
[
"isPublished"
]
except
:
isPublished
=
'0'
try
:
product_name
=
itemDetail
[
"cmmdtyTitle"
]
except
:
product_name
=
'满足要求名称'
if
isPublished
==
'1'
:
if
'此款有货'
in
str
(
youhuo_
)
:
if
(
"定制"
in
str
(
product_name
))
or
(
"防弹"
in
str
(
product_name
))
or
(
"射击"
in
str
(
product_name
))
\
or
(
"订制"
in
str
(
product_name
))
or
(
"卫星"
in
str
(
product_name
))
\
or
(
"靶"
in
str
(
product_name
))
or
(
"企业定制"
in
str
(
product_name
))
\
or
(
"军迷"
in
str
(
product_name
))
or
(
"携行具"
in
str
(
product_name
)):
print
(
"定制/专用类产品暂不通过"
)
result
.
append
(
"定制/专用类产品暂不通过"
)
price_list
.
append
(
sn_price
)
id_all
.
append
(
date_id
)
sku_list
.
append
(
sku
)
name_list
.
append
(
name
)
url_list
.
append
(
main_url
)
source_list
.
append
(
source
)
id_all
.
append
(
date_id
)
else
:
str11
=
html
.
xpath
(
"//input[@id='curPartNumber']/@value"
)[
0
]
str22
=
html
.
xpath
(
"//input[@id='shop_code']/@value"
)[
0
]
str33
=
html
.
xpath
(
"//input[@name='procateCode']/@value"
)[
0
]
#默认收货地址为北京市丰台区
real_url
=
f
'https://pas.suning.com/nspcsale_0_{str11}_{str11}_{str22}_10_010_0100100_157122_1000000_9017_10106_Z001___{str33}.html?callback=pcData'
price_response
=
requests
.
get
(
real_url
)
sn_price
=
re
.
findall
(
'"promotionPrice":"(.*?)",'
,
price_response
.
text
)[
0
]
if
len
(
sn_price
)
!=
0
:
try
:
print
(
'通过'
)
sn_price
=
float
(
sn_price
)
#price.append(sn_price)
result
.
append
(
'通过'
)
id_all
.
append
(
date_id
)
sku_list
.
append
(
sku
)
name_list
.
append
(
name
)
url_list
.
append
(
main_url
)
source_list
.
append
(
source
)
except
:
print
(
'该链接无法定位到唯一商品'
)
result
.
append
(
'该链接无法定位到唯一商品'
)
str11
=
html
.
xpath
(
"//input[@id='curPartNumber']/@value"
)[
0
]
str22
=
html
.
xpath
(
"//input[@id='shop_code']/@value"
)[
0
]
str33
=
html
.
xpath
(
"//input[@name='procateCode']/@value"
)[
0
]
real_url
=
f
'https://pas.suning.com/nspcsale_0_{str11}_{str11}_{str22}_10_010_0100100_157122_1000000_9017_10106_Z001___{str33}.html?callback=pcData'
price_response
=
requests
.
get
(
real_url
)
sn_price
=
re
.
findall
(
'"promotionPrice":"(.*?)",'
,
price_response
.
text
)[
0
]
if
len
(
sn_price
)
!=
0
:
price_list
.
append
(
sn_price
)
else
:
sn_price
=
price
price_list
.
append
(
sn_price
)
else
:
print
(
'无货,请按要求提供在销渠道证明'
)
sn_price
=
price
result
.
append
(
'无货,请按要求提供在销渠道证明'
)
price_list
.
append
(
sn_price
)
id_all
.
append
(
date_id
)
sku_list
.
append
(
sku
)
name_list
.
append
(
name
)
url_list
.
append
(
main_url
)
source_list
.
append
(
source
)
else
:
print
(
'无货,请按要求提供在销渠道证明'
)
sn_price
=
price
result
.
append
(
'无货,请按要求提供在销渠道证明'
)
price_list
.
append
(
sn_price
)
id_all
.
append
(
date_id
)
sku_list
.
append
(
sku
)
name_list
.
append
(
name
)
url_list
.
append
(
main_url
)
source_list
.
append
(
source
)
else
:
print
(
'非自营,请按要求提供在销渠道证明'
)
result
.
append
(
'非自营,请按要求提供在销渠道证明'
)
price_list
.
append
(
price
)
id_all
.
append
(
date_id
)
sku_list
.
append
(
sku
)
name_list
.
append
(
name
)
url_list
.
append
(
main_url
)
source_list
.
append
(
source
)
except
:
print
(
"链接有误,请按要求提供在销渠道证明"
)
result
.
append
(
"链接有误,请按要求提供在销渠道证明"
)
price_list
.
append
(
price
)
id_all
.
append
(
date_id
)
sku_list
.
append
(
sku
)
name_list
.
append
(
name
)
url_list
.
append
(
main_url
)
source_list
.
append
(
source
)
else
:
print
(
"非自营,请按要求提供在销渠道证明"
)
result
.
append
(
"非自营,请按要求提供在销渠道证明"
)
print
(
"非三大电商,请按要求提供在销渠道证明"
)
result
.
append
(
"非三大电商,请按要求提供在销渠道证明"
)
price_list
.
append
(
price
)
id_all
.
append
(
date_id
)
sku_list
.
append
(
sku
)
name_list
.
append
(
name
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment