Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
InitAPIData
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ZGC_INDEX
InitAPIData
Commits
6efd5a86
Commit
6efd5a86
authored
Jan 19, 2020
by
rico.liu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
69fa1067
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
457 additions
and
427 deletions
+457
-427
checkData.py
checkData.py
+368
-365
matchSKU.py
matchSKU.py
+89
-62
No files found.
checkData.py
View file @
6efd5a86
#!/usr/bin/env python3
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
"""
"""
Created on Wed Jan 8 11:00:57 2020
Created on Wed Jan 8 11:00:57 2020
@author: rico
@author: rico
"""
"""
from
lxml
import
etree
from
lxml
import
etree
import
re
import
re
import
requests
import
requests
import
json
import
json
def get_response(session, url, headers, timeout=5, retries=9):
    """GET *url* through *session*, retrying when the request itself fails.

    The first attempt is made once.  Only if that attempt raises (timeout,
    connection error, ...) is the request repeated, up to *retries* more
    times.  A response whose status code is not 200 on the first attempt is
    not retried.

    Args:
        session: object exposing ``get(url, headers=..., timeout=...)``
            (a ``requests.Session`` in this project).
        url: address to fetch.
        headers: HTTP headers passed through to ``session.get``.
        timeout: per-request timeout in seconds.
        retries: number of extra attempts after a failed first request.

    Returns:
        The response object when a request comes back with status 200,
        otherwise the sentinel ``-1`` (callers compare against ``-1``).
    """
    try:
        response = session.get(url, headers=headers, timeout=timeout)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        request_failed = True
    else:
        if response.status_code == 200:
            return response
        request_failed = False

    if request_failed:
        for attempt in range(1, retries + 1):
            print('请求超时,第%s次重复请求' % attempt)
            try:
                response = session.get(url, headers=headers, timeout=timeout)
            except Exception:
                continue
            if response.status_code == 200:
                return response

    # No attempt produced a 200 response.
    return -1
# Verdict texts written to the '审核意见' column (runtime strings, do not edit).
_VERDICT_PASS = "通过"
_VERDICT_NO_STOCK = "无货,请按要求提供在销渠道证明"
_VERDICT_RESTRICTED = "定制/专用/书籍类产品暂不通过"
_VERDICT_THIRD_PARTY = "非自营,请按要求提供在销渠道证明"
_VERDICT_BAD_LINK = "链接有误,请按要求提供在销渠道证明"
_VERDICT_REGION = '该地区不销售(北京市丰台区)'
_VERDICT_UNSUPPORTED = "非三大电商,请按要求提供在销渠道证明"

# Title keywords that cause a product to be rejected.
_COMMON_KEYWORDS = ("定制", "防弹", "射击", "订制", "卫星", "靶", "企业定制")
_MILITARY_KEYWORDS = ("军迷", "携行具")
_BOOK_KEYWORDS = ("出版社", "书籍")

_REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
}


def _is_restricted(title, listed_name, brand, subcategory, title_keywords):
    """Return True when any rejection keyword appears in the product data."""
    title_text = str(title)
    listed_text = str(listed_name)
    return (
        any(word in title_text for word in title_keywords)
        or any(word in listed_text for word in _BOOK_KEYWORDS)
        or "出版社" in str(brand)
        or "书籍" in str(subcategory)
    )


def _audit_jd(session, headers, main_url, row, brand, subcategory):
    """Audit a JD link; updates row['sku'/'name'/'price'] and returns the verdict."""
    listed_price = row['price']
    sku = main_url.split('/')[-1].split('.')[0]
    row['sku'] = sku

    # Price lookup.
    price_url = "https://p.3.cn/prices/mgets?skuIds=" + str(sku)
    quote = get_response(session, price_url, headers).json()
    jd_price = quote[0]['p']
    if len(jd_price) == 0:
        jd_price = listed_price
    if jd_price == '-1.00':
        # The code treats a '-1.00' price as out of stock; the listed price is kept.
        return _VERDICT_NO_STOCK
    row['price'] = jd_price

    # Product page: seller type and title.
    page = get_response(session, "https://item.jd.com/" + sku + ".html", headers)
    html = etree.HTML(page.text)
    seller = html.xpath("//div[@class='name goodshop EDropdown']/em/text()")
    if "自营" not in str(seller):
        return _VERDICT_THIRD_PARTY

    # NOTE(review): the xpath result is stored as the name unchanged, as the
    # original code did — presumably a list of text nodes; confirm with callers.
    row['name'] = html.xpath("//div[@class='sku-name']/text()")
    if _is_restricted(row['name'], row['name'], brand, subcategory,
                      _COMMON_KEYWORDS + _MILITARY_KEYWORDS):
        return _VERDICT_RESTRICTED

    # Stock lookup.
    stock_url = "https://c0.3.cn/stock?skuId=" + str(sku) + "&area=1_2901_2906_0&cat=9987,653,655"
    stock_resp = get_response(session, stock_url, headers)
    if stock_resp == -1:
        # Stock service unreachable: the product is let through.
        return _VERDICT_PASS
    stock_resp.encoding = 'gbk'
    stock = json.loads(stock_resp.text)
    try:
        sold_out = ("无货" in stock['stock']['stockDesc']
                    or "无货" in stock['stock']['StockStateName'])
    except (KeyError, TypeError):
        # Fallback for payloads that lack the nested 'stock' fields.
        sold_out = "无货" in stock['StockStateName']
    return _VERDICT_NO_STOCK if sold_out else _VERDICT_PASS


def _audit_gome(session, headers, main_url, row, brand, subcategory):
    """Audit a Gome link; updates row['sku'/'name'/'price'] and returns the verdict."""
    item_path = re.findall(".cn/(.*?).html", main_url)[0]
    item_url = 'https://item.gome.com.cn/' + item_path + '.html'
    page = get_response(session, item_url, headers)
    html = etree.HTML(page.text)

    price_script = html.xpath("//script[contains(text(),'gomePrice')]/text()")[0]
    row['price'] = price_script.split('gomePrice:"')[1].split('"')[0]

    badge = html.xpath("//span[@class='identify']/text()")
    if len(badge) != 1:
        return _VERDICT_THIRD_PARTY

    # NOTE(review): xpath result stored unchanged as the name, as in the original.
    row['name'] = html.xpath("//*[@id='gm-prd-main']/div[1]/h1/text()")
    if _is_restricted(row['name'], row['name'], brand, subcategory,
                      _COMMON_KEYWORDS):
        return _VERDICT_RESTRICTED

    # Stock lookup; the sku is the last URL path segment with '-' turned into '/'.
    sku = item_url.split('.html')[0].split('/')[-1].replace('-', '/')
    row['sku'] = sku
    stock_url = ("https://ss.gome.com.cn/item/v1/d/m/store/unite/" + str(sku)
                 + "/N/11010200/110102002/1/null/flag/item/allStores?callback=allStores")
    stock_resp = get_response(session, stock_url, headers)
    # Strip the JSONP wrapper before parsing.
    payload = stock_resp.text.replace('allStores(', '').replace(')', '')
    in_stock = json.loads(payload)['result']['stock']['status']
    if in_stock == False:  # noqa: E712 - keep the original equality semantics
        return _VERDICT_NO_STOCK
    return _VERDICT_PASS


def _audit_suning(session, headers, main_url, row, brand, subcategory):
    """Audit a Suning link; updates row['sku'/'price'] and returns the verdict."""
    sku = re.findall(".com/(.*?).html", main_url)[0]
    row['sku'] = sku
    page = get_response(session, 'https://product.suning.com/' + sku + '.html', headers)
    html = etree.HTML(page.text)

    part_number = html.xpath("//input[@id='curPartNumber']/@value")[0]
    title_tags = html.xpath("//div[@class='proinfo-title']/h1/span/i/text()")
    name_tags = html.xpath("//h1[@id='itemDisplayName']/span/text()")
    on_sale = re.findall('id="ie7_onsale">(.*?)<i', page.text)

    # Membership test against the xpath results (exact element match), as before.
    if "自营" not in title_tags and "自营" not in name_tags:
        return _VERDICT_THIRD_PARTY

    info_url = f'https://product.suning.com/pds-web/ajax/itemUniqueInfo_{str(part_number)}_0000000000.html'
    info = json.loads(get_response(session, info_url, headers).text)
    item_detail = info["itemDetail"]
    try:
        is_published = item_detail["isPublished"]
    except (KeyError, TypeError):
        is_published = '0'
    product_name = item_detail["cmmdtyTitle"]

    if is_published != '1' or '此款有货' not in str(on_sale):
        return _VERDICT_NO_STOCK

    if _is_restricted(product_name, row['name'], brand, subcategory,
                      _COMMON_KEYWORDS + _MILITARY_KEYWORDS):
        return _VERDICT_RESTRICTED

    shop_code = html.xpath("//input[@id='shop_code']/@value")[0]
    category_code = html.xpath("//input[@name='procateCode']/@value")[0]
    price_url = f'https://pas.suning.com/nspcsale_0_{part_number}_{part_number}_{shop_code}_10_010_0100100_157122_1000000_9017_10106_Z001___{category_code}.html?callback=pcData'
    # A timeout keeps one unresponsive price request from stalling the whole run.
    price_resp = requests.get(price_url, timeout=5)
    promo_price = re.findall('"promotionPrice":"(.*?)",', price_resp.text)[0]
    if len(promo_price) == 0:
        return _VERDICT_REGION
    row['price'] = promo_price
    return _VERDICT_PASS


# Checked in order; the first marker contained in the URL selects the auditor.
_SITE_AUDITORS = (
    ("jd", _audit_jd),
    ("gome", _audit_gome),
    ("suning", _audit_suning),
)


def checkData(check_data):
    """Audit every product link in *check_data* and record a verdict per row.

    For each row the URL decides which site auditor (JD, Gome, Suning) runs;
    other URLs are rejected as unsupported.  Any exception raised while
    auditing a row yields the "bad link" verdict with the row's listed price.

    Exactly one record is produced per row, so the result columns always
    match the frame length (the previous implementation could append a price
    twice when a request failed after the price had been read).

    Args:
        check_data: pandas DataFrame with the columns 'id', 'sku', 'name',
            'url', 'source', 'price', 'brand' and 'subcategory'.

    Returns:
        The same DataFrame, modified in place: columns '审核意见' and
        'update_id' are added and 'sku', 'name', 'url', 'source', 'price'
        are overwritten.  An empty frame is returned untouched.
    """
    if check_data.empty:
        print('今日无新增数据')
        return check_data

    print('共' + str(len(check_data)) + '条数据待审核')

    verdicts, ids, skus, names, urls, sources, prices = ([] for _ in range(7))

    with requests.Session() as session:
        for position in range(len(check_data)):
            # iloc: positional access also works when the index is not 0..n-1.
            record = check_data.iloc[position]
            listed_price = record['price']
            # str() so a missing/non-text URL becomes a verdict instead of a crash.
            main_url = str(record['url']).strip()
            row = {'sku': record['sku'], 'name': record['name'], 'price': listed_price}
            print(main_url)

            auditor = next(
                (fn for marker, fn in _SITE_AUDITORS if marker in main_url), None
            )
            if auditor is None:
                verdict = _VERDICT_UNSUPPORTED
            else:
                try:
                    verdict = auditor(session, _REQUEST_HEADERS, main_url, row,
                                      record['brand'], record['subcategory'])
                except Exception:
                    # Row-level boundary: any scraping failure is a bad link.
                    verdict = _VERDICT_BAD_LINK
                    row['price'] = listed_price
            print(verdict)

            verdicts.append(verdict)
            ids.append(record['id'])
            skus.append(row['sku'])
            names.append(row['name'])
            urls.append(main_url)
            sources.append(record['source'])
            prices.append(row['price'])
            print(f"已经处理{position+1}条数据")

    check_data['审核意见'] = verdicts
    check_data['update_id'] = ids
    check_data['sku'] = skus
    check_data['name'] = names
    check_data['url'] = urls
    check_data['source'] = sources
    check_data['price'] = prices
    return check_data
matchSKU.py
View file @
6efd5a86
#!/usr/bin/env python3
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
"""
"""
Created on Tue Jan 7 15:26:43 2020
Created on Tue Jan 7 15:26:43 2020
@author: rico
@author: rico
"""
"""
import
pymssql
import
pymssql
import
pandas
as
pd
import
pandas
as
pd
'''
'''
DL/DW/DZ/GM/JD/LXWL/OFS/SN/YHD/ZCSM
DL/DW/DZ/GM/JD/LXWL/OFS/SN/YHD/ZCSM
'''
'''
# T-SQL batch that walks every (productcode, sku, frm) group with more than
# one row and deletes all but one row of the group.
_DEDUPE_SKU_SQL = (
    "declare OperCursor Cursor for "
    "SELECT productcode,sku,frm,count(*) ca FROM productcode_sku "
    "GROUP BY productcode,sku,frm "
    "HAVING COUNT(*)>1 "
    "open OperCursor "
    "declare @PRODUCTCODE as nvarchar(20) "
    "declare @sku as nvarchar(50) "
    "declare @frm as nvarchar(20) "
    "declare @ca as int "
    "declare @return_value as int "
    "fetch next from OperCursor into @PRODUCTCODE,@sku,@frm,@ca "
    "IF @@fetch_status=0 "
    "delete from productcode_sku "
    "where id in (select top (@ca-1) id from productcode_sku "
    "where productcode=@PRODUCTCODE and sku=@sku and frm=@frm) "
    "while @@fetch_status=0 "
    "begin "
    "fetch next from OperCursor into @PRODUCTCODE,@sku,@frm,@ca "
    "IF @@fetch_status=0 "
    "delete from productcode_sku "
    "where id in (select top (@ca-1) id from productcode_sku "
    "where productcode=@PRODUCTCODE and sku=@sku and frm=@frm) "
    "end "
    "close OperCursor "
    "deallocate OperCursor"
)


def matchSKU(sku_list, frm):
    """Map each SKU in *sku_list* to its product code in ``productcode_sku``.

    Before matching, the table is cleaned: SKUs of products whose state is
    '6' are deleted, then duplicate (productcode, sku, frm) rows are removed.
    Rows whose productcode starts with '78' are ignored when matching.

    Args:
        sku_list: iterable of SKUs; duplicates are collapsed.
        frm: source code such as 'JD' / 'SN' / 'GM', either a single string
            or an iterable of such strings.

    Returns:
        dict keyed by ``f'{sku}'``.  The value is the product code of the
        first matching row, or '0' when nothing matches.  When *frm* is
        exactly 'SN', an unmatched SKU is retried with the prefix
        '0000000000/'.
    """
    # The former f-string query relied on a SQL error to tell a tuple from a
    # single string; decide explicitly and bind the values as parameters.
    sources = [frm] if isinstance(frm, str) else list(frm)

    # NOTE(review): credentials are hard-coded in source; move them to
    # configuration/secret storage and rotate the password.
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311',
                           password='admin@2018@)!*', database='ZI_DataBase')
    try:
        cursor = conn.cursor()
        try:
            print('正在删除库中产品状态为6的sku,稍等。。。')
            cursor.execute("DELETE from productcode_sku where productcode in (select productcode from info_product where state='6')")
            conn.commit()

            print('正在删除库中重复的SKU,稍等。。。')
            cursor.execute(_DEDUPE_SKU_SQL)
            conn.commit()

            print('正在获取当前库中所有sku,请稍等.....')
            if sources:
                placeholders = ", ".join(["%s"] * len(sources))
                # '%%' is a literal '%' once parameters are bound.
                query = ("select productcode, sku from productcode_sku "
                         f"where frm in ({placeholders}) "
                         "and productcode not like '78%%'")
                cursor.execute(query, tuple(sources))
                rows = cursor.fetchall()
            else:
                rows = []
            print('sku获取完毕')
        finally:
            cursor.close()
    finally:
        conn.close()

    # First product code seen for each SKU; built once instead of rescanning
    # the whole result for every requested SKU.
    code_by_sku = {}
    for productcode, db_sku in rows:
        code_by_sku.setdefault(db_sku, productcode)

    sku_check = {}
    for sku in set(sku_list):
        key = str(sku)
        prefixed = '0000000000/' + key
        if key in code_by_sku:
            sku_check[f'{sku}'] = code_by_sku[key]
        elif frm == 'SN' and prefixed in code_by_sku:
            sku_check[f'{sku}'] = code_by_sku[prefixed]
        else:
            sku_check[f'{sku}'] = '0'
    return sku_check
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment