Commit 745aa944 authored by rico.liu's avatar rico.liu

init

parent cb5add1a
Pipeline #132 canceled with stages
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 8 11:00:57 2020
@author: rico
"""
from lxml import etree
import re
import requests
import json
def get_response(session, url, headers):
    """Fetch ``url`` through ``session``, retrying when the request raises.

    Every attempt calls ``session.get(url, headers=headers, timeout=5)``.

    * If the first attempt completes, its response is returned when
      ``status_code`` is 200; any other status yields ``-1`` immediately,
      without retrying.
    * If the first attempt raises, up to nine more attempts are made, each
      announced on stdout. During these retries both exceptions and non-200
      responses simply move on to the next attempt.

    Only ``Exception`` subclasses are treated as request failures, so
    ``KeyboardInterrupt`` and ``SystemExit`` propagate to the caller instead
    of being swallowed and retried.

    Args:
        session: Object exposing ``get(url, headers=..., timeout=...)``.
        url: Address to request.
        headers: Headers forwarded to every attempt.

    Returns:
        The first response whose ``status_code`` is 200, or the integer
        ``-1`` when no attempt produced one.
    """
    try:
        response = session.get(url, headers=headers, timeout=5)
        if response.status_code == 200:
            return response
        # The request itself worked but the status is not 200: give up
        # straight away (the retry loop is reserved for raised errors).
        return -1
    except Exception:
        pass

    for attempt in range(1, 10):
        print('请求超时,第%s次重复请求' % attempt)
        try:
            response = session.get(url, headers=headers, timeout=5)
            if response.status_code == 200:
                return response
        except Exception:
            continue
    return -1
def checkData(check_data):
    """Audit every product row against its JD / Gome / Suning listing.

    For each row the URL decides which site check runs (``"jd"`` is tested
    first, then ``"gome"``, then ``"suning"``); URLs matching none of them are
    rejected as coming from an unsupported site. Each row yields exactly one
    verdict, SKU and price, which are appended to the output lists once, so
    the lists always stay the same length as ``check_data`` even when a site
    check fails half-way through.

    Args:
        check_data: pandas DataFrame with the columns ``id``, ``sku``,
            ``name``, ``url``, ``source`` and ``price``. ``url`` values must
            be strings (``.strip()`` is called on them).

    Returns:
        The same DataFrame object, modified in place: the columns ``审核意见``
        (verdict) and ``update_id`` are added and ``sku``, ``name``, ``url``,
        ``source`` and ``price`` are overwritten. ``name`` and ``source`` are
        copied from the input row; the page title scraped from a listing is
        only used for the keyword screen and is not written back.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'}
    print('共'+str(len(check_data))+'条数据待审核')

    result = []
    id_all = []
    sku_list = []
    name_list = []
    url_list = []
    source_list = []
    price_list = []

    with requests.Session() as session:
        for processed, (_, row) in enumerate(check_data.iterrows(), start=1):
            date_id = row['id']
            sku = row['sku']
            name = row['name']
            main_url = row['url'].strip()
            source = row['source']
            price = row['price']
            print(main_url)

            try:
                if "jd" in main_url:
                    # The JD SKU is the last path segment without its extension.
                    sku = main_url.split('/')[-1].split('.')[0]
                    verdict, audited_price = _audit_jd(session, headers, sku, price)
                elif "gome" in main_url:
                    verdict, sku, audited_price = _audit_gome(
                        session, headers, main_url, sku, price)
                elif "suning" in main_url:
                    sku = re.findall(".com/(.*?).html", main_url)[0]
                    verdict, audited_price = _audit_suning(session, headers, sku, price)
                else:
                    verdict, audited_price = _VERDICT_UNSUPPORTED_SITE, price
            except Exception:
                # Any failure while fetching or parsing a listing is reported
                # as a bad link; the row keeps the price it came in with.
                verdict, audited_price = _VERDICT_BAD_LINK, price

            print(verdict)
            result.append(verdict)
            id_all.append(date_id)
            sku_list.append(sku)
            name_list.append(name)
            url_list.append(main_url)
            source_list.append(source)
            price_list.append(audited_price)
            print(f"已经处理{processed}条数据")

    check_data['审核意见'] = result
    check_data['update_id'] = id_all
    check_data['sku'] = sku_list
    check_data['name'] = name_list
    check_data['url'] = url_list
    check_data['source'] = source_list
    check_data['price'] = price_list
    return check_data


# Verdict texts. Callers compare against these exact strings.
_VERDICT_PASS = "通过"
_VERDICT_OUT_OF_STOCK = "无货,请按要求提供在销渠道证明"
_VERDICT_NOT_SELF_OPERATED = "非自营,请按要求提供在销渠道证明"
_VERDICT_RESTRICTED = "定制/专用类产品暂不通过"
_VERDICT_BAD_LINK = "链接有误,请按要求提供在销渠道证明"
_VERDICT_UNSUPPORTED_SITE = "非三大电商,请按要求提供在销渠道证明"
_VERDICT_NOT_SOLD_IN_REGION = '该地区不销售(北京市丰台区)'

# Title keywords that cause a listing to be rejected as a custom/special item.
_KEYWORDS_BASE = ("定制", "防弹", "射击", "订制", "卫星", "靶", "企业定制")
_KEYWORDS_EXTENDED = _KEYWORDS_BASE + ("军迷", "携行具")


def _has_restricted_keyword(title, keywords):
    """Return True when ``str(title)`` contains any of ``keywords``."""
    text = str(title)
    return any(keyword in text for keyword in keywords)


def _audit_jd(session, headers, sku, price):
    """Check a JD listing; return ``(verdict, price)``. Raises on fetch/parse errors."""
    # Fetch the price.
    price_url = "https://p.3.cn/prices/mgets?skuIds=" + str(sku)
    jd_price = get_response(session, price_url, headers).json()[0]['p']
    if len(jd_price) == 0:
        jd_price = price
    if jd_price == '-1.00':
        # This price value is reported as out of stock; keep the incoming price.
        return _VERDICT_OUT_OF_STOCK, price

    # Fetch the product page for the seller tag and the title.
    page = get_response(session, "https://item.jd.com/" + sku + ".html", headers)
    html = etree.HTML(page.text)
    seller_tag = html.xpath(
        "//div[@class='name goodshop EDropdown']/em/text()")
    if "自营" not in str(seller_tag):
        return _VERDICT_NOT_SELF_OPERATED, jd_price

    title = html.xpath("//div[@class='sku-name']/text()")
    if _has_restricted_keyword(title, _KEYWORDS_EXTENDED):
        return _VERDICT_RESTRICTED, jd_price

    # Fetch the stock information.
    stock_url = "https://c0.3.cn/stock?skuId="+str(sku)+"&area=1_2901_2906_0&cat=9987,653,655"
    stock_response = get_response(session, stock_url, headers)
    if stock_response == -1:
        # No usable stock answer: the listing is passed.
        return _VERDICT_PASS, jd_price
    stock_response.encoding = 'gbk'
    stock_info = json.loads(stock_response.text)
    try:
        sold_out = ("无货" in stock_info['stock']['stockDesc']
                    or "无货" in stock_info['stock']['StockStateName'])
    except (KeyError, TypeError):
        # Fall back to a top-level 'StockStateName' when the nested
        # fields are not available.
        sold_out = "无货" in stock_info['StockStateName']
    return (_VERDICT_OUT_OF_STOCK if sold_out else _VERDICT_PASS), jd_price


def _audit_gome(session, headers, main_url, sku, price):
    """Check a Gome listing; return ``(verdict, sku, price)``. Raises on fetch/parse errors.

    ``sku`` is returned unchanged except when the stock check runs, in which
    case the SKU derived from the page URL (``-`` replaced by ``/``) is returned.
    """
    page_path = re.findall(".cn/(.*?).html", main_url)[0]
    page_url = 'https://item.gome.com.cn/' + page_path + '.html'
    page = get_response(session, page_url, headers)
    html = etree.HTML(page.text)
    script_text = html.xpath("//script[contains(text(),'gomePrice')]/text()")[0]
    gm_price = script_text.split('gomePrice:"')[1].split('"')[0]

    # Exactly one 'identify' span is treated as the self-operated marker.
    seller_tag = html.xpath("//span[@class='identify']/text()")
    if len(seller_tag) != 1:
        return _VERDICT_NOT_SELF_OPERATED, sku, gm_price

    title = html.xpath("//*[@id='gm-prd-main']/div[1]/h1/text()")
    # NOTE(review): this list lacks the two extra keywords used for JD and
    # Suning - confirm whether that is intentional.
    if _has_restricted_keyword(title, _KEYWORDS_BASE):
        return _VERDICT_RESTRICTED, sku, gm_price

    # Fetch the stock information.
    stock_sku = page_url.split('.html')[0].split('/')[-1].replace('-', '/')
    stock_url = "https://ss.gome.com.cn/item/v1/d/m/store/unite/"+str(stock_sku)+"/N/11010200/110102002/1/null/flag/item/allStores?callback=allStores"
    stock_response = get_response(session, stock_url, headers)
    # Strip the JSONP wrapper before decoding.
    payload = stock_response.text.replace('allStores(', '').replace(')', '')
    in_stock = json.loads(payload)['result']['stock']['status']
    # Deliberately '== False': only a false/zero status counts as out of stock.
    if in_stock == False:
        return _VERDICT_OUT_OF_STOCK, stock_sku, gm_price
    return _VERDICT_PASS, stock_sku, gm_price


def _audit_suning(session, headers, sku, price):
    """Check a Suning listing; return ``(verdict, price)``. Raises on fetch/parse errors."""
    page = get_response(session, 'https://product.suning.com/' + sku + '.html', headers)
    page_text = page.text
    html = etree.HTML(page_text)
    part_number = html.xpath("//input[@id='curPartNumber']/@value")[0]
    seller_tag_a = html.xpath("//div[@class='proinfo-title']/h1/span/i/text()")
    seller_tag_b = html.xpath("//h1[@id='itemDisplayName']/span/text()")
    on_sale_text = re.findall("id=\"ie7_onsale\" >(.*?)<i", page_text)

    # List membership: one of the text nodes must be exactly this marker.
    if "自营" not in seller_tag_a and "自营" not in seller_tag_b:
        return _VERDICT_NOT_SELF_OPERATED, price

    info_url = f'https://product.suning.com/pds-web/ajax/itemUniqueInfo_{str(part_number)}_0000000000.html'
    info_response = get_response(session, info_url, headers)
    item_detail = json.loads(info_response.text)["itemDetail"]
    try:
        is_published = item_detail["isPublished"]
    except (KeyError, TypeError):
        is_published = '0'
    product_name = item_detail["cmmdtyTitle"]

    if is_published != '1' or '此款有货' not in str(on_sale_text):
        return _VERDICT_OUT_OF_STOCK, price
    if _has_restricted_keyword(product_name, _KEYWORDS_EXTENDED):
        return _VERDICT_RESTRICTED, price

    shop_code = html.xpath("//input[@id='shop_code']/@value")[0]
    category_code = html.xpath("//input[@name='procateCode']/@value")[0]
    real_url = f'https://pas.suning.com/nspcsale_0_{part_number}_{part_number}_{shop_code}_10_010_0100100_157122_1000000_9017_10106_Z001___{category_code}.html?callback=pcData'
    # A timeout keeps one stalled price request from hanging the whole run.
    price_response = requests.get(real_url, timeout=5)
    sn_price = re.findall('"promotionPrice":"(.*?)",', price_response.text)[0]
    if len(sn_price) != 0:
        return _VERDICT_PASS, sn_price
    # An empty promotion price is reported as "not sold in this region".
    return _VERDICT_NOT_SOLD_IN_REGION, price
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 8 10:00:34 2020
@author: rico
"""
import pymssql
import pandas as pd
import datetime
#from pyforest import *
from matchSKU import matchSKU
from checkData import checkData
'''
JD_ZH/ZH_DL/ZH_DW/ZH_DZ/ZH_LXWL/ZH_OFS/ZH_SN/ZH_YHD/ZH_ZCSM
初始化API数据包含SKU排重、三大电商数据审核
'''
def initAPIData(base_frm):
    """Initialise freshly collected API data in the ``base_frm`` database.

    Steps, in order:

    1. Copy rows of ``product_detail`` whose SKU is not yet in ``product_all``
       into ``product_all`` (skipped when the derived source is ``'JD'``).
    2. For the sources ``JD``/``SN``/``GM``: audit the pending rows with
       ``checkData``, write back price and review flags, match SKUs of rows
       in state ``'1'`` with ``matchSKU``, and finally record every
       non-passing row in ``zi_zh.API_returnErrorData``.
       For every other source: match SKUs first, then mark all remaining
       pending rows as passed.

    All row values are sent to the server as query parameters (converted to
    ``str`` first), so quotes inside names or URLs cannot break the SQL.
    Both connections are closed even when a step fails.

    Args:
        base_frm: Database name such as ``'ZH_SN'``. The source code is
            derived from it by removing ``'_'`` and ``'ZH'``.

    Returns:
        None.
    """
    source = base_frm.replace('_', '').replace('ZH', '')
    audited = None

    conn = _open_connection(base_frm)
    try:
        cursor = conn.cursor()
        _import_new_products(cursor, base_frm, source)

        if source in ['JD', 'SN', 'GM']:
            audited = _audit_pending_rows(cursor, conn)
            # SKU de-duplication, restricted to rows that passed the audit.
            _assign_product_codes(
                cursor, conn, source,
                "select distinct sku from product_all where productcode is null and state ='1'")
        else:
            # SKU de-duplication.
            _assign_product_codes(
                cursor, conn, source,
                "select distinct sku from product_all where productcode is null")
            print('更新数据库~')
            cursor.execute("select id from product_all where ziying is null and productcode is null")
            pending_ids = [record[0] for record in cursor.fetchall()]
            for pending_id in pending_ids:
                cursor.execute(
                    "update product_all set ziying='1',dingzhi='1',state='1' where id=%s",
                    (str(pending_id),))
                conn.commit()
        cursor.close()
    finally:
        conn.close()

    if audited is not None:
        print('完成')
        print('更新问题数据库')
        _record_rejections(audited)
        print('完成')

    print('数据初始化处理完成')
    return None


# NOTE(review): credentials are hard-coded in source; consider moving them to
# configuration that is not committed to the repository.
_DB_HOST = '123.56.115.207'
_DB_USER = 'zgcprice3311'
_DB_PASSWORD = 'admin@2018@)!*'

# Review flags (ziying, dingzhi, state) written for each audit verdict.
# Verdicts not listed here use _FLAGS_OTHER_REJECTION.
_FLAGS_BY_VERDICT = {
    '通过': ('1', '1', '1'),
    '非自营,请按要求提供在销渠道证明': ('2', '1', '2'),
    '定制/专用类产品暂不通过': ('1', '2', '2'),
}
_FLAGS_OTHER_REJECTION = ('2', '1', '2')

_IMPORT_SQL_DL = """
insert into product_all(sku,name,brand,category,zi_subcategoryname,zi_subcategorycode,url,price,source,state)
select distinct a.sku,a.name,a.brand,f.name'sourcecategoryname',d.name'subcategoryname',c.zi_subcategorycode,a.url,e.price,%s,a.state from
(select * from product_detail where sku not in (select sku from product_all ))a
left join Product_Category f
on a.category=f.categoryid
left join (select distinct zi_subcategorycode,sourcesubcategory from ZI_DataBase.dbo.Product_Relation_Attribute_SubTitle where source=%s) c
on f.name=c.sourcesubcategory
left join ZI_DataBase.dbo.zi_subcategory d
on c.zi_subcategorycode=d.subcategorycode
left join product_price e
on a.sku=e.sku
"""

_IMPORT_SQL_DEFAULT = """
insert into product_all(sku,name,brand,category,zi_subcategoryname,zi_subcategorycode,url,price,source,state)
select distinct a.sku,a.name,a.brand,a.category,d.name'subcategoryname',c.zi_subcategorycode,a.url,e.price,%s,a.state from
(select * from product_detail where sku not in (select sku from product_all ))a
left join (select distinct zi_subcategorycode,sourcesubcategory from ZI_DataBase.dbo.Product_Relation_Attribute_SubTitle where source=%s) c
on a.category=c.sourcesubcategory
left join ZI_DataBase.dbo.zi_subcategory d
on c.zi_subcategorycode=d.subcategorycode
left join product_price e
on a.sku=e.sku
"""


def _open_connection(database):
    """Open an autocommit pymssql connection to ``database``."""
    return pymssql.connect(host=_DB_HOST, user=_DB_USER, password=_DB_PASSWORD,
                           database=database, autocommit=True)


def _import_new_products(cursor, base_frm, source):
    """Insert not-yet-known ``product_detail`` rows into ``product_all``."""
    if source == 'JD':
        return
    statement = _IMPORT_SQL_DL if source == 'DL' else _IMPORT_SQL_DEFAULT
    cursor.execute(statement, (base_frm, source))


def _audit_pending_rows(cursor, conn):
    """Run ``checkData`` on pending rows and store prices and review flags."""
    # Select the rows that still await review.
    cursor.execute("select id,sku,name,url,source,price from product_all where ziying is null and productcode is null")
    pending = pd.DataFrame(cursor.fetchall(),
                           columns=['id', 'sku', 'name', 'url', 'source', 'price'])
    # Audit the marketplace listings.
    audited = checkData(pending)

    print('补全价格')
    flag_updates = []
    for row_id, verdict, price in zip(audited['update_id'], audited['审核意见'], audited['price']):
        cursor.execute("update product_all set price=%s where id=%s",
                       (str(price), str(row_id)))
        conn.commit()
        flag_updates.append((row_id, _FLAGS_BY_VERDICT.get(verdict, _FLAGS_OTHER_REJECTION)))

    print('更新数据库~')
    for row_id, (ziying, dingzhi, state) in flag_updates:
        cursor.execute(
            "update product_all set ziying=%s,dingzhi=%s,state=%s where id=%s",
            (ziying, dingzhi, state, str(row_id)))
        conn.commit()
    return audited


def _assign_product_codes(cursor, conn, source, select_sql):
    """Match the SKUs returned by ``select_sql`` and store their product codes."""
    cursor.execute(select_sql)
    sku_list = [record[0] for record in cursor.fetchall()]
    sku_check = matchSKU(sku_list, source)
    print('sku排重中')
    for sku, product_code in sku_check.items():
        if product_code == '0':
            # '0' means matchSKU found no existing product for this SKU.
            continue
        cursor.execute("update product_all set productcode=%s where sku=%s",
                       (str(product_code), str(sku)))
        conn.commit()
    print('sku排重完毕')


def _record_rejections(audited):
    """Insert every non-passing audited row into ``zi_zh.API_returnErrorData``."""
    conn = _open_connection('zi_zh')
    try:
        cursor = conn.cursor()
        today = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
        rows = zip(audited['sku'], audited['name'], audited['url'],
                   audited['审核意见'], audited['source'])
        for sku, name, url, remark, row_source in rows:
            if remark == '通过':
                continue
            cursor.execute(
                "insert into API_returnErrorData (sku,name,url,remark,create_time,source) VALUES (%s,%s,%s,%s,%s,%s)",
                (str(sku), str(name), str(url), str(remark), today, str(row_source)))
        cursor.close()
    finally:
        conn.close()
if __name__ == "__main__":
    # Script entry point: initialise the 'ZH_SN' database when run directly.
    initAPIData(base_frm='ZH_SN')
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 7 15:26:43 2020
@author: rico
"""
import pymssql
import pandas as pd
'''
DL/DW/DZ/GM/JD/LXWL/OFS/SN/YHD/ZCSM
'''
def matchSKU(sku_list, frm):
    """Map each SKU to the product code already registered for it.

    Side effect: before matching, rows of ``productcode_sku`` whose product
    is in state ``'6'`` in ``info_product`` are deleted and the deletion is
    committed.

    Matching compares ``str(sku)`` with the ``sku`` column of
    ``productcode_sku`` restricted to ``frm``; when several rows share a SKU
    the first one returned by the query wins. If ``frm`` is exactly ``'SN'``
    and the plain SKU is unknown, ``'0000000000/' + str(sku)`` is tried too.

    Args:
        sku_list: Iterable of SKUs; duplicates are collapsed.
        frm: Either one source code as a string (``frm = ...``) or a
            tuple/list of source codes (``frm in (...)``).

    Returns:
        dict mapping ``str(sku)`` to the matched product code, or to the
        string ``'0'`` when nothing matched.
    """
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='admin@2018@)!*',database='ZI_DataBase')
    try:
        cursor = conn.cursor()
        # First remove the SKUs that belong to products in the error state.
        print('正在删除库中产品状态为6的sku,稍等。。。')
        cursor.execute("DELETE from productcode_sku where productcode in (select productcode from info_product where state='6')")
        conn.commit()

        # Load every known SKU for the requested source(s).
        print('正在获取当前库中所有sku,请稍等.....')
        if isinstance(frm, str):
            cursor.execute("select * from productcode_sku where frm=%s", (frm,))
        else:
            sources = tuple(frm)
            placeholders = ",".join(["%s"] * len(sources))
            cursor.execute(
                "select * from productcode_sku where frm in (" + placeholders + ")",
                sources)
        data_sku = cursor.fetchall()
        data_sku_file = pd.DataFrame(
            data_sku,
            columns=['id', 'productcode', 'sku', 'skuname', 'createddate', 'frm'])
        print('sku获取完毕')
        cursor.close()
    finally:
        conn.close()

    # Build the lookup once; setdefault keeps the first product code per SKU.
    code_by_sku = {}
    for known_sku, product_code in zip(data_sku_file['sku'], data_sku_file['productcode']):
        code_by_sku.setdefault(known_sku, product_code)

    # NOTE(review): the original comments mention a '2' marker for SKUs that
    # match several rows, but no code path ever produced it - confirm whether
    # multi-match detection is wanted.
    not_found = object()
    sku_check = {}
    for sku in set(sku_list):
        key = str(sku)
        product_code = code_by_sku.get(key, not_found)
        if product_code is not_found and frm == 'SN':
            product_code = code_by_sku.get('0000000000/' + key, not_found)
        sku_check[f'{sku}'] = '0' if product_code is not_found else product_code
    return sku_check
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment