Commit 62bb5b88 authored by rico.liu's avatar rico.liu

remove

parent fa056178
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 25 11:07:18 2020
@author: rico
"""
import requests
def zgc_api(func, data):
    """POST ``data`` as JSON to the ``func`` endpoint and return the decoded reply.

    :param func: endpoint name, inserted into the URL path.
    :param data: mapping with the request payload. It is copied before the
        API key is added, so the caller's object is left untouched.
    :return: the JSON-decoded response body.
    """
    # NOTE(review): the API key and host are hard-coded in source; consider
    # loading them from configuration instead.
    key = 'eRo1#ZFHY5N&GEzV'
    api = f"http://59.110.219.171:8000/{func}/"
    print(api)
    headers = {
        'Connection': 'Keep-Alive'
    }
    # Work on a copy: the original inserted the key into the caller's dict.
    payload = dict(data)
    payload['key'] = key
    # The context manager releases the session's connections when done.
    with requests.session() as session:
        response = session.post(api, json=payload, headers=headers, timeout=600)
        return response.json()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 7 23:08:30 2020
@author: rico
"""
import pymssql
class MSSQL:
    """Small wrapper holding a pymssql connection and cursor for a known host.

    After construction ``_conn`` is the open connection (or ``None`` when the
    connection could not be established) and ``_cur`` is its cursor (or
    ``None``).
    """

    # NOTE(review): credentials are embedded in source; consider moving them
    # to configuration or environment variables.
    _CREDENTIALS = {
        '123.57.45.119': ('zgcprice', 'zgcprice20200708'),
        '123.56.115.207': ('zgcindex', 'jiayou202006'),
        '10.0.120.131': ('sa', '1qaz@WSX'),
        '10.0.120.79': ('sa', '1qaz@WSX'),
    }

    def __init__(self, host, db):
        """Connect to database ``db`` on ``host`` using the credentials known for that host."""
        self.host = host
        self.db = db
        # Recorded so GetConnectInfo can report it (it was never set before).
        self.user = self._CREDENTIALS.get(host, (None, None))[0]
        self._cur = None
        self._conn = self.GetConnect()
        if self._conn:
            self._cur = self._conn.cursor()

    # Connect to the database
    def GetConnect(self):
        """Open and return a connection; print the error and return ``None`` on failure."""
        credentials = self._CREDENTIALS.get(self.host)
        if credentials is None:
            # Unknown hosts previously failed through an unbound local variable.
            print("连接数据库失败, %s" % ("no credentials configured for host %s" % self.host))
            return None
        user, pwd = credentials
        try:
            return pymssql.connect(
                host=self.host,
                user=user,
                password=pwd,
                database=self.db,
                autocommit=True
            )
        except Exception as err:
            print("连接数据库失败, %s" % err)
            return None

    # Show connection information
    def GetConnectInfo(self):
        """Print the server, user name and database of this wrapper."""
        print("连接信息:")
        print("服务器:%s , 用户名:%s , 数据库:%s " % (self.host, self.user, self.db))

    def Close(self):
        """Close the cursor and the connection if they were opened."""
        if self._cur is not None:
            self._cur.close()
        if self._conn:
            self._conn.close()
'''
ms = MSSQL('123.56.115.207','zdindex')
conn = ms._conn
cursor = ms._cur
cursor.execute(f"select top 10 * from zd_week_price")
cursor.fetchall()
ms.Close()
cursor.close()
conn.close()
'''
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 19 22:26:55 2020
@author: rico
"""
import pymssql
import pandas as pd
import time
import warnings
from public_function import Index
warnings.filterwarnings("ignore")
def delete_spu_price():
    """Delete the SPU restore-price rows of the latest period from three price tables.

    The latest period is read as ``max(periods)`` from ``zd_entry_goods_price``;
    rows tagged with the ``*-SPU-HY`` identifiers for that period are removed
    from ``zd_electricity_price``, ``zd_entry_goods_price`` and
    ``zd_purchase_price``.
    """
    create_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    conn_zdindex = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='zdindex', autocommit=True)
    try:
        cursor_zdindex = conn_zdindex.cursor()
        cursor_zdindex.execute("select top 1 max(periods) from zd_entry_goods_price")
        periods = str(cursor_zdindex.fetchone()[0])
        cursor_zdindex.execute(f"delete from zd_electricity_price where periods = '{periods}' and mall_id = 'DS-SPU-HY'")
        cursor_zdindex.execute(f"delete from zd_entry_goods_price where periods = '{periods}' and shop_id = 'ST-SPU-HY'")
        cursor_zdindex.execute(f"delete from zd_purchase_price where periods = '{periods}' and purchase_id = 'ZC-SPU-HY'")
    finally:
        # Release the connection even when one of the statements fails.
        conn_zdindex.close()
    print(f"SPU还原价删除完毕 -- {create_time}")
def get_configure_price():
    """Load the accessory price table ``configure_price`` as a DataFrame.

    Database errors are retried until the query succeeds, as before, but in a
    loop instead of by recursion. Only ``pymssql.Error`` triggers a retry, so
    programming or data errors are no longer retried forever.

    :return: DataFrame with columns category_code, category_name, part,
        detail and price (price converted with ``pd.to_numeric``).
    """
    while True:
        try:
            conn = pymssql.connect(host='123.57.45.119', user='zgcprice', password='zgcprice20200708', database='price_calculate', autocommit=True)
            try:
                cursor = conn.cursor()
                cursor.execute("select category_code,category_name,part,detail,price from configure_price")
                columns = [column[0] for column in cursor.description]
                configure_price = pd.DataFrame(cursor.fetchall(), columns=columns)
            finally:
                conn.close()
        except pymssql.Error:
            print('连接失败,重新连接')
            continue
        configure_price['price'] = pd.to_numeric(configure_price['price'])
        return configure_price
def get_product_code(category_list):
    """Load the SPU/SKU mapping for the given category names.

    Database errors are retried until the query succeeds. The previous
    implementation retried by calling itself without ``category_list``, which
    raised ``TypeError`` on the first failure.

    :param category_list: category names to filter on (``p_category.name``).
    :return: DataFrame with columns spuid, spuname, sku, skuname,
        categoryname, categoryid and brandid for SKUs whose state is 1 or 4.
    """
    placeholders = ','.join(['%s'] * len(category_list))
    query = (
        "select a.spuid,b.spuname,a.sku,a.skuname,c.name as categoryname,b.categoryid,b.brandid from p_sku a "
        "left join p_spu b "
        "on a.spuid = b.id "
        "left join p_category c "
        "on b.categoryid = c.id "
        "where a.state in (1,4) and c.name in (%s)" % placeholders
    )
    while True:
        try:
            conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='ZI_NEW', autocommit=True)
            try:
                cursor = conn.cursor()
                cursor.execute(query, tuple(category_list))
                columns = [column[0] for column in cursor.description]
                return pd.DataFrame(cursor.fetchall(), columns=columns)
            finally:
                conn.close()
        except pymssql.Error:
            print('连接失败,重新连接')
def get_attr_data(category_list):
    """Load the SKU parameter rows (``vw_sku_params``) for the given category names.

    Database errors are retried until the query succeeds, in a loop rather
    than by recursion; other exceptions propagate to the caller.

    :param category_list: category names to filter on (``categoryname``).
    :return: DataFrame with every column of ``vw_sku_params``.
    """
    placeholders = ','.join(['%s'] * len(category_list))
    query = "select * from vw_sku_params where categoryname in (%s)" % placeholders
    while True:
        try:
            conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='ZI_NEW', autocommit=True)
            try:
                cursor = conn.cursor()
                cursor.execute(query, tuple(category_list))
                columns = [column[0] for column in cursor.description]
                return pd.DataFrame(cursor.fetchall(), columns=columns)
            finally:
                conn.close()
        except pymssql.Error:
            print('连接失败,重新连接')
def calculate_configure_price(cat_params_df, productCode, configure_price_df):
    """Sum the accessory prices of one SKU.

    :param cat_params_df: DataFrame with columns ``sku``, ``name`` and ``value``.
    :param productCode: SKU whose parameters are priced.
    :param configure_price_df: DataFrame with columns ``part``, ``detail`` and ``price``.
    :return: total price of all (name, value) pairs found in the price table.
        Parameters whose name is not a priced part are skipped; a priced part
        whose detail has no price contributes 0 and is reported on stdout.
    """
    sku_params = cat_params_df[cat_params_df['sku'] == productCode]
    # Built once instead of re-listing the column for every parameter.
    priced_parts = set(configure_price_df['part'])
    price = 0
    for name, value in zip(sku_params['name'], sku_params['value']):
        if name not in priced_parts:
            continue
        is_match = (configure_price_df['part'] == name) & (configure_price_df['detail'] == value)
        matches = configure_price_df[is_match]['price'].tolist()
        if not matches:
            print("无该配件价格:", name, value)
            continue
        # The first matching row wins, as before.
        price += matches[0]
    return price
def create_price_difference():
    """Rebuild the SKU-versus-SPU accessory price difference table.

    Empties ``configure_price_difference`` and inserts one row per SKU of the
    three hard-coded categories, holding the accessory price computed by
    ``calculate_configure_price``.
    """
    category_list = ['笔记本', '台式机', '一体电脑']
    category_id_list = ['672', '673', '12798']
    category_dict = dict(zip(category_id_list, category_list))
    configure_price_df = get_configure_price()
    spu_sku_df = get_product_code(category_list)
    params_df = get_attr_data(category_list)
    # Parameterized insert: the driver quotes the values, so names containing
    # quotes no longer need manual escaping.
    insert_sql = (
        "insert into configure_price_difference "
        "(sku,skuname,categoryid,brandid,spuid,price_difference,create_time) "
        "values (%s,%s,%s,%s,%s,%s,%s)"
    )
    conn = pymssql.connect(host='123.57.45.119', user='zgcprice', password='zgcprice20200708', database='price_calculate', autocommit=True)
    try:
        cursor = conn.cursor()
        # Clear the existing table
        cursor.execute("delete from configure_price_difference")
        create_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        for category_id in category_id_list:
            category = category_dict[category_id]
            cat_params_df = params_df[params_df['categoryname'] == category]
            cat_configure_price_df = configure_price_df[configure_price_df['category_name'] == category]
            sku_source_list = spu_sku_df[spu_sku_df['categoryname'] == category]['sku'].unique().tolist()
            index = Index()
            # A single-SKU category would make the progress total 0 and
            # divide by zero, so the total is at least 1.
            last_position = max(len(sku_source_list) - 1, 1)
            for process_index, sku in enumerate(sku_source_list):
                print(index(process_index, last_position), end='%')
                # One lookup per SKU; the first matching row supplies all fields.
                sku_row = spu_sku_df[spu_sku_df['sku'] == sku].iloc[0]
                configure_difference = calculate_configure_price(cat_params_df, sku, cat_configure_price_df)
                cursor.execute(insert_sql, (
                    str(sku),
                    str(sku_row['skuname']),
                    str(sku_row['categoryid']),
                    str(sku_row['brandid']),
                    str(sku_row['spuid']),
                    float(configure_difference),
                    create_time,
                ))
            print(f"{category}计算完成")
    finally:
        conn.close()
def create_price_difference_single(category, sku_list):
    """Insert or update the accessory price difference for selected SKUs of one category.

    :param category: category name the SKUs belong to.
    :param sku_list: SKUs to (re)compute. A SKU already present in
        ``configure_price_difference`` is updated, otherwise it is inserted.
    :raises IndexError: when a SKU is not present in the SPU/SKU mapping.
    """
    category_list = [category]
    configure_price_df = get_configure_price()
    spu_sku_df = get_product_code(category_list)
    params_df = get_attr_data(category_list)
    cat_params_df = params_df[params_df['categoryname'] == category]
    cat_configure_price_df = configure_price_df[configure_price_df['category_name'] == category]
    # Parameterized statements: the driver quotes the values.
    select_sql = "select sku from configure_price_difference where sku = %s"
    insert_sql = (
        "insert into configure_price_difference "
        "(sku,skuname,categoryid,brandid,spuid,price_difference,create_time) "
        "values (%s,%s,%s,%s,%s,%s,%s)"
    )
    update_sql = "update configure_price_difference set price_difference = %s , update_time = %s where sku = %s"
    conn = pymssql.connect(host='123.57.45.119', user='zgcprice', password='zgcprice20200708', database='price_calculate', autocommit=True)
    try:
        cursor = conn.cursor()
        create_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        for sku in sku_list:
            # One lookup per SKU; the first matching row supplies all fields.
            sku_row = spu_sku_df[spu_sku_df['sku'] == sku].iloc[0]
            configure_difference = float(calculate_configure_price(cat_params_df, sku, cat_configure_price_df))
            cursor.execute(select_sql, (str(sku),))
            if not cursor.fetchall():
                cursor.execute(insert_sql, (
                    str(sku),
                    str(sku_row['skuname']),
                    str(sku_row['categoryid']),
                    str(sku_row['brandid']),
                    str(sku_row['spuid']),
                    configure_difference,
                    create_time,
                ))
            else:
                cursor.execute(update_sql, (configure_difference, create_time, str(sku)))
        print(f"{category}新增配件差价计算完成")
    finally:
        conn.close()
def check_configure_price(category, sku_list):
    """Report parameters of the given SKUs that have no accessory price.

    For every (name, value) parameter of each SKU whose name is a priced part
    in ``category`` but whose value has no price row, a line is printed.

    :param category: category name used to filter parameters and prices.
    :param sku_list: SKUs to check.
    """
    category_list = [category]
    configure_price_df = get_configure_price()
    params_df = get_attr_data(category_list)
    cat_params_df = params_df[params_df['categoryname'] == category]
    cat_configure_price_df = configure_price_df[configure_price_df['category_name'] == category]
    # Built once instead of re-listing the column for every parameter.
    priced_parts = set(cat_configure_price_df['part'])
    for sku in sku_list:
        filter_df = cat_params_df[cat_params_df['sku'] == sku]
        for name, value in zip(filter_df['name'], filter_df['value']):
            if name not in priced_parts:
                continue
            is_match = (cat_configure_price_df['part'] == name) & (cat_configure_price_df['detail'] == value)
            if not is_match.any():
                print("无该配件价格:", category, name, value)
This source diff could not be displayed because it is too large. You can view the blob instead.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 13 11:42:04 2020
@author: rico
"""
import hashlib
from API import zgc_api
from db import MSSQL
import pandas as pd
from public_function import *
'''
#加载数据
def LoadData():
#创建数据库链接
mssql = MSSQL('123.57.45.119','ZI_Service')
cursor_zi_service = mssql._cur
cursor_zi_service.execute("select * from product_all where sku is Null")
df = pd.DataFrame(cursor_zi_service.fetchall(), columns=[tuple[0] for tuple in cursor_zi_service.description])
mssql.Close()
return df
'''
def hash_name(name):
    """Return the hexadecimal MD5 digest of ``name`` encoded as UTF-8."""
    return hashlib.md5(name.encode("utf8")).hexdigest()
def _escape_sql_text(value):
    """Return ``value`` as text with single quotes doubled for a SQL string literal."""
    return str(value).replace("'", "''")


def get_pre_deal():
    """Fill brand, category, predicted category and SKU for unprocessed ``product_all`` rows.

    Rows with a NULL ``sku`` are loaded. Each gets an MD5-based SKU from its
    name, a brand matched against the first word of the upper-cased name, and
    a category matched against the name. When no category matches, the
    category predicted by the ``GetPredictCategory`` API is used. Each row is
    then written back to ``product_all``.
    """
    mssql = MSSQL('123.57.45.119', 'ZI_Service')
    mssql_new = None
    try:
        cursor_zi_service = mssql._cur
        cursor_zi_service.execute("select * from product_all where sku is Null")
        df = pd.DataFrame(cursor_zi_service.fetchall(), columns=[column[0] for column in cursor_zi_service.description])
        if df.empty:
            # Nothing to process; avoids a pointless API call.
            return

        mssql_new = MSSQL('123.56.115.207', 'ZI_NEW')
        cursor_zi_new = mssql_new._cur
        cursor_zi_new.execute("select * from p_brand where id not in (select pid from p_brand)")
        brand_df = pd.DataFrame(cursor_zi_new.fetchall(), columns=[column[0] for column in cursor_zi_new.description]).fillna('无品牌信息')
        cursor_zi_new.execute("select * from p_category where id not in (select pid from p_category)")
        category_df = pd.DataFrame(cursor_zi_new.fetchall(), columns=[column[0] for column in cursor_zi_new.description])

        # Lookup lists are built once instead of iterating the DataFrames per product.
        brands = [
            (code, brand, cname, ename.upper())
            for code, brand, cname, ename in zip(
                brand_df['id'].tolist(), brand_df['name'].tolist(),
                brand_df['cname'].tolist(), brand_df['ename'].tolist())
        ]
        categories = [
            (code, label, label.split('/'))
            for code, label in zip(category_df['id'].tolist(), category_df['name'].tolist())
        ]

        # Hash the product name into the SKU
        df['sku'] = df['name'].apply(lambda x: hash_name(x))
        name_list = df['name'].tolist()
        # Fetch the predicted categories
        data = {
            "category_info": {
                "cust_name_list": name_list,
                "url_name_list": name_list
            }
        }
        res = zgc_api("GetPredictCategory", data)
        df['predict_cat'] = res['category_list']
        df['predict_cat_code'] = res['categoryCode_list']

        # Progress bar; the total is at least 1 so a single row does not divide by zero.
        index_ = Index()
        progress_total = max(len(df) - 1, 1)
        counter = 1
        for _, row in df.iterrows():
            id_ = row['id']
            sku = row['sku']
            name = row['name'].strip().upper()
            print(index_(counter, progress_total), end='%')
            counter += 1
            pre_cat = row['predict_cat']
            pre_cat_code = row['predict_cat_code']

            # Extract the brand from the first word of the name
            zi_brandname = '未抽取到数据'
            zi_brandcode = None
            search_name = name.split(' ')[0]
            has_chinese = len(re.findall(r"[\u4e00-\u9fa5]", search_name)) != 0
            for brandcode, brandname, cname, ename in brands:
                if len(cname) == 1 or len(ename) == 1:
                    continue
                candidate = cname if has_chinese else ename
                # An empty candidate is a substring of everything; ignore it.
                if candidate and candidate in search_name:
                    zi_brandname = brandname
                    zi_brandcode = brandcode
                    break

            # Extract the category
            # NOTE(review): as before, the scan does not stop at the first
            # matching category, so the last match wins; confirm intent.
            zi_categoryname = None
            zi_categorycode = None
            for categorycode, categoryname, categoryname_list in categories:
                for category in categoryname_list:
                    if category and category in name:
                        zi_categoryname = categoryname
                        zi_categorycode = categorycode
                        break
            if zi_categoryname is None:
                zi_categoryname = pre_cat
                zi_categorycode = pre_cat_code

            # An unmatched brand used to interpolate the unquoted placeholder
            # text as the brand code, producing invalid SQL; NULL is written
            # instead. NOTE(review): confirm NULL is acceptable for zi_brandcode.
            brandcode_sql = 'NULL' if zi_brandcode is None else zi_brandcode
            # Codes stay interpolated unquoted exactly as before; text values
            # are quote-escaped.
            cursor_zi_service.execute(
                f"update product_all set zi_brandname = '{_escape_sql_text(zi_brandname)}',zi_brandcode = {brandcode_sql},"
                f"zi_subcategoryname = '{_escape_sql_text(zi_categoryname)}',zi_subcategorycode = {zi_categorycode}, "
                f"predict_category = '{_escape_sql_text(pre_cat)}',predict_category_code = {pre_cat_code},sku = '{sku}' where id = {id_}")
    finally:
        # The connections were never closed before.
        for handle in (mssql_new, mssql):
            if handle is not None and handle._conn:
                handle.Close()
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 9 23:37:15 2020
@author: rico
"""
import pymssql
import pandas as pd
import os
import requests
import tensorflow as tf
from lxml import etree
import re
import datetime
import json
import time
#from text_moudle.run_cnn import name2subcategory as generl_name2subcategory
#from text_moudle_LXWL.run_cnn import name2subcategory as LXWL_name2subcategory
#print (os.getcwd())
class Index(object):
    """Text progress bar: calling the instance returns a ``\\r``-prefixed bar plus percentage."""

    def __init__(self, number=50, decimal=2):
        """
        :param number: width of the bar, i.e. number of ``#`` cells at 100%
        :param decimal: decimal places kept in the percentage
        """
        self.decimal = decimal
        self.number = number
        self.a = 100 / number  # percentage represented by one '#'

    def __call__(self, now, total):
        """Return the progress line for ``now`` out of ``total``."""
        # 1. Current percentage
        percentage = self.percentage_number(now, total)
        # 2. Number of '#' cells for that percentage
        well_num = int(percentage / self.a)
        # 3. Render the bar
        progress_bar_num = self.progress_bar(well_num)
        # 4. Bar followed by the percentage
        result = "\r%s %s" % (progress_bar_num, percentage)
        return result

    def percentage_number(self, now, total):
        """
        Compute the percentage.

        :param now: current count
        :param total: total count; 0 is reported as 100.0 instead of raising
        :return: percentage rounded to ``self.decimal`` places
        """
        if total == 0:
            # Nothing left to do; avoids ZeroDivisionError for callers that
            # pass len(items) - 1 with a single item.
            return 100.0
        return round(now / total * 100, self.decimal)

    def progress_bar(self, num):
        """
        Render the bar.

        :param num: number of '#' cells requested; clamped to [0, self.number]
            so the bar never exceeds its width
        :return: the bar, e.g. ``[###   ]``
        """
        filled = min(max(num, 0), self.number)
        return '[%s%s]' % ("#" * filled, " " * (self.number - filled))
def match_sku(sku_list, frm):
    '''
    De-duplicate supplier links against the SKUs already stored.

    :param sku_list: channel SKUs (strings) to look up.
    :param frm: channel code: DL/DW/DZ/GM/JD/LXWL/OFS/SN/YHD/ZCSM
    :return: dict mapping each stripped SKU to the product code of its first
        stored match, or '0' when it is unknown. For SN, a SKU is also looked
        up with the '0000000000/' prefix.
    '''
    print_lookup = '正在获取当前库中所有sku,请稍等.....'
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='ZI_NEW', autocommit=True)
    try:
        cursor = conn.cursor()
        print(print_lookup)
        # As before, an "in" query is tried first (works when frm renders as a
        # SQL list) and an equality query is the fallback for a plain code.
        try:
            cursor.execute(f"select productcode,sku from Productcode_Sku where frm in {frm} and productcode not like '78%'")
        except pymssql.Error:
            cursor.execute(f"select productcode,sku from Productcode_Sku where frm='{frm}' and productcode not like '78%'")
        data_sku_file = pd.DataFrame(cursor.fetchall(), columns=['productcode', 'sku'])
    finally:
        conn.close()

    # op_product_sku stores Suning under 'SN-NEW'
    if frm == 'SN':
        frm = 'SN-NEW'
    conn_zi_database = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='ZI_DataBase', autocommit=True)
    try:
        cursor_zi_database = conn_zi_database.cursor()
        cursor_zi_database.execute(f"select productcode,sku from op_product_sku where frm='{frm}' and productcode not like '78%' ")
        data_sku_file_op = pd.DataFrame(cursor_zi_database.fetchall(), columns=['productcode', 'sku'])
    finally:
        conn_zi_database.close()

    data_sku_file = pd.concat([data_sku_file, data_sku_file_op]).drop_duplicates()
    data_sku_file['productcode'] = data_sku_file['productcode'].apply(lambda x: x.strip())
    data_sku_file['sku'] = data_sku_file['sku'].apply(lambda x: x.strip())
    print('sku获取完毕')

    # sku -> product code of its first occurrence; replaces a list scan per SKU.
    # NOTE(review): the old comments said a SKU matching several product codes
    # should yield '2', but that branch was unreachable; first match is kept.
    first_code_by_sku = {}
    for known_sku, productcode in zip(data_sku_file['sku'], data_sku_file['productcode']):
        first_code_by_sku.setdefault(known_sku, productcode)

    is_suning = frm == 'SN' or frm == 'SN-NEW'
    sku_check = {}
    for sku in set(sku_list):
        sku = sku.strip()
        key = str(sku)
        prefixed = '0000000000/' + key
        if key in first_code_by_sku:
            sku_check[f'{sku}'] = first_code_by_sku[key]
        elif is_suning and prefixed in first_code_by_sku:
            sku_check[f'{sku}'] = first_code_by_sku[prefixed]
        else:
            sku_check[f'{sku}'] = '0'
    return sku_check
def supporturlDataDeal(source, batch):
    '''
    Review and de-duplicate scraped channel data.

    Rows of ``product`` that are not yet validated for the given source and
    batch are loaded. Rows from channels other than JD/GM/SN are approved
    directly; JD/GM/SN rows are reviewed by ``check_reverse_data`` and their
    SKUs are matched against stored SKUs by ``match_sku``.

    :param source: value compared with the ``来源`` column.
    :param batch: value compared with the ``batch`` column.
    :return: None
    '''
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='reverse_data', autocommit=True)
    try:
        cursor = conn.cursor()
        # NOTE(review): source and batch are interpolated verbatim, so callers
        # must pass SQL-ready values; left unchanged because the expected
        # quoting is not visible here.
        cursor.execute(f'select * from product where channel_url_validate is Null and 来源={source} and batch={batch}')
        data = cursor.fetchall()
        columns = [column[0] for column in cursor.description]
        df = pd.DataFrame(data, columns=columns)[['product_id', 'channel_sku', 'product_name', 'channel_id', 'channel_product_id']]
        df = df.rename(columns={'product_id': 'id', 'channel_sku': 'sku', 'product_name': 'name', 'channel_id': 'source', 'channel_product_id': 'url'})
        df['price'] = '0'

        is_big_three = df['source'].isin(['JD', 'GM', 'SN'])
        for id_ in df[~is_big_three]['id'].tolist():
            cursor.execute("update product set channel_url_validate='通过',zgc_productcode='无' where product_id=%s", (str(id_),))

        df_DS = df[is_big_three].reset_index(drop=True)
        # Review the three major e-commerce channels
        check_data = check_reverse_data(df_DS)
        # With no rows, check_reverse_data returns its input unchanged, so the
        # review columns are missing; skip instead of raising KeyError.
        if not check_data.empty:
            for id_, suggestion in zip(check_data['update_id'], check_data['审核意见']):
                cursor.execute("update product set channel_url_validate=%s where product_id=%s", (str(suggestion), str(id_)))

        # De-duplicate the SKUs against the stored ones
        for channel in list(df_DS['source'].unique()):
            print(channel + 'sku排重中')
            sku_list = df_DS[df_DS['source'] == channel]['sku'].tolist()
            sku_check = match_sku(sku_list, channel)
            for k, v in sku_check.items():
                productcode = '无' if v == '0' else v
                cursor.execute("update product set zgc_productcode=%s where channel_sku=%s", (str(productcode), str(k)))
            print(channel + 'sku排重完毕')
    finally:
        conn.close()
_VERDICT_PASS = "通过"
_VERDICT_NO_STOCK = "无货,请按要求提供在销渠道证明"
_VERDICT_NOT_SELF_RUN = "非自营,请按要求提供在销渠道证明"
_VERDICT_BLOCKED = "定制/专用/书籍类产品暂不通过"
_VERDICT_BAD_LINK = "链接有误,请按要求提供在销渠道证明"
_VERDICT_OTHER_CHANNEL = "非三大电商,请按要求提供在销渠道证明"
_VERDICT_REGION = '该地区不销售(北京市丰台区)'


def _hits_stopword(name, word_df):
    """Return True when ``name`` has a stop word while lacking one of its white words."""
    # NOTE(review): a stop word with no white words never blocks, and one
    # missing white word is enough to block; kept as in the original.
    text = str(name)
    flagged = False
    for stopword, whitewords in zip(list(word_df['stopword']), list(word_df['whiteword'])):
        try:
            whiteword_list = whitewords.split('/')
            print(whiteword_list)
        except Exception:
            whiteword_list = []
        for whiteword in whiteword_list:
            if stopword in text and whiteword not in text:
                flagged = True
                break
    return flagged


def _review_jd(session, headers, word_df, item):
    """Review a JD link; update ``item`` (sku, name, price) and return the verdict."""
    sku = item['url'].split('/')[-1].split('.')[0]
    item['sku'] = sku
    # Fetch the price
    r = get_response(session, "https://p.3.cn/prices/mgets?skuIds=" + str(sku), headers).json()
    jd_price = r[0]['p']
    if len(jd_price) == 0:
        jd_price = item['price']
    if jd_price == '-1.00':
        # JD reports -1.00 for unavailable items; the original price is kept.
        return _VERDICT_NO_STOCK
    item['price'] = jd_price
    # Fetch the product page
    page = get_response(session, "https://item.jd.com/" + sku + ".html", headers)
    html = etree.HTML(page.text)
    ziying = html.xpath("//div[@class='name goodshop EDropdown']/em/text()")
    if "自营" not in str(ziying):
        return _VERDICT_NOT_SELF_RUN
    item['name'] = html.xpath("//div[@class='sku-name']/text()")
    if _hits_stopword(item['name'], word_df):
        return _VERDICT_BLOCKED
    # Fetch the stock information
    url = "https://c0.3.cn/stock?skuId=" + str(sku) + "&area=1_2901_2906_0&cat=9987,653,655"
    r = get_response(session, url, headers)
    if r == -1:
        return _VERDICT_PASS
    r.encoding = 'gbk'
    is_purchase = json.loads(r.text)
    try:
        out_of_stock = ("无货" in is_purchase['stock']['stockDesc']
                        or "无货" in is_purchase['stock']['StockStateName'])
    except Exception:
        out_of_stock = "无货" in is_purchase['StockStateName']
    return _VERDICT_NO_STOCK if out_of_stock else _VERDICT_PASS


def _review_gome(session, headers, word_df, item):
    """Review a GOME link; update ``item`` (sku, name, price) and return the verdict."""
    main_url_1 = re.findall(".cn/(.*?).html", item['url'])[0]
    main_url_ = 'https://item.gome.com.cn/' + main_url_1 + '.html'
    r = get_response(session, main_url_, headers)
    html = etree.HTML(r.text)
    content = html.xpath("//script[contains(text(),'gomePrice')]/text()")[0]
    item['price'] = content.split('gomePrice:"')[1:][0].split('"')[0]
    ziying = html.xpath("//span[@class='identify']/text()")
    if len(ziying) != 1:
        return _VERDICT_NOT_SELF_RUN
    item['name'] = html.xpath("//*[@id='gm-prd-main']/div[1]/h1/text()")
    if _hits_stopword(item['name'], word_df):
        return _VERDICT_BLOCKED
    # Fetch the stock information
    sku = main_url_.split('.html')[0].split('/')[-1].replace('-', '/')
    item['sku'] = sku
    url = "https://ss.gome.com.cn/item/v1/d/m/store/unite/" + str(
        sku) + "/N/11010200/110102002/1/null/flag/item/allStores?callback=allStores"
    r = get_response(session, url, headers)
    content = r.text.replace('allStores(', '')
    content = content.replace(')', '')
    content = json.loads(content)
    wuhuo = content['result']['stock']['status']
    if wuhuo == False:
        return _VERDICT_NO_STOCK
    return _VERDICT_PASS


def _review_suning(session, headers, word_df, item):
    """Review a Suning link; update ``item`` (sku, name, price) and return the verdict."""
    price = item['price']
    sku = re.findall(".com/(.*?).html", item['url'])[0]
    item['sku'] = sku
    main_url_ = 'https://product.suning.com/' + sku + '.html'
    r = get_response(session, main_url_, headers)
    html = etree.HTML(r.text)
    page_text = r.text
    part_number = html.xpath("//input[@id='curPartNumber']/@value")[0]
    ziying1 = html.xpath("//div[@class='proinfo-title']/h1/span/i/text()")
    ziying2 = html.xpath("//h1[@id='itemDisplayName']/span/text()")
    youhuo_ = re.findall("id=\"ie7_onsale\" >(.*?)<i", page_text)
    if not ("自营" in ziying1 or "自营" in ziying2):
        return _VERDICT_NOT_SELF_RUN
    url_json = f'https://product.suning.com/pds-web/ajax/itemUniqueInfo_{str(part_number)}_0000000000.html'
    response_json = get_response(session, url_json, headers)
    json_data = json.loads(response_json.text)
    itemDetail = json_data["itemDetail"]
    try:
        isPublished = itemDetail["isPublished"]
    except Exception:
        isPublished = '0'
    item['name'] = itemDetail["cmmdtyTitle"]
    if isPublished != '1' or '此款有货' not in str(youhuo_):
        return _VERDICT_NO_STOCK
    if _hits_stopword(item['name'], word_df):
        return _VERDICT_BLOCKED
    # Three fields are needed to build the price URL
    str_price = html.xpath("//a[@id='addCart2']/@sa-data")
    str1 = re.findall("'prdid':'(.*?)','", str(str_price))[0]
    str2 = re.findall("'shopid':'(.*?)','", str(str_price))[0]
    str3 = html.xpath("//input[@name='procateCode']/@value")[0]
    real_url = f'https://pas.suning.com/nspcsale_0_{str1}_{str1}_{str2}_10_010_0100100_157122_1000000_9017_10106_Z001___{str3}.html?callback=pcData'
    try:
        price_response = requests.get(real_url, timeout=5)
        sn_price = re.findall('"promotionPrice":"(.*?)",', price_response.text)[0]
    except Exception:
        sn_price = price
    if len(sn_price) != 0:
        item['price'] = sn_price
        return _VERDICT_PASS
    return _VERDICT_REGION


def check_reverse_data(check_data):
    '''
    Review scraped product links.

    Each row is checked against its channel (JD, GOME or Suning, chosen by
    substring of the URL): price lookup, self-run shop check, stop-word
    filter and stock check. Rows of other channels are rejected.

    :param check_data: DataFrame with columns id, sku, name, url, source and price.
    :return: the same DataFrame with columns ``审核意见`` and ``update_id``
        added and sku/name/url/source/price overwritten by the reviewed
        values. An empty input is returned unchanged.
    '''
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='zi_zh', autocommit=True)
    try:
        cursor = conn.cursor()
        cursor.execute('select stop_word,white_word from Stopwords')
        word_df = pd.DataFrame(cursor.fetchall(), columns=['stopword', 'whiteword'])
    finally:
        # Closed right after the only query, so the empty early return no
        # longer leaks the connection.
        conn.close()
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'}
    session = requests.Session()
    if check_data.empty:
        print('今日无新增数据')
        return check_data
    print('共' + str(len(check_data)) + '条数据待审核')

    reviewers = (("jd", _review_jd), ("gome", _review_gome), ("suning", _review_suning))
    # One record per row keeps every output column the same length; the old
    # parallel lists went out of step when a price had already been appended
    # before an exception.
    records = []
    for i in range(len(check_data)):
        row = check_data.iloc[i]
        item = {
            'id': row['id'],
            'sku': row['sku'],
            'name': row['name'],
            'url': row['url'].strip(),
            'source': row['source'],
            'price': row['price'],
        }
        print(item['url'])
        reviewer = next((func for marker, func in reviewers if marker in str(item['url'])), None)
        if reviewer is None:
            verdict = _VERDICT_OTHER_CHANNEL
        else:
            original_price = item['price']
            try:
                verdict = reviewer(session, headers, word_df, item)
            except Exception:
                # Any failure while fetching or parsing means the link is bad;
                # the original price is reported, sku/name keep their latest value.
                item['price'] = original_price
                verdict = _VERDICT_BAD_LINK
        print(verdict)
        item['verdict'] = verdict
        records.append(item)
        print(f"已经处理{i + 1}条数据")

    check_data['审核意见'] = [record['verdict'] for record in records]
    check_data['update_id'] = [record['id'] for record in records]
    check_data['sku'] = [record['sku'] for record in records]
    check_data['name'] = [record['name'] for record in records]
    check_data['url'] = [record['url'] for record in records]
    check_data['source'] = [record['source'] for record in records]
    check_data['price'] = [record['price'] for record in records]
    return check_data
def get_response(session, url, headers):
    '''
    GET ``url`` with a 5 second timeout, retrying when the request raises.

    :param session: object with a requests-style ``get(url, headers=..., timeout=...)``.
    :param url: address to fetch.
    :param headers: request headers.
    :return: the response when its status code is 200, otherwise ``-1``.
        A first attempt that answers with a non-200 status is not retried but
        now returns ``-1`` too (it used to fall through and return ``None``).
        A first attempt that raises is retried up to 9 times.
    '''
    try:
        response = session.get(url, headers=headers, timeout=5)
    except Exception:
        pass
    else:
        return response if response.status_code == 200 else -1

    # The first attempt raised (typically a timeout): retry.
    for i in range(1, 10):
        print('请求超时,第%s次重复请求' % i)
        try:
            response = session.get(url, headers=headers, timeout=5)
        except Exception:
            continue
        if response.status_code == 200:
            return response
    return -1
def check_data(fileName,source):
    """Audit the product links in ``fileName`` and write the verdicts to the database.

    For every row a link is chosen (``url``, falling back to ``otherurl`` /
    ``supporturl``), then checked according to the shop it points to (JD, Gome,
    Suning) or, for any other link, against the ``reverse_data`` database.
    Each row contributes one entry to ``result`` (verdict text) and one to
    ``price``; both lists are stored on ``fileName`` as the columns '审核意见'
    and 'price'. Finally the verdicts are written to ``product_zh`` /
    ``product_all`` when ``source == 'zi_zh'``, otherwise to ``product_all_api``.

    Args:
        fileName: despite the name, used as a pandas DataFrame (``.loc[i]``,
            column assignment). Columns read here: 'url', 'price', optionally
            'otherurl' or 'supporturl', 'id', 'sku', and 'product_zh_id'
            (``source == 'zi_zh'``) or 'id' (other sources).
            NOTE(review): ``fileName.loc[i]`` with ``i`` from ``range(len(...))``
            assumes the index labels are 0..n-1 — confirm with callers.
        source: 'zi_zh' selects the product_zh/product_all update path; any
            other value selects product_all_api.

    Returns:
        None; the function works through side effects only.

    NOTE(review): ``eval`` is applied to cell values, SQL is built with
    f-strings from row data, and neither ``conn`` nor ``reverse_conn`` is
    closed in this function.
    """
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='zi_zh',autocommit=True)
    cursor = conn.cursor()
    reverse_conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='reverse_data',autocommit=True)
    reverse_cursor = reverse_conn.cursor()
    # Parallel lists: one verdict and one price per processed row.
    result = []
    price = []
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'}
    session = requests.Session()
    # Stop words, each optionally paired with a white-list word that overrides it.
    cursor.execute("select stop_word,white_word from stopwords")
    stopwords = (cursor.fetchall())
    word_list = pd.DataFrame(stopwords, columns=['stopword', 'white_word'])
    # (disabled) lookup of the supplier ids from the `users` table that need auditing
    # Audit the links
    for i in range(len(fileName)):
        df = fileName.loc[i]
        # Pick the alternative link: 'otherurl' if the column exists, else 'supporturl'.
        if 'otherurl' in df.index.tolist():
            if df['otherurl']:
                try:
                    # The cell may hold the repr of a list; take its first element.
                    otherurl = eval(df['otherurl'])[0].strip()
                except:
                    otherurl = df['otherurl'].strip()
            else:
                otherurl = ''
        elif 'supporturl' in df.index.tolist():
            other = eval(df['supporturl'].strip())
            # (disabled) other = df['supporturl'].strip()
            if isinstance(other,list) and len(other) != 0:
                otherurl = other[0]
            else:
                otherurl = other
        else:
            otherurl = ''
            print('该渠道为API流程中的没有提供其他链接的渠道,其product_all表既没有other字段,也没有supporturl字段')
        if df['url']:
            try:
                main_url = eval(df['url'])[0].strip()
            except:
                main_url = df['url'].strip()
        else:
            main_url = otherurl
        # A main link that is not JD/Gome/Suning is replaced by the alternative link.
        if "jd" not in str(main_url) and "gome" not in str(main_url) and "suning" not in str(main_url):
            main_url = otherurl
        # (disabled) fall back to otherurl only when main_url is empty
        print(f'开始处理:{main_url}')
        if len(main_url) == 0:
            print("其他销售渠道证明")
            result.append("其他销售渠道证明")
            price.append("其他销售渠道价格")
        elif "jd" in str(main_url) or "gome" in str(main_url) or "suning" in str(main_url):
            if "jd" in str(main_url):
                if "i-item" in str(main_url):
                    # "i-item" links pass directly with the submitted price.
                    jd_price = df['price']
                    price.append(jd_price)
                    result.append("通过")
                else:
                    try:
                        # Response unused; a failing request lands in the except branch below.
                        try_ = session.get(main_url, headers=headers)
                        sku = main_url.split('/')[-1].split('.')[0]
                        # Fetch the price
                        url = "https://p.3.cn/prices/mgets?skuIds=" + str(sku)
                        r = session.get(url, headers=headers).json()
                        jd_price = r[0]['p']
                        if len(jd_price) == 0:
                            print('未获取到价格,使用供应商提交价格!!!')
                            jd_price = df['price']
                        if jd_price == '-1.00':
                            # '-1.00' is handled as out of stock.
                            jd_price = df['price']
                            price.append(jd_price)
                            result.append("无货,请按要求提供在销渠道证明")
                        else:
                            print('价格为:', jd_price)
                            # Fetch the remaining product information
                            main_url_ = "https://item.jd.com/" + sku + ".html"
                            r = session.get(main_url_, headers=headers)
                            html = etree.HTML(r.text)
                            ziying = html.xpath(
                                "//div[@class='name goodshop EDropdown']/em/text()")
                            if "自营" in str(ziying):
                                name = html.xpath("//div[@class='sku-name']/text()")
                                # (disabled) hard-coded keyword checks on the name; the
                                # stop-word table loop below is used instead
                                pass_word_jd = '即将判断'
                                # A stop word in the name rejects the product unless its
                                # white-list word is also in the name; the loop stops at
                                # the first rejection.
                                for sw, ww in zip(list(word_list['stopword']), list(word_list['white_word'])):
                                    if ww is None:
                                        if str(sw) in str(name):
                                            pass_word_jd = '不通过'
                                            print("定制/专用/含禁止上架关键词,产品暂不通过")
                                            result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                                            break
                                        else:
                                            pass_word_jd = '暂通过'
                                    else:
                                        if str(sw) in str(name):
                                            if str(ww) in str(name):
                                                pass_word_jd = '暂通过'
                                            else:
                                                pass_word_jd = '不通过'
                                                print("定制/专用/含禁止上架关键词,产品暂不通过")
                                                result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                                                break
                                        else:
                                            pass_word_jd = '暂通过'
                                if pass_word_jd == '暂通过':
                                    # Fetch the stock information
                                    url = "https://c0.3.cn/stock?skuId=" + str(
                                        sku) + "&area=1_2901_2906_0&cat=9987,653,655"
                                    r = get_response(session, url, headers)
                                    if r == -1:
                                        # No usable stock response: the product passes.
                                        print("通过")
                                        result.append("通过")
                                    else:
                                        r.encoding = 'gbk'
                                        is_purchase = json.loads(r.text)
                                        try:
                                            if "无货" in is_purchase['stock']['stockDesc'] or "无货" in \
                                                    is_purchase['stock']['StockStateName']:
                                                print("无货,请按要求提供在销渠道证明")
                                                result.append("无货,请按要求提供在销渠道证明")
                                            else:
                                                print("通过")
                                                result.append("通过")
                                        except:
                                            # Fallback for a payload without the nested 'stock' keys.
                                            if "无货" in is_purchase['StockStateName']:
                                                print("无货,请按要求提供在销渠道证明")
                                                result.append("无货,请按要求提供在销渠道证明")
                                            else:
                                                print("通过")
                                                result.append("通过")
                                else:
                                    pass
                            else:
                                print("非自营,请按要求提供在销渠道证明")
                                result.append("非自营,请按要求提供在销渠道证明")
                            price.append(jd_price)
                    except:
                        print("链接有误,请按要求提供在销渠道证明")
                        result.append("链接有误,请按要求提供在销渠道证明")
                        price.append(df['price'])
            elif "gome" in str(main_url):
                # Fetch the price
                try:
                    try_ = session.get(main_url, headers=headers)
                    main_url_1 = re.findall(".cn/(.*?).html", main_url)[0]
                    main_url_ = 'https://item.gome.com.cn/' + main_url_1 + '.html'
                    r = session.get(main_url_, headers=headers)
                    html = etree.HTML(r.text)
                    content = html.xpath("//script[contains(text(),'gomePrice')]/text()")[0]
                    gm_price = content.split('gomePrice:"')[1:][0].split('"')[0]
                    ziying = html.xpath(
                        "//span[@class='identify']/text()")
                    # Exactly one 'identify' span is treated as a self-operated listing.
                    if len(ziying) == 1:
                        name = html.xpath(
                            "//*[@id='gm-prd-main']/div[1]/h1/text()")
                        # (disabled) hard-coded keyword checks on the name
                        pass_word_gm = '即将判断'
                        # Same stop-word / white-word rule as in the JD branch.
                        for sw, ww in zip(list(word_list['stopword']), list(word_list['white_word'])):
                            # print (sw,ww)
                            if ww is None:
                                if str(sw) in str(name):
                                    pass_word_gm = '不通过'
                                    print("定制/专用/含禁止上架关键词,产品暂不通过")
                                    result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                                    break
                                else:
                                    pass_word_gm = '暂通过'
                            else:
                                if str(sw) in str(name):
                                    if str(ww) in str(name):
                                        pass_word_gm = '暂通过'
                                    else:
                                        pass_word_gm = '不通过'
                                        print("定制/专用/含禁止上架关键词,产品暂不通过")
                                        result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                                        break
                                else:
                                    pass_word_gm = '暂通过'
                        if pass_word_gm == '暂通过':
                            # Fetch the stock information
                            sku = main_url_.split('.html')[0].split('/')[-1].replace('-', '/')
                            url = "https://ss.gome.com.cn/item/v1/d/m/store/unite/" + str(
                                sku) + "/N/11010200/110102002/1/null/flag/item/allStores?callback=allStores"
                            r = session.get(url, headers=headers)
                            # Unwrap the JSONP envelope before parsing.
                            content = r.text.replace('allStores(', '')
                            content = content.replace(')', '')
                            content = json.loads(content)
                            wuhuo = content['result']['stock']['status']
                            if wuhuo == False:
                                print("无货,请按要求提供在销渠道证明")
                                result.append("无货,请按要求提供在销渠道证明")
                            else:
                                print("通过")
                                result.append("通过")
                        else:
                            pass
                    else:
                        print("非自营,请按要求提供在销渠道证明")
                        result.append("非自营,请按要求提供在销渠道证明")
                    price.append(gm_price)
                except:
                    print("链接有误,请按要求提供在销渠道证明")
                    result.append("链接有误,请按要求提供在销渠道证明")
                    price.append(df['price'])
            elif "suning" in str(main_url):
                # (disabled) alternative sku extraction and a hard-coded sample link
                print(f'苏宁:{main_url}')
                try:
                    try_ = session.get(main_url, headers=headers)
                    sku = re.findall(".com/(.*?).html", main_url)[0]
                    main_url_ = 'https://product.suning.com/' + sku + '.html'
                    r = session.get(main_url_, headers=headers)
                    html = etree.HTML(r.text)
                    daaa = r.text
                    # Start from the submitted price; replaced below when a live price is found.
                    sn_price = df.price
                    # sn_price = '58.00-558.00'
                    str2 = html.xpath("//input[@id='curPartNumber']/@value")[0]
                    ziying1 = html.xpath("//div[@class='proinfo-title']/h1/span/i/text()")
                    ziying2 = html.xpath("//h1[@id='itemDisplayName']/span/text()")
                    youhuo_ = re.findall("id=\"ie7_onsale\" >(.*?)<i", daaa)
                    if "自营" in ziying1 or "自营" in ziying2:
                        daohuo = html.xpath("//a[@id='tellMe']/span/text()")
                        url_json = f'https://product.suning.com/pds-web/ajax/itemUniqueInfo_{str(str2)}_0000000000.html'
                        response_json = session.get(url_json, headers=headers)
                        json_data = json.loads(response_json.text)
                        itemDetail = json_data["itemDetail"]
                        try:
                            isPublished = itemDetail["isPublished"]
                        except:
                            isPublished = '0'
                        name = itemDetail["cmmdtyTitle"]
                        if isPublished == '1' or isPublished == '0':
                            if '此款有货' in str(youhuo_):
                                state = '有货'
                                # (disabled) hard-coded keyword checks on the product name
                                pass_word_sn = '即将判断'
                                # Same stop-word rule, but the verdict is appended after
                                # the loop (see the else branch of the check below).
                                for sw, ww in zip(list(word_list['stopword']), list(word_list['white_word'])):
                                    # print (sw,ww)
                                    if ww is None:
                                        if str(sw) in str(name):
                                            pass_word_sn = '不通过'
                                            print("定制/专用/含禁止上架关键词,产品暂不通过")
                                            #result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                                            break
                                        else:
                                            pass_word_sn = '暂通过'
                                    else:
                                        if str(sw) in str(name):
                                            if str(ww) in str(name):
                                                pass_word_sn = '暂通过'
                                            else:
                                                pass_word_sn = '不通过'
                                                print("定制/专用/含禁止上架关键词,产品暂不通过")
                                                #result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                                                break
                                        else:
                                            pass_word_sn = '暂通过'
                                if pass_word_sn == '暂通过':
                                    str11 = html.xpath("//input[@id='curPartNumber']/@value")[0]
                                    str22 = html.xpath("//input[@id='shop_code']/@value")[0]
                                    str33 = html.xpath("//input[@name='procateCode']/@value")[0]
                                    if len(str22) == 0:
                                        # No shop code on the page: query with the all-zero shop code.
                                        str22 = '0000000000'
                                        real_url = f'https://pas.suning.com/nspcsale_0_{str11}_{str11}_0000000000_10_010_0100100_501126_1000000_9017_10106_Z001___{str33}_1.0_0___000321NJB____0__.html?callback=pcData'
                                        # (disabled) older form of real_url built with str22
                                        price_response = requests.get(real_url)
                                        sn_price = re.findall('"refPrice":"(.*?)",', price_response.text)[0]
                                        if len(sn_price) == 0:
                                            sn_price = re.findall('"promotionPrice":"(.*?)",', price_response.text)[0]
                                        else:
                                            pass
                                    else:
                                        real_url = f'https://pas.suning.com/nspcsale_0_{str11}_{str11}_{str22}_10_010_0100100_157122_1000000_9017_10106_Z001___{str33}.html?callback=pcData'
                                        price_response = requests.get(real_url)
                                        sn_price = re.findall('"promotionPrice":"(.*?)",', price_response.text)[0]
                                    if len(sn_price) != 0:
                                        try:
                                            sn_price = float(sn_price)
                                            price.append(sn_price)
                                            result.append('通过')
                                        except:
                                            # The scraped price is not a single number.
                                            print(f'该链接获取的价格有问题:{sn_price},{main_url_}')
                                            sn_price = df.price
                                            result.append('该链接无法定位到唯一商品')
                                            if len(sn_price) != 0:
                                                price.append(sn_price)
                                            else:
                                                price.append('0')
                                    else:
                                        sn_price = df.price
                                        result.append('该链接无法定位到唯一商品')
                                        if len(sn_price) != 0:
                                            price.append(sn_price)
                                        else:
                                            price.append('0')
                                else:
                                    result.append('定制/专用/含禁止上架关键词,产品暂不通过')
                                    price.append(sn_price)
                            else:
                                state = '无货,请按要求提供在销渠道证明'
                                sn_price = df.price
                                result.append('无货,请按要求提供在销渠道证明')
                                price.append(sn_price)
                        else:
                            state = "无货,请按要求提供在销渠道证明"
                            # sn_price = df.price
                            result.append('无货,请按要求提供在销渠道证明')
                            price.append(sn_price)
                    else:
                        result.append('非自营,请按要求提供在销渠道证明')
                        price.append(df['price'])
                except:
                    print("链接有误,请按要求提供在销渠道证明")
                    result.append("链接有误,请按要求提供在销渠道证明")
                    price.append(df['price'])
            else:
                print("非自营,请按要求提供在销渠道证明")
                result.append("非自营,请按要求提供在销渠道证明")
                price.append(df['price'])
        else:
            # Link outside the three shops: name and price come from reverse_data,
            # then the stop-word rule is applied to the name.
            if 'otherurl' in df.index.tolist():
                # (disabled) hard-coded sample link
                # Look up product name and price for this id in reverse_data
                print(main_url)
                try:
                    reverse_cursor.execute(f"select a.product_name,b.price from product a left join product_price b on a.product_id=b.product_id where a.id='{df['id']}' and CHARINDEX(a.channel_product_id,'{main_url}')>0")
                    name_price = reverse_cursor.fetchone()
                    name = name_price[0]
                    price_ = name_price[1]
                except:
                    # Retry without the link condition (e.g. when no row matched above).
                    reverse_cursor.execute(f"select a.product_name,b.price from product a left join product_price b on a.product_id=b.product_id where a.id='{df['id']}'")
                    name_price = reverse_cursor.fetchone()
                    name = name_price[0]
                    price_ = name_price[1]
                pass_word_zc = '即将判断'
                # Unlike the shop branches, a white-list hit appends "通过" and stops the loop.
                for sw, ww in zip(list(word_list['stopword']), list(word_list['white_word'])):
                    if ww is None:
                        if str(sw) in str(name):
                            pass_word_zc = '不通过'
                            print("定制/专用/含禁止上架关键词,产品暂不通过")
                            result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                            break
                        else:
                            pass_word_zc = '暂通过'
                    else:
                        if str(sw) in str(name):
                            if str(ww) in str(name):
                                pass_word_zc = '通过'
                                result.append("通过")
                                break
                            else:
                                print("定制/专用/含禁止上架关键词,产品暂不通过")
                                result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                                pass_word_zc = '不通过'
                                break
                        else:
                            pass_word_zc = '暂通过'
                if pass_word_zc == '暂通过':
                    result.append("通过")
                price.append(price_)
            else:
                # Row without an 'otherurl' column and not one of the three shops
                # Look up product name and price for this sku in reverse_data
                reverse_cursor.execute(f"select a.product_name,b.price from product a left join product_price b on a.product_id=b.product_id where a.id='{df['sku']}' and CHARINDEX(a.channel_product_id,'{main_url}')>0")
                name_price = reverse_cursor.fetchone()
                if name_price:
                    print(f'name_price:{name_price},价格:{str(name_price[1])}')
                    name = name_price[0]
                    price_ = str(name_price[1])
                    pass_word_zc = '即将判断'
                    for sw, ww in zip(list(word_list['stopword']), list(word_list['white_word'])):
                        if ww is None:
                            if str(sw) in str(name):
                                pass_word_zc = '不通过'
                                print("定制/专用/含禁止上架关键词,产品暂不通过")
                                result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                                break
                            else:
                                pass_word_zc = '暂通过'
                        else:
                            if str(sw) in str(name):
                                if str(ww) in str(name):
                                    pass_word_zc = '通过'
                                    result.append("通过")
                                    break
                                else:
                                    print("定制/专用/含禁止上架关键词,产品暂不通过")
                                    result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                                    pass_word_zc = '不通过'
                                    break
                            else:
                                pass_word_zc = '暂通过'
                    if pass_word_zc == '暂通过':
                        result.append("通过")
                    price.append(price_)
                else:
                    result.append("非自营,请按要求提供在销渠道证明")
                    price.append(df['price'])
        print(f"已经处理{i + 1}条数据\n")
        # Consistency guard: every row must have added exactly one verdict and one price.
        if len(result)!=len(price):
            print("问题链接:",main_url)
            break
    print(f'审核意见:{len(result)}')
    print(f'price:{len(price)}')
    print(f'总条数:{len(fileName)}')
    fileName['审核意见'] = result
    fileName['price'] = price
    # Second pass: flag rows whose shop link repeats a sku key seen on an earlier row.
    urlss_all = []
    for i in range(len(fileName)):
        dw = fileName.loc[i]
        if dw.url:
            url_u = dw.url.strip()
            if len(url_u) == 0:
                pass
            elif "jd" in str(url_u) or "gome" in str(url_u) or "suning" in str(url_u):
                print(url_u)
                try:
                    if "jd" in str(url_u):
                        skuu = url_u.split('/')[-1].split('.')[0]
                    elif "suning" in str(url_u):
                        skuu = re.findall(".com/(.*?).html", url_u)[0]
                    elif "gome" in str(url_u):
                        skuu = re.findall(".cn/(.*?).html", url_u)[0]
                    # print(skuu)
                    if str(skuu) in urlss_all:
                        fileName.loc[i, '审核意见'] = '该链接与其他供应商提供链接重复,暂定不通过,正在处理,请稍等'
                    else:
                        urlss_all.append(skuu)
                except:
                    fileName.loc[i, '审核意见'] = '链接有误,请按要求提供在销渠道证明'
            else:
                pass
    print('更新状态中。。。')
    # One id bucket per verdict text; id_sku and id_suning are never filled below.
    id_pass = []
    id_nopass = []
    id_nojd = []
    id_noname = []
    id_qita = []
    id_sku = []
    id_buxiaoshou = []
    id_ljcw = []
    id_suning = []
    id_cfcf = []
    id_noding = []
    # (disabled) extra buckets for Suning links skipped for manual review and
    # for products already in the library
    if source == 'zi_zh':
        today = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
        for id_name, suggestion in zip(fileName['product_zh_id'], fileName['审核意见']):
            if suggestion == '通过':
                id_pass.append(id_name)
            elif suggestion == '无货,请按要求提供在销渠道证明':
                id_nopass.append(id_name)
            elif suggestion == '非自营,请按要求提供在销渠道证明':
                id_nojd.append(id_name)
            elif suggestion == '定制/专用/含禁止上架关键词,产品暂不通过':
                id_noname.append(id_name)
            elif suggestion == '该链接无法定位到最终产品,存在多个价格':
                id_buxiaoshou.append(id_name)
            elif suggestion == '链接有误,请按要求提供在销渠道证明':
                id_ljcw.append(id_name)
            elif suggestion == '其他销售渠道证明':
                id_qita.append(id_name)
            elif suggestion == '该链接与其他供应商提供链接重复,暂定不通过,正在处理,请稍等':
                id_cfcf.append(id_name)
            elif suggestion == '该链接无法定位到唯一商品':
                id_noding.append(id_name)
        # (disabled) write the audited price back to product_zh / product_all
        print('更新数据库~')
        # NOTE(review): several buckets below set product_zh.state='1' while
        # product_all.state='2' for the same id — confirm this asymmetry is intended.
        for id_num in id_pass:
            cursor.execute(f"update product_zh set state='1' where id= '{id_num}'")
            cursor.execute(f"update product_all set state='1' where product_zh_id= '{id_num}'")
        conn.commit()
        for i in id_nopass:
            cursor.execute(f"update product_zh set state='2',remark='无货,请按要求提供在销渠道证明',isdo='1' where id='{i}'")
            cursor.execute(f"update product_all set state='2',remark='无货,请按要求提供在销渠道证明' where product_zh_id='{i}'")
        conn.commit()
        for i in id_nojd:
            cursor.execute(f"update product_zh set state='2',remark='非自营,请按要求提供在销渠道证明',isdo='1' where id='{i}'")
            cursor.execute(f"update product_all set state='2',remark='非自营,请按要求提供在销渠道证明' where product_zh_id='{i}'")
        conn.commit()
        for i in id_noname:
            cursor.execute(f"update product_zh set state='1',remark='定制/专用/含禁止上架关键词,产品暂不通过' where id='{i}'")
            cursor.execute(f"update product_all set state='2',remark='定制/专用/含禁止上架关键词,产品暂不通过' where product_zh_id='{i}'")
        conn.commit()
        # (disabled) per-row price write-back
        for i in id_buxiaoshou:
            cursor.execute(f"update product_zh set state='1',remark='该链接无法定位到最终产品,存在多个价格' where id='{i}'")
            cursor.execute(f"update product_all set state='2',remark='该链接无法定位到最终产品,存在多个价格' where product_zh_id='{i}'")
        conn.commit()
        for i in id_ljcw:
            cursor.execute(f"update product_zh set state='1',remark='链接有误,请按要求提供在销渠道证明' where id='{i}'")
            cursor.execute(f"update product_all set state='2',remark='链接有误,请按要求提供在销渠道证明' where product_zh_id='{i}'")
        conn.commit()
        for i in id_qita:
            cursor.execute(f"update product_zh set state='1',remark='其他销售渠道证明,需人工审核' where id='{i}'")
            cursor.execute(f"update product_all set state='2',remark='其他销售渠道证明,需人工审核' where product_zh_id='{i}'")
        conn.commit()
        for i in id_cfcf:
            cursor.execute(f"update product_zh set state='2',remark='该链接与其他供应商提供链接重复,暂定不通过,正在处理,请稍等' where id='{i}'")
            cursor.execute(f"update product_all set state='2',remark='该链接与其他供应商提供链接重复,暂定不通过,正在处理,请稍等' where product_zh_id='{i}'")
        conn.commit()
        for i in id_noding:
            cursor.execute(f"update product_zh set state='1',remark='该链接无法定位到唯一商品' where id='{i}'")
            cursor.execute(f"update product_all set state='2',remark='该链接无法定位到唯一商品' where product_zh_id='{i}'")
        conn.commit()
    else:
        today = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
        for id_name, suggestion in zip(fileName['id'], fileName['审核意见']):
            if suggestion == '通过':
                id_pass.append(id_name)
            elif suggestion == '无货,请按要求提供在销渠道证明':
                id_nopass.append(id_name)
            elif suggestion == '非自营,请按要求提供在销渠道证明':
                id_nojd.append(id_name)
            elif suggestion == '定制/专用/含禁止上架关键词,产品暂不通过':
                id_noname.append(id_name)
            elif suggestion == '该链接无法定位到最终产品,存在多个价格':
                id_buxiaoshou.append(id_name)
            elif suggestion == '链接有误,请按要求提供在销渠道证明':
                id_ljcw.append(id_name)
            elif suggestion == '其他销售渠道证明':
                id_qita.append(id_name)
            elif suggestion == '该链接与其他供应商提供链接重复,暂定不通过,正在处理,请稍等':
                id_cfcf.append(id_name)
            elif suggestion == '该链接无法定位到唯一商品':
                id_noding.append(id_name)
        # (disabled) write the audited price back to product_zh / product_all
        print('更新数据库~')
        # API sources: only product_all_api is updated; every non-pass bucket gets state='2'.
        for id_num in id_pass:
            cursor.execute(f"update product_all_api set state='1' where id= '{id_num}'")
        conn.commit()
        for i in id_nopass:
            cursor.execute(f"update product_all_api set state='2',remark='无货,请按要求提供在销渠道证明' where id='{i}'")
        conn.commit()
        for i in id_nojd:
            cursor.execute(f"update product_all_api set state='2',remark='非自营,请按要求提供在销渠道证明' where id='{i}'")
        conn.commit()
        for i in id_noname:
            cursor.execute(f"update product_all_api set state='2',remark='定制/专用/含禁止上架关键词,产品暂不通过' where id='{i}'")
        conn.commit()
        # (disabled) per-row price write-back
        for i in id_buxiaoshou:
            cursor.execute(f"update product_all_api set state='2',remark='该链接无法定位到最终产品,存在多个价格' where id='{i}'")
        conn.commit()
        for i in id_ljcw:
            cursor.execute(f"update product_all_api set state='2',remark='链接有误,请按要求提供在销渠道证明' where id='{i}'")
        conn.commit()
        for i in id_qita:
            cursor.execute(f"update product_all_api set state='2',remark='其他销售渠道证明,需人工审核' where id='{i}'")
        conn.commit()
        for i in id_cfcf:
            cursor.execute(f"update product_all_api set state='2',remark='该链接与其他供应商提供链接重复,暂定不通过,正在处理,请稍等' where id='{i}'")
        conn.commit()
        for i in id_noding:
            cursor.execute(f"update product_all_api set state='2',remark='该链接无法定位到唯一商品' where id='{i}'")
        conn.commit()
    print('更新完成~')
def get_zgc_brand_info(url_brand_list,brand_list):
    """Map pairs of raw brand strings to names and ids from the ``p_brand`` table.

    Args:
        url_brand_list: brand strings, iterated in lockstep with ``brand_list``.
        brand_list: second brand string for the same product.

    Returns:
        ``(zgc_brand_list, zgc_brand_code_list)``: for every input pair, the
        list of matched brand names and the list of matched ids (as strings).
        A candidate without any match contributes '无对应' to both lists.
    """
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',database='ZI_NEW',autocommit=True)
    cursor = conn.cursor()
    # Brands whose id is used as another row's pid are excluded (main brands are not matched).
    cursor.execute('select id,name,cname,ename from p_brand where id not in (select distinct(pid) from p_brand where pid <> 0)') # main brands are not matched
    data = (cursor.fetchall())
    brand_table = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor.description]).drop_duplicates().fillna('EMPTY')
    # Normalise the three name columns for exact, upper-case comparison.
    brand_table['name'] = brand_table['name'].apply(lambda x : str(x).strip().upper())
    brand_table['cname'] = brand_table['cname'].apply(lambda x : str(x).strip().upper())
    brand_table['ename'] = brand_table['ename'].apply(lambda x : str(x).strip().upper())
    zgc_brand_list = []
    zgc_brand_code_list = []
    for brand,source_brand in zip(url_brand_list,brand_list):
        # Both candidates of a pair feed the same temp_name / temp_code lists.
        combine_list = [brand,source_brand]
        temp_name = []
        temp_code = []
        for brand in combine_list:
            if brand:
                # Any brand text containing "联想" is collapsed to exactly "联想".
                if "联想" in str(brand):
                    brand = "联想"
                # (disabled) map brands containing "国产" to an error marker
            else:
                brand = '无信息'
            brand = str(brand)
            try:
                # Remove the text taken from inside square brackets (called
                # `country` here); brands without brackets raise IndexError.
                country = brand.split('[')[1].split(']')[-2]
                brand = brand.replace(country, '')
            except IndexError:
                pass
            # Chinese part: digits and CJK characters; discarded when digits only.
            cn_res = ''.join(re.findall(r'[0-9\u4E00-\u9FA5]', brand))
            if cn_res.isdigit():
                cn_res = ''
            cnbrand = cn_res
            # English part: ASCII letters, digits and '-', upper-cased; discarded when digits only.
            en_res = (''.join(re.findall(r'[a-zA-Z0-9-]', brand))).upper()
            if en_res.isdigit():
                en_res = ''
            enbrand = en_res
            if cnbrand == '':
                cnbrand = '无对应'
            if enbrand == '' or len(enbrand) == 1:
                enbrand = '无对应'
            # 1) exact match on the English name
            findres = [[brandcode, name] for brandcode, enname,name in
                       zip(brand_table['id'].tolist(), brand_table['ename'].tolist(),brand_table['name'].tolist()) if enbrand == str(enname)]
            if findres:
                for find in findres:
                    temp_name.append(find[1])
                    temp_code.append(str(find[0]))
            else:
                # 2) exact match on the Chinese name; "联想" also accepts rows whose
                #    English name contains THINKPAD
                if brand == "联想":
                    findres = [[brandcode, name] for brandcode, cnname,enname,name in
                               zip(brand_table['id'].tolist(), brand_table['cname'].tolist(),brand_table['ename'].tolist(),brand_table['name'].tolist()) if cnbrand == str(cnname) or "THINKPAD" in str(enname)]
                else:
                    findres = [[brandcode, name] for brandcode, cnname,name in
                               zip(brand_table['id'].tolist(), brand_table['cname'].tolist(),brand_table['name'].tolist()) if cnbrand == str(cnname)]
                if findres:
                    for find in findres:
                        temp_name.append(find[1])
                        temp_code.append(str(find[0]))
                    continue
                else:
                    # 3) exact match of the whole brand text on the `name` column
                    findres = [[brandcode, name] for brandcode,name in
                               zip(brand_table['id'].tolist(), brand_table['name'].tolist()) if brand == str(name)]
                    if findres:
                        for find in findres:
                            temp_name.append(find[1])
                            temp_code.append(str(find[0]))
                        continue
                    else:
                        temp_name.append('无对应')
                        temp_code.append('无对应')
                        continue
            # NOTE(review): the nesting level of this block could not be
            # recovered with certainty. Placed here (loop-body level) it is
            # reached only after an English-name match and appends those
            # matches a second time — confirm against the original file.
            if findres:
                for find in findres:
                    temp_name.append(find[1])
                    temp_code.append(str(find[0]))
            else:
                temp_name.append('无对应')
                temp_code.append('无对应')
        zgc_brand_list.append(temp_name)
        zgc_brand_code_list.append(temp_code)
    conn.close()
    return zgc_brand_list, zgc_brand_code_list
def get_zgc_classify_info(classify_list,source):
    """Map source category names to sub-category names and codes.

    Args:
        classify_list: iterable of source category names.
        source: 'zi_zh' queries ``op_zh_zd_type`` joined with
            ``VW_Relation_Property``; any other value is stripped of '_' and
            'ZH' and used as the ``source`` filter on
            ``Product_Relation_Attribute_SubTitle``.

    Returns:
        ``(zd_category, zd_category_code)``: one list of names and one list of
        codes per input category. ``['无总后类别对应']`` marks a category whose
        query returned no row. For 'zi_zh' the marker is also used when every
        row was filtered out; for other sources the (possibly empty) filtered
        lists are returned as they are.
    """
    classify_list = list(classify_list)
    zd_category = []
    zd_category_code = []
    if not classify_list:
        # Nothing to look up, so no connection is opened.
        return zd_category, zd_category_code

    is_zh = source == 'zi_zh'
    # Values are passed as query parameters instead of being pasted into the
    # SQL text, so names containing quotes can neither break nor alter it.
    if is_zh:
        sql = ("select a.subcategoryname,b.SubCategoryCode from op_zh_zd_type a "
               "left join VW_Relation_Property b "
               "on a.subcategoryname = b.SubCategoryName "
               "where a.second = %s")
        fixed_params = ()
    else:
        source = source.replace('_', '').replace('ZH', '')
        sql = ("select b.name 'subcategoryname',a.ZI_SubCategoryCode 'SubCategoryCode' "
               "from Product_Relation_Attribute_SubTitle a LEFT JOIN ZI_SubCategory b "
               "on a.zi_subcategorycode=b.subcategorycode "
               "where a.source=%s and a.SourceSubCategory=%s")
        fixed_params = (source,)

    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',
                           database='ZI_DataBase')
    try:
        cursor = conn.cursor()
        for subcategory_name in classify_list:
            cursor.execute(sql, fixed_params + (str(subcategory_name),))
            rows = pd.DataFrame(
                cursor.fetchall(),
                columns=[column[0] for column in cursor.description],
            ).drop_duplicates()

            names = []
            codes = []
            for name, code in zip(rows['subcategoryname'].tolist(), rows['SubCategoryCode']):
                # Rows without a code are dropped; 'zi_zh' also drops rows
                # whose name contains "错误".
                if code is None or (is_zh and "错误" in name):
                    continue
                names.append(name)
                codes.append(code)

            if rows.empty or (is_zh and not names):
                zd_category.append(['无总后类别对应'])
                zd_category_code.append(['无总后类别对应'])
            else:
                zd_category.append(names)
                zd_category_code.append(codes)
    finally:
        # The connection used to be left open on every call.
        conn.close()
    return zd_category, zd_category_code
def remove_error_productcode(productcodes):
    """Return the product codes whose brand or sub-category name contains "错误".

    Args:
        productcodes: iterable of de-duplicated product codes.

    Returns:
        List of the product codes (as returned by the database) whose
        ``zi_brandname`` or ``zi_subcategoryname`` contains "错误". Empty input
        yields an empty list without touching the database.
    """
    codes = tuple(productcodes)
    if not codes:
        # An empty "in ()" list is not valid SQL.
        return []

    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',database='ZI_DataBase', autocommit=True)
    try:
        cursor = conn.cursor()
        # One %s placeholder per code; the driver quotes the values.
        sql = '''
        select distinct c.productcode,c.zi_brandname,d.name 'zi_subcategoryname' from
        (select a.productcode,a.subcategorycode,b.name 'zi_brandname' from info_product a left join zi_brand b on a.brandcode=b.brandcode) c
        left join zi_subcategory d on c.subcategorycode=d.subcategorycode
        where c.productcode in (%s)
        ''' % ','.join(['%s'] * len(codes))
        cursor.execute(sql, codes)
        rows = cursor.fetchall()
    finally:
        conn.close()

    # Both names come from left joins and may be NULL; treat NULL as "no error marker".
    return [
        productcode
        for productcode, brand_name, subcategory_name in rows
        if '错误' in (brand_name or '') or '错误' in (subcategory_name or '')
    ]
def remove_old_productcode(productcodes):
    """Return the product codes that have no row in ``p_sku``.

    Args:
        productcodes: iterable of product codes to look up by ``sku``.

    Returns:
        List of the codes, in input order, for which the lookup returned no
        row. Empty input yields an empty list without touching the database.
    """
    codes = list(productcodes)
    if not codes:
        return []

    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',database='ZI_NEW', autocommit=True)
    error_code = []
    try:
        cursor = conn.cursor()
        for code in codes:
            # Parameterised so a quote inside a code cannot break the statement;
            # str() keeps the former "compare as text" behaviour.
            cursor.execute("select sku from p_sku where sku = %s", (str(code),))
            if not cursor.fetchall():
                error_code.append(code)
    finally:
        # Close the connection even when a query fails.
        conn.close()
    return error_code
def get_cn_number(char):
    """Count the characters of ``char`` that lie in U+4E00..U+9FA5.

    :param char: string to inspect
    :return: number of characters inside that (Chinese ideograph) range
    """
    return sum(1 for item in char if 0x4E00 <= ord(item) <= 0x9FA5)
def judge_unit(string):
    """Tell whether ``string`` may be kept as a model token.

    Returns ``False`` when the text contains one of the rejected fragments,
    holds three or more Chinese characters, or starts with a digit and its
    leading numeric part is followed by letters spelling a known unit
    (e.g. '80G'). Returns ``True`` otherwise, in particular for any other
    text that does not start with a digit.
    """
    known_units = ['MM','CM', 'DM', 'ML', 'W', 'KW','KG','G','M','L','KBPS','MMM','P','V','KM']
    rejected_fragments = ['公斤','NULL','PVC','MONITOR','QIANG','I3','I5','I7','I9','LED','NO','寸','USB','OPTIX','OSN','ZXMP','BASE','ZFSD']

    if any(fragment in string for fragment in rejected_fragments):
        return False
    if get_cn_number(string) >= 3:
        return False
    if not string[0].isdigit():
        return True

    # Measure the numeric prefix and the run of letters after it; scanning
    # stops at the first numeric character that follows a letter.
    numeric_len = 0
    letter_len = 0
    for symbol in string:
        if symbol.isdigit() or symbol in ('.', '-', 'X'):
            numeric_len += 1
            if letter_len != 0:
                numeric_len -= 1
                break
        elif symbol.isalpha():
            letter_len += 1

    suffix = string[numeric_len:numeric_len + letter_len].upper()
    return suffix not in known_units
def get_model(name, params, brand):
    """Extract a model string for a product from its parameters or its name.

    Args:
        name: product name.
        params: text holding the repr of a parameter dict; the keys
            '认证型号', '产品型号' and '型号' are tried in that order.
        brand: brand text; its first ASCII token is removed from candidates.

    Returns:
        The upper-cased model from ``params`` when it survives the filters,
        otherwise the longest surviving ASCII token of ``name``, otherwise
        '无型号'. Any '/T' or '\\T' is removed from the result.
    """
    # NOTE(review): eval() executes arbitrary expressions. If params can come
    # from outside the project, replace it with ast.literal_eval.
    try:
        params = eval(params)
    except Exception:
        params = ''

    # Drop the first parenthesised segment of the name. The former fallback
    # repeated the ASCII attempt verbatim and therefore could never succeed.
    # NOTE(review): full-width parentheses are assumed to be the intended
    # second form — confirm against the original file.
    for opener, closer in (('(', ')'), ('\uff08', '\uff09')):
        try:
            name = name.split(opener)[0] + name.split(closer)[1]
            break
        except Exception:
            continue

    brand = str(brand)
    # For these brands a purely numeric token is accepted as a model.
    brand_flag = 1 if any(special in brand for special in ('得力', '世达', '华为')) else 0
    brand_tokens = re.findall(r"[A-Za-z0-9-]+", brand)
    brand_remove = brand_tokens[0].upper() if brand_tokens else '没有英文品牌!'

    param_model = ''
    # Only a dict can carry the model keys; anything else used to crash on .keys().
    if isinstance(params, dict):
        for key in ('认证型号', '产品型号', '型号'):
            if key in params:
                param_model = params[key]
                break
    param_model = '' if param_model is None else str(param_model)
    param_model = param_model.upper().replace(brand_remove, '')
    if param_model and (
        not judge_unit(param_model)
        or '*' in param_model
        or '×' in param_model
        or (param_model.isdigit() and brand_flag != 1)
    ):
        param_model = ''

    # ASCII tokens of at least two characters are the candidates from the name;
    # rejected candidates become '?' so they stay short for the length ranking.
    name_xinghao_lyst = []
    for token in re.findall(r"[A-Za-z0-9-]+", name):
        if len(token) < 2:
            continue
        token = token.upper().replace(brand_remove, '')
        if not token or not judge_unit(token) or (token.isdigit() and brand_flag != 1):
            token = '?'
        name_xinghao_lyst.append(token)

    if name_xinghao_lyst:
        return_model = max(name_xinghao_lyst, key=len)
    else:
        return_model = param_model

    res_model = param_model if param_model else return_model
    if res_model == '?' or res_model == '':
        res_model = '无型号'
    # '\\T' is the backslash + 'T' that the former invalid escape '\T' denoted.
    return res_model.replace('/T', '').replace('\\T', '')
'''
if len(name_xinghao_lyst) == 0:
#type_lyst.append(param_xinghao.upper())
if param_xinghao.isdigit():
param_xinghao = '无型号'
return param_xinghao
else:
return param_xinghao.upper()
else:
if param_xinghao in name_xinghao_lyst:
#type_lyst.append(param_xinghao.upper())
return param_xinghao.upper()
else:
xinghao_data = max(name_xinghao_lyst, key=len)
for xinghao in name_xinghao_lyst:
if len(xinghao) > 2 and '*' not in xinghao and judge_unit(xinghao):
xinghao_data = xinghao
break
if not judge_unit(xinghao_data):
xinghao_data == '无型号'
#type_lyst.append(xinghao_data.upper())
return xinghao_data
'''
def predict_category(name_list):
    '''
    Predict a category for every product name.

    The prediction runs with "./text_moudle_LXWL" as working directory
    (presumably the model resolves its files relative to it — confirm).
    The previous working directory is restored afterwards, also when the
    prediction raises.

    :param name_list: original product names
    :return: list of predicted categories
    '''
    previous_dir = os.getcwd()
    os.chdir("./text_moudle_LXWL")
    try:
        tf.reset_default_graph()
        model = LXWL_name2subcategory()
        return model.namelyst_predict(name_list)
    finally:
        # Previously a failing prediction left the process inside the model directory.
        os.chdir(previous_dir)
def fuzzy_match(zgc_brand_code_list,model_list,price_list):
    """Find library products of a brand whose name contains the given model.

    Args:
        zgc_brand_code_list: brand id per product (compared as int, or as
            text when it cannot be converted to int).
        model_list: model string per product.
        price_list: price per product; unparsable values count as 0.

    Returns:
        ``(res_name_list, res_code_list)``: for every input product, the names
        and sku codes of at most three matches, ordered by the absolute
        difference between the given price and the product's index price.
    """
    # Load every product with state '1' or '4' together with its index price.
    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou202006','ZI_NEW')
    try:
        cursor = conn.cursor()
        # Explicit spaces at every line end keep the clauses separated.
        cursor.execute(
            "select a.sku,a.skuname,b.brandid,c.index_price_wave from p_sku a "
            "left join p_spu b "
            "on a.spuid = b.id "
            "left join (select goods_id,index_price_wave from zdindex.dbo.zd_week_price where periods in (select top 1 periods from zdindex.dbo.zd_entry_goods_price)) c "
            "on a.sku = c.goods_id "
            "where a.state in ('1','4')")
        data = cursor.fetchall()
        df_db = pd.DataFrame(data, columns=[column[0] for column in cursor.description])
    finally:
        # The connection used to stay open after the call.
        conn.close()

    res_name_list = []
    res_code_list = []
    for brand_code,model,price in zip(zgc_brand_code_list,model_list,price_list):
        try:
            price = int(float(price))
        except Exception:
            price = 0
        try:
            brand_df = df_db[df_db['brandid'] == int(brand_code)]
        except Exception:
            brand_df = df_db[df_db['brandid'] == str(brand_code)]

        wanted = str(model).strip().upper()
        temp_res_name_list = []
        temp_res_code_list = []
        temp_res_price_diff_list = []
        for name,product_code,index_price in zip(brand_df['skuname'].tolist(),brand_df['sku'].tolist(),brand_df['index_price_wave'].tolist()):
            if index_price is None:
                index_price = 0
            # Keep each sku once, the first time its name contains the model.
            if wanted in name.upper() and product_code not in temp_res_code_list:
                temp_res_name_list.append(name)
                temp_res_code_list.append(product_code)
                temp_res_price_diff_list.append(abs(price-index_price))

        temp_df = pd.DataFrame()
        temp_df['name'] = temp_res_name_list
        temp_df['code'] = temp_res_code_list
        temp_df['diff'] = temp_res_price_diff_list
        temp_df = temp_df.sort_values('diff',ascending=True).head(3)
        res_name_list.append(temp_df['name'].tolist())
        res_code_list.append(temp_df['code'].tolist())
    return res_name_list,res_code_list
def standard_point_sku_name(category_name,name_rules,cat_subtitle_df):
    # Loads the vw_property rows of every category listed in important_category
    # and splits the first four characters of ``identy`` into one column each.
    # NOTE(review): the visible body stops after these column assignments - it
    # never uses its three parameters, returns nothing and never closes the
    # connection. The rest of the function appears to be missing from this
    # view; confirm against the complete file before relying on it.
    # NOTE(review): connection credentials are hard-coded in source.
    conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database= 'ZI_NEW',autocommit=True)
    cursor_zi_new = conn_zi_new.cursor()
    cursor_zi_new.execute(f"select * from vw_property where name in (select category_name from important_category)")
    data = cursor_zi_new.fetchall()
    # Column names are taken from the cursor description.
    subtitle_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
    # One flag column per character position of ``identy`` (positions 0..3).
    subtitle_df['require_param'] = subtitle_df['identy'].apply(lambda x: x[0])
    subtitle_df['match_param'] = subtitle_df['identy'].apply(lambda x: x[1])
    subtitle_df['standard_param'] = subtitle_df['identy'].apply(lambda x: x[2])
    subtitle_df['part_param'] = subtitle_df['identy'].apply(lambda x: x[3])
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 25 09:59:23 2020
@author: rico
"""
# update nopoint_match_result set product = '' where productcode is Null
from API import zgc_api
from db import MSSQL
import pandas as pd
import time
from public_function import *
import uuid
import datetime
from delCalPrice_UpdatePriceDiff import check_configure_price,create_price_difference_single
# Initialise data
def InitializeData(path,channel_alias):
    """Upload one spreadsheet of supplier products into ``product_all``.

    Reads the Excel file at ``path``, builds one insert tuple per row, bulk
    inserts them, and then posts the category and attribute mappings through
    ``zgc_api`` when the corresponding frames are not empty.

    NOTE(review): ``url``, ``price``, ``df_category_map`` and ``df_attr_map``
    are used below but are not defined anywhere in this function; unless they
    are module-level names defined elsewhere this raises ``NameError``.
    Confirm where they are supposed to come from.

    :param path: path of the Excel file to upload.
    :param channel_alias: channel alias stored with every row; ``'CQ'``
        selects the key/value-list parameter format.
    """
    # Load the uploaded data
    df_product = pd.read_excel(path,converters = {'供应商SKU':str})
    df_product = df_product.fillna('无')
    # Create the database connection
    mssql = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql._cur
    insert_data = list()
    # Instantiate the progress bar
    index_ = Index()
    counter = 1
    # Upload product data
    for index,row in df_product.iterrows():
        print(index_(counter, len(df_product)-1), end='%')
        counter += 1
        category = row['三级品类']
        brand = row['品牌']
        sku = row['供应商SKU']
        productname = row['商品名称']
        channel = row['客户名称']
        channel_alias = channel_alias
        params_ori = str(row['参数值'])
        # The batch label is today's local date.
        batch = time.strftime("%Y-%m-%d",time.localtime())
        # Process the parameters
        if channel_alias == 'CQ':
            try:
                # NOTE(security): eval() runs whatever expression the spreadsheet
                # cell contains; the cell is expected to hold a list of
                # {'key': ..., 'value': ...} dicts.
                params = str(dict(zip([kv['key'].replace(':','') for kv in eval(row['参数值'])],[kv['value'] for kv in eval(row['参数值'])])))
            except:
                params = ""
        else:
            params = str(row['参数值'])
        # '无' is the fill value used above for empty cells.
        if params == "无":
            params = "{}"
        # channel_alias is inserted twice: once as channel_id, once as channel_alias.
        insert_data.append((sku,productname,brand,category,params,url,price,channel,channel_alias,channel_alias,batch,params_ori))
    cursor_zi_service.executemany(f"insert into product_all (sku,name,brand,category,params,url,price,channel,channel_id,channel_alias,batch,params_ori) \
values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",insert_data)
    mssql.Close()
    if df_category_map.empty:
        pass
    else:
        # Upload the category mapping
        data = {
            "params_info": {
                "channelAlias": f"{channel_alias}",
                "cust_category_list": df_category_map['三级类别'].tolist(),
                "category_list": df_category_map['指数末级类'].tolist(),
                "categoryId_list": df_category_map['指数末级类编码'].tolist()
            }
        }
        print(zgc_api("Stock-InCategoryRelationshipToServerInfo",data))
    if df_attr_map.empty:
        pass
    else:
        # Upload the parameter-name mapping
        data = {
            "params_info": {
                "channelAlias": f"{channel_alias}",
                "category_list": df_attr_map['指数末级类'].tolist(),
                "paramsAttr": df_attr_map['指数参数项'].tolist(),
                "cust_paramsAttr_list": df_attr_map['参数项'].tolist()
            }
        }
        print(zgc_api("Stock-InAttrRelationshipInfo",data))
# Load data
def LoadData(batch,channel_alias,mode):
    """Load rows of ``product_all`` into a DataFrame.

    :param batch: batch label to load, or ``"all"`` to ignore both ``batch``
        and ``channel_alias``.
    :param channel_alias: channel alias to load (ignored when ``batch`` is
        ``"all"``).
    :param mode: ``'deal'`` selects only unprocessed rows (``state is Null``)
        with a reduced column set; any other value selects every column of
        every matching row.
    :return: DataFrame whose columns are named after the result columns.
    """
    if mode == 'deal':
        selected = "id,sku,name,brand,category,params,price,channel_alias,batch"
    else:
        selected = "*"

    conditions = []
    query_args = []
    if batch != "all":
        conditions.extend(["batch = %s", "channel_alias = %s"])
        query_args.extend([str(batch), str(channel_alias)])
    if mode == 'deal':
        conditions.append("state is Null")

    query = f"select {selected} from product_all"
    if conditions:
        query += " where " + " and ".join(conditions)

    # Create the database connection
    mssql = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql._cur
    try:
        # Values are passed as driver parameters (same %s style the file uses
        # for its executemany calls) instead of being pasted into the SQL text.
        if query_args:
            cursor_zi_service.execute(query, tuple(query_args))
        else:
            cursor_zi_service.execute(query)
        df = pd.DataFrame(cursor_zi_service.fetchall(), columns=[column[0] for column in cursor_zi_service.description])
    finally:
        # Release the connection even when the query fails.
        mssql.Close()
    return df
# SKU de-duplication
def SkuMatch(df,channel_alias):
    """Mark products whose supplier SKU is already known and reuse history.

    Asks ``GetSKUMatchResInfo`` which supplier SKUs are already mapped. For a
    mapped SKU the row gets ``state = '9'`` together with the mapped product
    code and name, and ``is_different`` tells whether the product name differs
    from the newest earlier row of the same channel and SKU. For an unmapped
    SKU that has an earlier row, the confirmed brand/category/model of that
    row is copied and the row gets ``state = '8'``.

    :param df: rows of one batch; needs the columns ``id``, ``sku``, ``name``
        and ``batch`` (the first distinct ``batch`` value is used).
    :param channel_alias: channel alias of the batch.
    :return: the service's error message when it reports ``code == 0``,
        otherwise ``None``.
    """
    from contextlib import ExitStack

    def sql_text(value):
        # Double single quotes so the value can sit inside a '...' SQL literal.
        return str(value).replace("'","''")

    # The ExitStack closes both connections on every exit path, including the
    # early return below and any exception.
    with ExitStack() as stack:
        # Create the database connections
        mssql = MSSQL('123.57.45.119','ZI_Service')
        cursor_zi_service = mssql._cur
        stack.callback(mssql.Close)
        mssql_new = MSSQL('123.56.115.207','ZI_NEW')
        cursor_zi_new = mssql_new._cur
        stack.callback(mssql_new.Close)

        # Initialise parameters
        id_list = df['id'].tolist()
        sku_list = df['sku'].tolist()
        name_list = df['name'].tolist()
        batch = df['batch'].unique().tolist()[0]

        # Fetch the SKU match result
        data = {
            "params_info": {
                "channelAlias": f"{channel_alias}",
                "cust_sku_list": sku_list
            }
        }
        res = zgc_api("GetSKUMatchResInfo",data)
        # De-duplication failed
        if res['code'] == 0:
            return res['msg']
        # De-duplication succeeded
        res_dict = res['res_dict']

        # Instantiate the progress bar
        index_ = Index()
        # Update the de-duplication information
        for counter,(id_,sku,name) in enumerate(zip(id_list,sku_list,name_list), start=1):
            print(index_(counter, len(id_list)-1), end='%')
            # Query the newest earlier row of this channel and SKU
            cursor_zi_service.execute(f"select zi_brandname,zi_brandcode,zi_subcategoryname,zi_subcategorycode,model,params_standard,name from product_all where id = (select max(id) from product_all where batch != '{batch}' and channel_alias = '{channel_alias}' and sku = '{sql_text(sku)}')")
            df_his = pd.DataFrame(cursor_zi_service.fetchall(), columns=[column[0] for column in cursor_zi_service.description])

            # The service answers False for an unmapped SKU and the product
            # code otherwise; the equality test is kept as written.
            if res_dict[sku] == False:
                if df_his.empty:
                    continue
                zi_brandname = df_his['zi_brandname'].tolist()[0].replace("'","''")
                zi_brandcode = str(df_his['zi_brandcode'].tolist()[0])
                zi_subcategoryname = df_his['zi_subcategoryname'].tolist()[0]
                zi_subcategorycode = df_his['zi_subcategorycode'].tolist()[0]
                model = df_his['model'].tolist()[0]
                try:
                    params_standard = df_his['params_standard'].tolist()[0].replace("'","''")
                    cursor_zi_service.execute(f"update product_all set zi_brandname = '{zi_brandname}',zi_brandcode = '{zi_brandcode}',zi_subcategoryname = '{zi_subcategoryname}',zi_subcategorycode = {zi_subcategorycode},model = '{model}',params_standard = '{params_standard}',state = '8' where id = {id_}")
                except Exception:
                    # Best effort: when the historical standard parameters are
                    # missing or cannot be written, copy everything else.
                    cursor_zi_service.execute(f"update product_all set zi_brandname = '{zi_brandname}',zi_brandcode = '{zi_brandcode}',zi_subcategoryname = '{zi_subcategoryname}',zi_subcategorycode = {zi_subcategorycode},model = '{model}',state = '8' where id = {id_}")
                continue

            code = res_dict[sku]
            if df_his.empty:
                name_his = "往期该渠道无数据"
            else:
                name_his = df_his['name'].tolist()[0]
            cursor_zi_new.execute(f"select skuname from p_sku where sku = '{code}'")
            try:
                input_name = cursor_zi_new.fetchone()[0]
            except Exception:
                input_name = '未查询到数据'
            is_different = '0' if name == name_his else '1'
            cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{code}',new_name = '{sql_text(input_name)}' ,pic_state = '0',is_different = '{is_different}' where id = {id_}")
def SkuMatchResult(batch,channel_alias):
    """Export the SKU-matched rows (``state = '9'``) of one batch to Excel.

    The workbook is written to the current directory as
    ``<channel_alias>-<batch>sku排重结果确认.xlsx``.

    :param batch: batch label to export.
    :param channel_alias: channel alias to export.
    """
    # Open the database connection
    service_db = MSSQL('123.57.45.119','ZI_Service')
    cur = service_db._cur
    query = f"select id,name,brand,category,productcode,new_name,is_different,state from product_all where batch = '{batch}' and channel_alias = '{channel_alias}' and state = '9'"
    cur.execute(query)
    records = cur.fetchall()
    header = [column[0] for column in cur.description]
    matched = pd.DataFrame(records, columns=header)
    target = f"{channel_alias}-{batch}sku排重结果确认.xlsx"
    matched.to_excel(target)
    service_db.Close()
def UpdateSkuMatchResult(path):
    """Write the reviewed SKU-match workbook back to ``product_all``.

    Every row of the workbook updates the row with the same ``id``. A row
    whose ``state`` cell is blank (or holds ``'None'`` / ``'null'`` /
    ``'nan'``) resets ``state``, ``productcode`` and ``new_name`` to NULL;
    any other row stores the three reviewed values.

    :param path: path of the reviewed Excel file; needs the columns ``id``,
        ``productcode``, ``new_name`` and ``state``.
    """
    def sql_literal(value):
        # Blank cells are read as NaN; store NULL rather than the text 'nan'.
        if pd.isna(value):
            return "Null"
        # Double single quotes so the value can sit inside a '...' SQL literal.
        return "'" + str(value).replace("'","''") + "'"

    # Create the database connection
    mssql = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql._cur
    try:
        df = pd.read_excel(path,converters = {'productcode':str,'state':str})
        for _,row in df.iterrows():
            id_ = int(row['id'])
            state = row['state']
            # The statements originally started with the misspelled keyword
            # "upadte", which the server rejects; it is spelled "update" here.
            if pd.isna(state) or state in ('','None','null','nan'):
                cursor_zi_service.execute(f"update product_all set state = Null,productcode = Null,new_name = Null where id = {id_}")
            else:
                cursor_zi_service.execute(f"update product_all set state = {sql_literal(state)},productcode = {sql_literal(row['productcode'])},new_name = {sql_literal(row['new_name'])} where id = {id_}")
    finally:
        # Release the connection even when reading or updating fails.
        mssql.Close()
def _extract_model_param(raw_params):
    """Return the model-bearing entry of one product's parameter string.

    The string is evaluated to a dict; the first of the keys 认证型号, 产品型号,
    型号 that is present is returned as a one-entry dict. Anything that cannot
    be evaluated, is not a dict, or holds none of those keys yields ``{}``.
    """
    try:
        # NOTE(security): eval() executes arbitrary expressions taken from the
        # ``params`` column; replace with a safe parser once the stored format
        # is confirmed to be a plain literal.
        params = eval(raw_params)
    except Exception:
        return {}
    if not isinstance(params, dict):
        return {}
    for key in ('认证型号','产品型号','型号'):
        if key in params:
            return {key: params[key]}
    return {}


def _brand_match_result(zi_brandname, cust_brand, cust_name):
    """Compare the resolved brand with the customer's brand and product name.

    Returns ``'2'`` when the resolved brand is flagged as a new brand, ``'1'``
    when its Chinese or its Latin part occurs in both the customer brand and
    the customer product name, and ``'0'`` otherwise.
    """
    # '无数据' stands in for a missing part.
    cn_name = ''.join(re.findall(r'[0-9\u4E00-\u9FA5]', zi_brandname)) or '无数据'
    en_name = ''.join(re.findall(r'[a-zA-Z0-9-]', zi_brandname)).upper() or '无数据'
    if '新建品牌' in zi_brandname:
        return '2'
    if (cn_name in cust_brand and cn_name in cust_name) or (en_name in cust_brand and en_name in cust_name):
        return '1'
    return '0'


# Analyse the data and obtain its basic information
def AnalyseBasicInfo(df):
    """Resolve category, brand and model for every row and store the result.

    Calls the services ``GetPredictCategory``, ``GetZGCCategoryInfo``,
    ``GetZGCBrandInfo`` and ``GetModelInfo`` in that order, adds their answers
    to ``df`` as the columns ``predict_cat``, ``predict_cat_code``,
    ``zi_category``, ``zi_category_code``, ``zi_brandname``, ``zi_brandcode``,
    ``model`` and ``model_flag``, and writes them to ``product_all`` by ``id``.

    Parameter strings are evaluated row by row, so one unparsable row only
    loses its own model hint instead of discarding the hints of every row.

    :param df: rows to analyse; needs the columns ``id``, ``name``, ``brand``,
        ``category``, ``params`` and ``channel_alias``. Modified in place.
    """
    # Create the database connection
    mssql = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql._cur
    try:
        # Initialise parameters; the "url_*" lists are the same values again.
        name_list = df['name'].tolist()
        url_name_list = df['name'].tolist()
        brand_list = df['brand'].tolist()
        url_brand_list = df['brand'].tolist()
        category_list = df['category'].tolist()
        params_list = df['params'].tolist()
        channelAlias_list = df['channel_alias'].tolist()

        # Fetch the predicted category
        data = {
            "category_info": {
                "cust_name_list": name_list,
                "url_name_list":url_name_list
            }
        }
        res = zgc_api("GetPredictCategory",data)
        df['predict_cat'] = res['category_list']
        df['predict_cat_code'] = res['categoryCode_list']

        # Fetch the index category information
        data = {
            "category_info": {
                "cust_category_list": category_list,
                "pre_category_list":df['predict_cat'].tolist()
            }
        }
        res = zgc_api("GetZGCCategoryInfo",data)
        df['zi_category'] = res['category_list']
        df['zi_category_code'] = res['categoryCode_list']

        # Fetch the index brand information
        data = {
            "brand_info": {
                "cust_brand_list":brand_list,
                "url_brand_list":url_brand_list,
                "cust_name_list":name_list,
                "url_name_list": url_name_list
            }
        }
        res = zgc_api("GetZGCBrandInfo",data)
        df['zi_brandname'] = res['brand_list']
        df['zi_brandcode'] = res['brandId_list']

        # Fetch the model information; only the model-bearing parameter of
        # each product is sent, serialised as the text of a dict.
        model_params_list = [str(_extract_model_param(params)) for params in params_list]
        data = {
            "model_info": {
                "channelAlias_list": channelAlias_list,
                "cust_category_list":category_list,
                "cust_name_list": name_list,
                "url_name_list": url_name_list,
                "cust_brand_list": brand_list,
                "cust_params_list": model_params_list
            }
        }
        res = zgc_api("GetModelInfo",data)
        df['model'] = res['model_list']
        df['model_flag'] = res['modelFlag_list']

        update_rows = []
        # Instantiate the progress bar
        index_ = Index()
        # Collect the basic information of every row
        for counter,(_,row) in enumerate(df.iterrows(), start=1):
            print(index_(counter, len(df)-1), end='%')
            predict_category = row['predict_cat']
            zi_subcategoryname = row['zi_category']
            zi_brandname = row['zi_brandname']
            # 1 when the prediction agrees with the resolved category.
            predict_result = 1 if predict_category == zi_subcategoryname else 0
            # Brand comparison result
            brand_match_result = _brand_match_result(zi_brandname, str(row['brand']).upper(), str(row['name']).upper())
            update_rows.append((predict_category,row['predict_cat_code'],zi_subcategoryname,row['zi_category_code'],predict_result,zi_brandname,row['zi_brandcode'],row['model'],row['model_flag'],brand_match_result,row['id']))
        cursor_zi_service.executemany("update product_all set predict_category = (%s) ,predict_category_code = (%s) ,zi_subcategoryname = (%s) ,zi_subcategorycode = (%s) ,predict_result = (%s) ,zi_brandname = (%s) ,zi_brandcode = (%s) ,model = (%s),model_flag = (%s),brand_match_result = (%s) where id = (%d)",update_rows)
    finally:
        # Release the connection even when a service call or the update fails.
        mssql.Close()
# Export the basic information to Excel
def ExportToExcelBasicConfirm(batch,channel_alias):
    """Export the unprocessed rows (``state is Null``) of one batch for review.

    The workbook is written to the current directory as
    ``<channel_alias>基础信息确认<batch>.xlsx``.

    :param batch: batch label to export.
    :param channel_alias: channel alias to export.
    """
    # Open the database connection
    service_db = MSSQL('123.57.45.119','ZI_Service')
    cur = service_db._cur
    query = f"select id,name,brand,zi_brandname,zi_brandcode,category,zi_subcategoryname,zi_subcategorycode,predict_category,predict_category_code,predict_result,model,model_flag,params,price,url,channel,channel_alias,productcode,remark from product_all where batch = '{batch}' and channel_alias = '{channel_alias}' and state is Null"
    cur.execute(query)
    records = cur.fetchall()
    header = [column[0] for column in cur.description]
    pending = pd.DataFrame(records, columns=header)
    target = f"{channel_alias}基础信息确认{batch}.xlsx"
    pending.to_excel(target)
    service_db.Close()
# Update the data after the basic information has been confirmed
def UpdateBasicData(path):
    """Write the reviewed basic-information workbook back to ``product_all``.

    A row with a remark is rejected (``state = '2'`` plus the remark). Every
    other row stores the confirmed brand, category and model; a missing brand
    or category code is looked up by name in ``p_brand`` / ``p_category``, and
    ``point_category_flag`` records whether the category is listed in
    ``important_category``.

    :param path: path of the reviewed Excel file (sheet ``Sheet1``).
    :raises TypeError: when a missing code has to be looked up by name and
        the name is not found.
    """
    from contextlib import ExitStack

    def sql_text(value):
        # Double single quotes so the value can sit inside a '...' SQL literal.
        return str(value).replace("'","''")

    # Read the Excel data; blank cells become the text 'nan'.
    df = pd.read_excel(path,sheet_name = "Sheet1",converters={'zi_brandcode':str,'zi_subcategorycode':str})
    for column in ('zi_brandcode','zi_subcategorycode','remark'):
        df[column] = df[column].apply(lambda x:str(x))

    # The ExitStack closes both connections on every exit path.
    with ExitStack() as stack:
        # Create the database connections
        mssql_service = MSSQL('123.57.45.119','ZI_Service')
        cursor_zi_service = mssql_service._cur
        stack.callback(mssql_service.Close)
        mssql_new = MSSQL('123.56.115.207','ZI_NEW')
        cursor_zi_new = mssql_new._cur
        stack.callback(mssql_new.Close)

        # Fetch the list of important categories
        cursor_zi_new.execute("select category_name from important_category")
        important_categories = set(pd.DataFrame((cursor_zi_new.fetchall()),columns=[column[0] for column in cursor_zi_new.description])['category_name'].tolist())

        # Instantiate the progress bar
        index_ = Index()
        # Update the data
        for counter,(_,row) in enumerate(df.iterrows(), start=1):
            print(index_(counter, len(df)-1), end='%')
            id_ = row['id']
            remark = row['remark']
            if remark != 'nan':
                # Rejected row: remarks are free text, so quotes are escaped.
                cursor_zi_service.execute(f"update product_all set state = '2',remark = '{sql_text(remark)}' where id = {id_}")
                continue

            zi_brandname = row['zi_brandname'].strip().replace("'","''").replace("[","").replace("]","")
            zi_brandcode = row['zi_brandcode'].strip().replace("?","").replace("[","").replace("]","")
            zi_subcategoryname = row['zi_subcategoryname'].strip()
            zi_subcategorycode = row['zi_subcategorycode'].strip().replace("?","")
            model = str(row['model']).strip()

            # Missing codes are resolved by name.
            if zi_brandcode == 'nan':
                cursor_zi_new.execute(f"select id from p_brand where name = '{zi_brandname}'")
                zi_brandcode = str(int(cursor_zi_new.fetchone()[0]))
            if zi_subcategorycode == 'nan' and zi_subcategoryname != '该类别非中电类别':
                cursor_zi_new.execute(f"select id from p_category where name = '{sql_text(zi_subcategoryname)}'")
                zi_subcategorycode = str(int(cursor_zi_new.fetchone()[0]))

            point_category_flag = '1' if zi_subcategoryname in important_categories else '0'
            cursor_zi_service.execute(f"update product_all set zi_brandname = '{zi_brandname}',zi_brandcode = '{zi_brandcode}',zi_subcategoryname = '{sql_text(zi_subcategoryname)}',zi_subcategorycode = '{zi_subcategorycode}',point_category_flag = '{point_category_flag}',model = '{sql_text(model)}' where id = {id_}")
# Fetch the product data of the important categories
def GetPointCategoryData(batch,channel_alias):
    """Load the open rows of one batch that belong to the important categories.

    Open rows are those with ``state`` NULL or ``'8'``; the categories are
    笔记本, 台式机, 一体电脑 and 复印纸.

    :param batch: batch label to load.
    :param channel_alias: channel alias to load.
    :return: DataFrame with every column of the matching ``product_all`` rows.
    """
    # Open the database connection
    service_db = MSSQL('123.57.45.119','ZI_Service')
    cur = service_db._cur
    query = f"select * from product_all where (state is Null or state = '8') and zi_subcategoryname in ('笔记本','台式机','一体电脑','复印纸') and batch = '{batch}' and channel_alias = '{channel_alias}'"
    cur.execute(query)
    records = cur.fetchall()
    header = [column[0] for column in cur.description]
    point_rows = pd.DataFrame(records, columns=header)
    service_db.Close()
    return point_rows
def _standardise_product_params(row, params_dict, series_df):
    """Translate one product's parameter names into index parameter names.

    ``params_dict[category]`` maps every customer parameter name to the index
    name, or to '无参数项对应关系' when there is no mapping (such entries are
    dropped). For 笔记本 / 台式机 / 一体电脑 the product series is added by
    searching the product name for a known sub-series or, failing that, series.
    """
    category = row['zi_subcategoryname']
    name = row['name']
    brand = row['zi_brandname']
    try:
        # NOTE(security): eval() executes arbitrary expressions taken from the
        # ``params`` column.
        params = eval(row['params'])
    except Exception:
        params = {}

    params_std_dict = {}
    for key in params:
        transform_key = params_dict[category][key]
        if transform_key == '无参数项对应关系':
            continue
        params_std_dict[transform_key] = params[key]

    # Handle the product sub-series
    if category in ['笔记本','台式机','一体电脑']:
        brand_series = series_df[(series_df['子类名称'] == category) & (series_df['品牌名称'] == brand)]
        upper_name = name.upper()
        fallback_series = []
        for child_series,series in zip(brand_series['产品子系列'].tolist(),brand_series['产品系列'].tolist()):
            if child_series.upper() in upper_name:
                # A sub-series hit wins immediately.
                params_std_dict.update({"产品系列":series,"产品子系列":child_series})
                break
            elif series.upper() in upper_name:
                fallback_series.append(series)
        else:
            # No sub-series matched: fall back to the first matching series.
            if fallback_series:
                params_std_dict.update({'产品系列':fallback_series[0]})
    return params_std_dict


# Parse the product data of the important categories and export it
def GetPointCategoryDataDetail(df,channel_alias):
    """Build the parameter-confirmation workbook for important categories.

    For every category in ``df`` the customer parameter names are translated
    through ``GetZGCParamsInfo``, the values are standardised through
    ``GetZGCParamValuesInfo``, and one sheet ``<category>参数数据`` with one
    column per required parameter is written to
    ``<channel_alias>重点类参数确认.xlsx``. Rows that already carry evaluable
    ``params_standard`` reuse those values; missing values are exported as
    '暂无数据'.

    Column values are collected in a dict of lists, so values containing
    quotes or backslashes and parameter names that are not valid identifiers
    are handled like any other text.

    :param df: rows to export; gains the column ``params_standard_create``.
    :param channel_alias: channel alias used in the workbook file name.
    """
    # Load the reference tables, then release the connection before the
    # (slow) service calls.
    mssql = MSSQL('123.56.115.207','ZI_NEW')
    cursor_zi_new = mssql._cur
    try:
        # Property definitions of every category
        cursor_zi_new.execute("select * from vw_property")
        subtitle_df = pd.DataFrame(cursor_zi_new.fetchall(), columns=[column[0] for column in cursor_zi_new.description])
        # Series / sub-series relations
        cursor_zi_new.execute("select * from series_relationship")
        series_df = pd.DataFrame(cursor_zi_new.fetchall(), columns=[column[0] for column in cursor_zi_new.description])
    finally:
        mssql.Close()
    subtitle_df['subtitle'] = subtitle_df['subtitle'].apply(lambda x: x.strip())
    # The first character of ``identy`` is the "required" flag.
    subtitle_df['require_param'] = subtitle_df['identy'].apply(lambda x: x[0])

    # Translate the scraped parameter names of every category
    zi_category_list = df['zi_subcategoryname'].unique().tolist()
    params_dict = {}
    for zi_category in zi_category_list:
        cat_df = df[df['zi_subcategoryname'] == zi_category]
        param_names = set()
        for params in cat_df['params'].tolist():
            # A bare nan would not evaluate; replace it before evaluating.
            param_names.update(eval(params.replace('nan',"无")))
        data = {
            "params_info": {
                "category": zi_category,
                "cust_paramsAttr_list": list(param_names)
            }
        }
        res = zgc_api("GetZGCParamsInfo",data)
        params_dict[zi_category] = res['paramsAttr_dict']

    params_standard_list = []
    for index,row in df.iterrows():
        params_standard_list.append(_standardise_product_params(row, params_dict, series_df))
        print(index+1)
    df['params_standard_create'] = params_standard_list

    writer = pd.ExcelWriter(f"{channel_alias}重点类参数确认.xlsx")
    for category in zi_category_list:
        # Rows of this category (a copy, so adding columns is safe)
        cat_df = df[df['zi_subcategoryname'] == category].copy()
        cat_df['remark'] = cat_df['remark'].apply(lambda x:str(x))
        # Required parameters of this category; 'CPU属性' is not filled in by
        # hand (the system supplies it when the product is created).
        required_params = subtitle_df[(subtitle_df['name'] == category) & (subtitle_df['require_param'] == '1')]['subtitle'].tolist()
        if 'CPU属性' in required_params:
            required_params.remove('CPU属性')

        # Request payload: rows without a remark only
        id_list = []
        url_params_list = []
        for _,row in cat_df.iterrows():
            if row['remark'] != 'nan' and row['remark'] != 'None':
                continue
            id_list.append(row['id'])
            scraped = row['params_standard_create']
            url_params_list.append({db_param: scraped.get(db_param, '') for db_param in required_params if db_param not in ['产品型号','CPU属性']})
        print(f"{category}数据准备完成")
        data ={
            "params_info": {
                "category": category,
                "id_list":id_list,
                "params_dict_list": url_params_list
            }
        }
        res = zgc_api("GetZGCParamValuesInfo",data)
        url_res_dict = res['paramsValue_dict']

        column_values = {required_param: [] for required_param in required_params}
        for _,row in cat_df.iterrows():
            _id = str(row['id'])
            try:
                params_standard = eval(row['params_standard'])
            except Exception:
                params_standard = '无历史参数信息'
            if params_standard == '无历史参数信息':
                for required_param in required_params:
                    try:
                        # Each answer is a one-entry dict {value: flag}.
                        url_value,_ = tuple(url_res_dict[_id][required_param].items())[0]
                        url_value = url_value.replace("\t"," ").replace("\n"," ")
                    except Exception:
                        url_value = ''
                    column_values[required_param].append("暂无数据" if url_value == '' else str(url_value))
            else:
                for required_param in required_params:
                    column_values[required_param].append(str(params_standard[required_param]))
        for required_param in required_params:
            cat_df[required_param] = column_values[required_param]

        columns_list = ['id','name','url_name','brand','url_brand','zi_brandname','zi_brandcode','zi_subcategoryname','zi_subcategorycode','params','model','url','channel','channel_alias','remark','productcode','new_name'] + required_params
        export_df = cat_df[columns_list]
        sheet_category = category.replace('/','_')
        export_df.to_excel(writer,sheet_name=f"{sheet_category}参数数据")
    # close() writes the workbook; ExcelWriter.save() no longer exists in
    # pandas 2.
    writer.close()
# Fetch the product data of the non-important categories
def GetNonpointCategoryData(batch,channel_alias):
    """Load the open rows of one batch outside the important categories.

    Open rows are those with ``state`` NULL or ``'8'``; rows of the categories
    笔记本, 台式机, 一体电脑 and 复印纸 are excluded.

    :param batch: batch label to load.
    :param channel_alias: channel alias to load.
    :return: DataFrame with every column of the matching ``product_all`` rows.
    """
    # Open the database connection
    service_db = MSSQL('123.57.45.119','ZI_Service')
    cur = service_db._cur
    query = f"select * from product_all where (state is Null or state = '8') and zi_subcategoryname not in ('笔记本','台式机','一体电脑','复印纸') and batch = '{batch}' and channel_alias = '{channel_alias}'"
    cur.execute(query)
    records = cur.fetchall()
    header = [column[0] for column in cur.description]
    nonpoint_rows = pd.DataFrame(records, columns=header)
    service_db.Close()
    return nonpoint_rows
# Fetch the details of the non-important-category data
def GetNonpointCategoryDataDetail(df_):
    """Fuzzy-match non-important-category products against the catalogue.

    Per channel, ``GetModelMatchResInfo`` is asked for catalogue candidates of
    every product. At most the first three candidates are kept: the outcome is
    written to ``product_all`` (``fuzzy_match_result`` ``'1'`` with the
    candidate names/codes, or ``'0'`` when there is none) and one export row
    is produced per kept candidate (one row with empty match fields when
    there is none).

    :param df_: rows to match; needs the columns ``id``, ``name``,
        ``url_name``, ``zi_brandname``, ``zi_brandcode``, ``params``,
        ``model``, ``category``, ``zi_subcategoryname``,
        ``zi_subcategorycode``, ``url``, ``channel``, ``channel_alias`` and
        ``price``.
    :return: DataFrame with those columns plus ``fuzzy_match_result``,
        ``fuzzy_match_name``, ``fuzzy_match_code``, ``warranty``, ``state``,
        ``new_name`` and ``remark`` (the last two preset to ``'null'``).
    """
    product_columns = ['id','name','url_name','zi_brandname','zi_brandcode','params','model','category','zi_subcategoryname','zi_subcategorycode','url','channel','channel_alias','price']
    match_columns = ['fuzzy_match_result','fuzzy_match_name','fuzzy_match_code','warranty','state']
    export_columns = product_columns + match_columns
    export_values = {column: [] for column in export_columns}

    def add_export_row(product_fields, match_fields):
        # One export row = the product's own fields followed by one candidate.
        for column,value in zip(export_columns, tuple(product_fields) + tuple(match_fields)):
            export_values[column].append(value)

    # Create the database connection
    mssql = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql._cur
    try:
        for channel_alias in df_['channel_alias'].unique().tolist():
            df = df_[df_['channel_alias'] == channel_alias]
            product_rows = list(zip(*(df[column].tolist() for column in product_columns)))

            # Brand + model + category fuzzy match
            # NOTE(review): the brand ids are sent exactly as stored; a variant
            # with the brackets stripped used to be computed here but was never
            # sent - confirm which form the service expects.
            data = {
                "params_info": {
                    "cust_category_list": df['category'].tolist(),
                    "brandId_list": df['zi_brandcode'].tolist(),
                    "model_list": df['model'].tolist(),
                    "cust_price_list": df['price'].tolist()
                }
            }
            res = zgc_api("GetModelMatchResInfo",data)
            candidates = zip(product_rows, res['res_name_list'], res['res_sku_list'], res['res_warranty_list'], res['res_state_list'])

            # Instantiate the progress bar
            index_ = Index()
            for counter,(product_fields,names,codes,warranties,states) in enumerate(candidates, start=1):
                print(index_(counter, len(df)-1), end='%')
                _id = product_fields[0]

                if len(names) == 0:
                    add_export_row(product_fields, (0,'','','',''))
                    cursor_zi_service.execute(f"update product_all set fuzzy_match_result = '0' where id = '{_id}'")
                    continue

                if len(names) == 1:
                    kept = [(names[0], codes[0], warranties[0], states[0])]
                    # A single candidate is stored as plain text; quotes are
                    # doubled so the statement stays valid.
                    db_name = str(names[0]).replace("'","''")
                    db_code = str(codes[0]).replace("'","''")
                else:
                    top_names = names[:3]
                    top_codes = codes[:3]
                    kept = list(zip(top_names, top_codes, warranties, states))
                    # Several candidates are stored as the text of the list
                    # with all quotes removed.
                    db_name = str(top_names).replace("'","")
                    db_code = str(top_codes).replace("'","")

                for match_name,match_code,warranty,state in kept:
                    add_export_row(product_fields, (1,match_name,match_code,warranty,state))
                cursor_zi_service.execute(f"update product_all set fuzzy_match_result = '1',fuzzy_match_name = '{db_name}',fuzzy_match_code = '{db_code}' where id = '{_id}'")
            print(channel_alias,"完成")
    finally:
        # Release the connection even when a service call or an update fails.
        mssql.Close()

    export_excel = pd.DataFrame()
    for column in export_columns:
        export_excel[column] = export_values[column]
    # Strip the list brackets from URLs that were stored as the text of a list.
    export_excel['url'] = export_excel['url'].apply(lambda x:str(x).replace('[\'','').replace('\']',''))
    export_excel['new_name'] = 'null'
    export_excel['remark'] = 'null'
    return export_excel
#校验重点类参数数据,若有问题,添加数据字典
def CheckPointParamsData(path):
# Validate the parameter data of the key ("important") categories in the workbook at `path`.
#
# For every category listed in important_category the sheet "<category>参数数据"
# ('/' in the category name replaced by '_') is read; categories whose sheet
# cannot be read are skipped. Rows whose `remark` is neither 'nan' nor 'None'
# are skipped. The remaining rows are sent to the "GetZGCParamValuesInfo" API
# and the returned res['paramsValue_dict'] is merged into the result.
# Values the API flags as False are looked up in ShuJuZiDian_Cfg and, when
# absent, inserted there.
#
# Returns the merged standard-value dict when `flag` is still True at the end;
# otherwise writes "<path without .xlsx>(数据字典补充).xlsx" and returns {}.
# Create the database connection
mssql = MSSQL('123.56.115.207','ZI_NEW')
cursor_zi_new = mssql._cur
# Fetch the list of key categories
cursor_zi_new.execute(f"select category_name from important_category")
point_category_list = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['category_name'].tolist()
# Load the parameter-item definitions stored in the database
cursor_zi_new.execute("select * from vw_property")
db_params = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])
# Rows whose identy is '0000' are excluded from validation
db_params = db_params[db_params['identy'] != '0000']
db_params['subtitle'] = db_params['subtitle'].apply(lambda x: x.strip())
return_dict = dict()
# ids of ShuJuZiDian_Cfg rows collected for the supplement export
complicated_data_dict_id_list = list()
# Set to False as soon as a missing data-dictionary entry is found
flag = True
for category in point_category_list:
try:
point_category = category.replace('/','_')
cat_df = pd.read_excel(path,sheet_name = f"{point_category}参数数据",converters = {'productcode':str})
except:
continue
# Stringify remark so missing cells become the literal 'nan'
cat_df['remark'] = cat_df['remark'].apply(lambda x:str(x))
# Instantiate the progress bar
index_ = Index()
counter = 1
id_list = list()
params_list = list()
for index,row in cat_df.iterrows():
try:
print(index_(counter, len(cat_df)-1), end='%')
counter += 1
except:
print(index_(counter, 1), end='%')
if row['remark'] != 'nan' and row['remark'] != 'None':
continue
id_ = row['id']
# NOTE(review): this rebinds the outer loop variable `category` to the row's zi_subcategoryname
category = row['zi_subcategoryname']
id_list.append(id_)
temp_key_list = list()
temp_value_list = list()
for db_param in db_params[db_params['name'] == category]['subtitle'].tolist():
if db_param in ['产品型号','CPU属性']:
continue
temp_key_list.append(db_param)
temp_value_list.append(row[db_param])
params_list.append(dict(zip(temp_key_list,temp_value_list)))
print(f"{category}数据准备完成")
# Fetch the standard parameter values
data ={
"params_info": {
"category": category,
"id_list": id_list,
"params_dict_list": params_list
}
}
res = zgc_api("GetZGCParamValuesInfo",data)
res_value_dict = res['paramsValue_dict']
return_dict.update(res_value_dict)
# Instantiate the progress bar
index_ = Index()
counter = 1
for index,row in cat_df.iterrows():
try:
print(index_(counter, len(cat_df)-1), end='%')
counter += 1
except:
print(index_(counter, 1), end='%')
if row['remark'] != 'nan' and row['remark'] != 'None':
continue
# The API result is indexed by the string form of the row id
id_ = str(row['id'])
category = row['zi_subcategoryname']
for db_param in db_params[db_params['name'] == category]['subtitle'].tolist():
if db_param in ['产品型号','CPU属性']:
continue
pri_value = row[db_param]
# The first (key, value) pair of the per-parameter dict is (value, flag)
value,value_flag = tuple(res_value_dict[id_][db_param].items())[0]
if value_flag == False:
# NOTE(review): category/db_param/pri_value are interpolated into the SQL text unescaped — confirm they can never contain a quote
cursor_zi_new.execute(f"select * from ShuJuZiDian_Cfg where categoryname = '{category}' and subtitle = '{db_param}' and primitive = '{pri_value}'")
check_df = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])
if check_df.empty:
flag = False
cursor_zi_new.execute(f"insert into ShuJuZiDian_Cfg (categoryname,subtitle,primitive) values ('{category}','{db_param}','{pri_value}')")
cursor_zi_new.execute(f"select id from ShuJuZiDian_Cfg where categoryname = '{category}' and subtitle = '{db_param}' and primitive = '{pri_value}'")
complicated_data_dict_id = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
complicated_data_dict_id_list.append(complicated_data_dict_id)
print(f"完成{category}参数校验")
if flag:
mssql.Close()
print("本批数据校验通过!,返回标准值字典")
return return_dict
else:
# Export the collected dictionary rows that still have no stdvalue
data_dict_path = path.split(".xlsx")[0] + '(数据字典补充).xlsx'
if len(complicated_data_dict_id_list) == 1:
_id = complicated_data_dict_id_list[0]
cursor_zi_new.execute(f"select * from ShuJuZiDian_Cfg where stdvalue is Null and id = {_id}")
else:
cursor_zi_new.execute("select * from ShuJuZiDian_Cfg where stdvalue is Null and id in (%s)" % ','.join(['%s'] * len(complicated_data_dict_id_list)),tuple(complicated_data_dict_id_list))
fill_shujuzidian_df = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])
fill_shujuzidian_df.to_excel(data_dict_path)
mssql.Close()
return {}
# Upload the data-dictionary supplement data
def ComplicatedDataDict(path):
    """Write a filled-in data-dictionary supplement back to ShuJuZiDian_Cfg.

    Reads the default sheet of the workbook at ``path`` and, for every row,
    stores the row's ``stdvalue`` and ``simplevalue`` on the ShuJuZiDian_Cfg
    record whose ``id`` equals the row's ``id``. The row index is printed
    after each update.

    The values are bound as query parameters instead of being spliced into
    the SQL text, so a value containing an apostrophe no longer breaks the
    statement. Values are passed through ``str`` first, which keeps what the
    previous string formatting stored (an empty cell is still written as the
    text ``nan``).

    Args:
        path: Path of the Excel workbook; it must provide the columns
            ``id``, ``stdvalue`` and ``simplevalue``.
    """
    # Create the database connection
    mssql = MSSQL('123.56.115.207','ZI_NEW')
    cursor_zi_new = mssql._cur
    update_sql = "update ShuJuZiDian_Cfg set stdvalue = %s,simplevalue = %s where id = %s"
    try:
        df = pd.read_excel(path)
        for index,row in df.iterrows():
            # int() turns a numpy integer into a plain int the driver can quote
            cursor_zi_new.execute(update_sql,(str(row['stdvalue']),str(row['simplevalue']),int(row['id'])))
            print(index)
    finally:
        # Release the connection even when reading or updating fails
        mssql.Close()
# Record the standard parameter items and values
def save_standard_params_info(std_value_dict):
    """Store each product's standard parameter values on product_all.

    ``std_value_dict`` is indexed as ``std_value_dict[id][subtitle]`` and each
    innermost object is a dict whose first key is the standard value. For
    every id a ``{subtitle: value}`` dict is built, rendered with ``str`` and
    written to ``product_all.params_standard``.

    A 'CPU型号' entry additionally yields a 'CPU属性' entry: the model itself
    when it contains '飞腾', '龙芯' or '兆芯', otherwise the part of the model
    before the first '-'.
    """
    service_db = MSSQL('123.57.45.119','ZI_Service')
    service_cursor = service_db._cur
    # Instantiate the progress bar
    progress = Index()
    for position,id_ in enumerate(std_value_dict.keys(), start=1):
        print(progress(position, len(std_value_dict)-1), end='%')
        standard_params = {}
        for subtitle,value_map in std_value_dict[id_].items():
            std_value = list(value_map.keys())[0]
            standard_params[subtitle] = std_value
            if subtitle == 'CPU型号':
                keeps_full_model = '飞腾' in std_value or '龙芯' in std_value or '兆芯' in std_value
                standard_params['CPU属性'] = std_value if keeps_full_model else std_value.split('-')[0]
        # Double the single quotes so the dict text survives inside a SQL string literal
        params_dict = str(standard_params).replace("'","''")
        service_cursor.execute(f"update product_all set params_standard = '{params_dict}' where id = {id_}")
    service_db.Close()
    print("完成记录标准参数项参数值")
def transform_simplevalue(cursor_zi_new,shujuzidiandf,categoryname,subtitle,stdvalue):
    """Map a standard value to the short form used in product names.

    Args:
        cursor_zi_new: Not used by this function.
        shujuzidiandf: Data-dictionary frame with the columns
            ``categoryname``, ``subtitle``, ``stdvalue`` and ``simplevalue``.
        categoryname: Category to look the value up under.
        subtitle: Parameter name.
        stdvalue: Standard value; surrounding whitespace is stripped.

    Returns:
        The stripped ``stdvalue`` when ``subtitle`` is not one of the
        abbreviated parameters; otherwise the single distinct ``simplevalue``
        found for it, or ``" "`` when that value is '无简称'. ``False`` is
        returned (after printing a message) when no entry or more than one
        distinct entry exists.
    """
    stdvalue = stdvalue.strip()
    abbreviated_subtitles = ('CPU型号','显存容量','操作系统','双面器','双面输稿器','网络打印','标配外服务及配件','标配外耗材','镜头描述','碎纸效果')
    if subtitle not in abbreviated_subtitles:
        return stdvalue
    same_category = shujuzidiandf['categoryname'] == categoryname
    same_subtitle = shujuzidiandf['subtitle'] == subtitle
    same_value = shujuzidiandf['stdvalue'] == stdvalue
    candidates = set(shujuzidiandf[same_category & same_subtitle & same_value]['simplevalue'].tolist())
    if len(candidates) == 0:
        print(f"非法值,不存在数据字典中。{subtitle},{stdvalue}")
        return False
    if len(candidates) > 1:
        print(f"异常数据,具有多个简称。{subtitle},{stdvalue}")
        return False
    (short_value,) = candidates
    # '无简称' marks values without a short form; callers get a single blank
    return " " if short_value == '无简称' else short_value
def dael_name_content(skuname,brand):
    """Clean up the bracketed part of a generated product name.

    Every occurrence of ``brand`` is removed from ``skuname`` and the brand is
    put back once at the front. Inside the first "(...)" group the
    '/'-separated elements that are a single blank are dropped. When nothing
    is left inside the brackets, the brackets are removed as well; if text
    follows the group it is then joined to the head with one extra blank.

    Changes compared with the previous implementation:
      * Brackets whose content ends up empty (for example "()" or "(/ )") are
        now removed. The old check compared the rebuilt text, which always
        included the brackets, with '' and therefore never fired.
      * A name without a complete "(...)" group is returned as brand plus
        remainder instead of raising IndexError.

    Only the text between the first ')' and the next '(' is kept as tail,
    exactly as before.

    Args:
        skuname: Generated product name.
        brand: Brand name contained in ``skuname``.

    Returns:
        The cleaned name with surrounding whitespace stripped.
    """
    skuname = skuname.replace(brand,'')
    head_and_rest = skuname.split('(')
    if len(head_and_rest) < 2 or ')' not in head_and_rest[1]:
        # No complete bracket group: nothing to clean up
        return (brand + skuname).strip()
    name_head = head_and_rest[0]
    inner_and_tail = head_and_rest[1].split(')')
    name_tail = inner_and_tail[1]
    kept_elements = [element for element in inner_and_tail[0].split('/') if element != ' ']
    content = '/'.join(kept_elements).strip('/')
    if content:
        res = brand + name_head + "(" + content + ")" + name_tail
    elif name_tail == '':
        res = brand + name_head
    else:
        res = brand + name_head + " " + name_tail
    return res.strip()
def create_to_db(path,std_value_dict):
# Name, match and (where needed) create catalogue products for the key categories.
#
# path: Excel workbook with one sheet per key category, named "<category>参数数据"
#       ('/' in the category name replaced by '_'); categories whose sheet cannot be read are skipped.
# std_value_dict: indexed as std_value_dict[str(id)][param]; the first item of the
#       innermost dict is read as (value, flag).
#
# Per category and channel the function builds a product name from the category's
# naming rule, writes match/reject results to product_all, creates missing SKUs in
# p_sku together with their parameter values, and collects the created products.
#
# Returns the concatenation of the per-channel frames with the columns
# id, sku, spuid, zi_subcategoryname, zi_brandname, new_name, channel_alias.
# NOTE(review): `uuid` and `datetime` are used below; their imports are not visible in this part of the file — confirm.
# Create the connections
mssql_new = MSSQL('123.56.115.207','ZI_NEW')
cursor_zi_new = mssql_new._cur
mssql_price = MSSQL('123.57.45.119','price_calculate')
cursor_zi_price = mssql_price._cur
mssql_service = MSSQL('123.57.45.119','ZI_Service')
cursor_zi_service = mssql_service._cur
# Fetch the list of key categories
cursor_zi_new.execute(f"select category_name from important_category")
point_category_list = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['category_name'].tolist()
# Fetch the naming rules of the key categories
cursor_zi_new.execute(f"select * from skuname_named_rule")
named_rules_df = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])
# Fetch the property/spec items of the key categories and split out each item's own flags (required, match, standard, accessory)
cursor_zi_new.execute(f"select * from vw_property where name in (select category_name from important_category)")
data = cursor_zi_new.fetchall()
subtitle_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
subtitle_df['subtitle'] = subtitle_df['subtitle'].apply(lambda x: x.strip())
subtitle_df['require_param'] = subtitle_df['identy'].apply(lambda x: x[0])
subtitle_df['match_param'] = subtitle_df['identy'].apply(lambda x: x[1])
subtitle_df['standard_param'] = subtitle_df['identy'].apply(lambda x: x[2])
subtitle_df['part_param'] = subtitle_df['identy'].apply(lambda x: x[3])
# Fetch the SKU data of the key categories
cursor_zi_new.execute(f"select * from vw_sku_params where categoryname in (select category_name from important_category)")
data = cursor_zi_new.fetchall()
sku_db_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
# Fetch the data dictionary
cursor_zi_new.execute("select * from ShuJuZiDian_Cfg")
data_dict = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])
# Fetch the SPU price blacklist (SPUs whose three prices are all 0)
cursor_zi_price.execute("select spuid from spu_calculate_price where zc_price = 0 and ds_price = 0 and st_price = 0")
hmd_spuid_list = pd.DataFrame(cursor_zi_price.fetchall(), columns=[tuple[0] for tuple in cursor_zi_price.description])['spuid'].to_list()
new_df_combine_list = list()
for category in point_category_list:
try:
point_category = category.replace('/','_')
cat_df = pd.read_excel(path,sheet_name = f"{point_category}参数数据",converters = {'productcode':str})
channel_list = cat_df['channel_alias'].unique().tolist()
except:
continue
for channel_alias in channel_list:
df = cat_df[cat_df['channel_alias'] == channel_alias]
cat_subtitle_df = subtitle_df[subtitle_df['name'] == category]# property/spec items of this category
#cat_spu_db_df = spu_db_df[spu_db_df['categoryname'] == point_category]# all SPU data of this category
cat_sku_db_df = sku_db_df[sku_db_df['categoryname'] == category]# all SKU data of this category
cat_sku_db_df['skuname'] = cat_sku_db_df['skuname'].apply(lambda x :x.upper())
# Data-dictionary entries of this category
single_data_dict = data_dict[data_dict['categoryname'] == category]
single_data_dict['stdvalue'] = single_data_dict['stdvalue'].apply(lambda x : x.upper())
# Naming rule of this category
named_rule = named_rules_df[named_rules_df['categoryname'] == category]['rule'].str.cat()
new_product_flag_list = []# new-product flag: 1 = product has to be created, 0 = no creation needed
computer_cat = ['笔记本','台式机','一体电脑']# whole-machine categories; these need matching
#spu_id = [] # if only the SPU matched, record the spuid for later database creation
df['remark'] = df['remark'].apply(lambda x:str(x))
df['productcode'] = df['productcode'].apply(lambda x:str(x))
# Product naming
new_name_list = []
sku_list = list()
spuid_list = list()
for index,row in df.iterrows():
if (row['remark'] != 'nan' and row['remark'] != 'None') or (row['productcode'] != 'nan' and row['productcode'] != 'None'):
new_name_list.append(" ")
continue
id_ = str(row['id'])
skuname = ''
brandname = row['zi_brandname']
for element in named_rule.split(" "):
if element == '品牌名称':
value = brandname
elif element == '类别名称':
value = category
elif element[0] == "(" and "/" in element:
element = element.replace("(","").replace(")","")
detail_element_list = element.split("/")
for i in range(len(detail_element_list)):
#value = cat_sku_df[(cat_sku_df['sku'] == sku) & (cat_sku_df['name'] == detail_element_list[i])]['value'].tolist()[0]
# Fetch the standard parameter value
param_ = detail_element_list[i]
value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
value = transform_simplevalue(cursor_zi_new,single_data_dict,category,detail_element_list[i],value.upper())
#if value:
# pass
#else:
# flag = False
# continue
if i == 0:
skuname += "(" + value + "/"
elif i == len(detail_element_list)-1:
skuname += value + ") "
else:
skuname += value + "/"
continue
elif element[0] == "(" and "/" not in element:
element = element.replace("(","").replace(")","")
param_ = element
value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
value = transform_simplevalue(cursor_zi_new,single_data_dict,category,element,value.upper())
#if value:
# pass
#else:
# flag = False
# continue
skuname += "(" + value + ") "
continue
else:
param_ = element
value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
value = transform_simplevalue(cursor_zi_new,single_data_dict,category,element,value.upper())
#if value:
# pass
#else:
# flag = False
# continue
skuname += value + " "
#if flag == False:
# continue
skuname = skuname.strip()
# Handle the blanks in the name
if category in ['复印纸','扫描仪']:
skuname = skuname
elif category in ['笔记本','台式机','一体电脑','碎纸机']:
skuname = dael_name_content(skuname,brandname)
elif category in ['复印机','多功能一体机','单反相机']:
skuname_part1 = skuname.split(category)[0]
skuname_part2 = skuname.split(category)[1]
skuname_1 = dael_name_content(skuname_part1,brandname)
skuname_2 = dael_name_content(skuname_part2,brandname).replace(brandname,"")
skuname = skuname_1 + " " + category + skuname_2
elif category in ['投影机','激光打印机','空调']:
if category == '空调':
skuname_part1 = skuname.split("空调 (")[0].strip()
else:
skuname_part1 = skuname.split(category)[0].strip()
skuname_part2 = skuname.replace("中央空调","").replace("空调扇","").split(category)[1]
skuname_2 = dael_name_content(skuname_part2,brandname).replace(brandname,"")
skuname = skuname_part1 + " " + category + skuname_2
elif category in ['单电/微单相机']:
skuname_part1 = skuname.split(category)[0]
skuname_part2 = skuname.split(category)[1]
skuname_1 = dael_name_content(skuname_part1,brandname)
skuname_2 = dael_name_content(skuname_part2,brandname).replace(brandname,"")
skuname = skuname_1 + " " + category + skuname_2
else:
skuname_part1 = skuname.split(category)[0].strip()
skuname_part2 = skuname.split(category)[1]
skuname_2 = dael_name_content(skuname_part2,brandname).replace(brandname,"")
skuname = skuname_part1 + " " +category + skuname_2
print(skuname)
new_name_list.append(skuname)
df['new_name'] = new_name_list
for index,row in df.iterrows():
if row['remark'] != 'nan' and row['remark'] != 'None':# rejected data
new_product_flag_list.append('0')
cursor_zi_service.execute(f"update product_all set state = '2',new_name = '{row['new_name']}',remark = '{row['remark']}' where id = {row['id']}")
elif row['productcode'] != 'nan' and row['productcode'] != 'None':
new_product_flag_list.append('0')
cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{row['productcode']}' where id = {row['id']}")
else:
newname = row['new_name']
# Match data (matching is meant for the whole-machine categories)
if cat_sku_db_df[cat_sku_db_df['skuname'] == row['new_name'].upper()].empty:# no match found
new_product_flag_list.append('1')
cursor_zi_service.execute(f"update product_all set state = '2',remark = '未匹配上产品',new_name = '{newname}' where id = {row['id']}")
else:# match found
productcode = cat_sku_db_df[cat_sku_db_df['skuname'] == row['new_name'].upper()]['sku'].tolist()[0]
cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{productcode}',new_name = '{newname}',remark = Null,pic_state = '0' where id = {row['id']}")
new_product_flag_list.append('0')
print('完成名称匹配')
df['new_product_flag'] = new_product_flag_list
new_df = df[df['new_product_flag'] == '1']# products that need to be created
if category in computer_cat:
cpu_attr_list = []
for cpu in new_df['CPU型号'].tolist():
if '飞腾' in cpu or '龙芯' in cpu or '兆芯' in cpu:
pass
else:
cpu = cpu.split('-')[0]
cpu_attr_list.append(cpu)
new_df['CPU属性'] = cpu_attr_list
else:
continue
# Create the new products
print(f"开始新建产品,共计:'{len(new_df)}'件产品")
index_ = 0
# Index labels of rows that are not created and are dropped from new_df before export
delete_index_list = list()
for index,row in new_df.iterrows():
id_ = str(row['id'])
brand = row['zi_brandname'].strip()
brandcode = row['zi_brandcode'] # brand code needed for database creation
# Create the SPU / get the SPU id
name = str(brand)
for spu_param in cat_subtitle_df[cat_subtitle_df['skuorspu'] == 'spu'].sort_values("Expr1")['subtitle'].tolist():
if spu_param == '产品品牌':
continue
else:
name += " "
name += str(row[spu_param])
name = name + " " +str(row['zi_subcategoryname'])# SPU name
spu = str(uuid.uuid1()).replace('-','')# SPU code
category_code = int(str(row['zi_subcategorycode']).replace("?",""))# category code
brand_code = brandcode# brand code
cursor_zi_new.execute(f"select id from p_spu where categoryid = {category_code} and brandid = {brand_code} and spuname = '{name}'")
data = cursor_zi_new.fetchall()
spu_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
param_deal_flag = False # whether to process parameters
if spu_df.empty:
#cursor_zi_new.execute(f"insert into p_spu (spuname,spu,categoryid,brandid) values ('{name}','{spu}',{category_code},{brand_code})")
#param_deal_flag = True
delete_index_list.append(index)
continue
else:
spuid = spu_df['id'].tolist()[0]
if spuid in hmd_spuid_list:
delete_index_list.append(index)
continue
spuid_list.append(spuid)
#cursor_zi_service.execute(f"update product_all_original_list set spuid = {spuid} where id = {row['id']}")
# NOTE(review): param_deal_flag is never set to True above (the assignment is commented out), so this SPU-attribute section looks unreachable — confirm
if param_deal_flag:
# Create the SPU attributes
for index,spu_param_row in cat_subtitle_df[cat_subtitle_df['skuorspu'] == 'spu'].iterrows():
subtitle = spu_param_row['subtitle'].strip()
subtitleid = spu_param_row['subtitleid']
param_ = subtitle
value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
cursor_zi_new.execute(f"select a.valueid from p_valuemap a \
left join p_value b \
on a.valueid = b.id \
left join p_subtitle c \
on b.subtitleid = c.id \
where a.spuid = {spuid} and c.name = '{subtitle}'")
data = cursor_zi_new.fetchall()
valueid_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
if valueid_df.empty:
cursor_zi_new.execute(f"insert into p_value (subtitleid,value) values ({subtitleid}, '{value}')")
cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"select id from p_valuemap where spuid = {spuid} and valueid = {valueid}")
data = cursor_zi_new.fetchall()
check_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
if check_df.empty:
cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
else:
valueid_list = valueid_df['valueid'].tolist()
if len(valueid_list) == 1:
valueid = valueid_list[0]
cursor_zi_new.execute(f"select value from p_value where id = {valueid}")
find_value = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['value'].tolist()[0]
if value == find_value:
continue
else:
cursor_zi_new.execute(f"delete from p_valuemap where spuid = {spuid} and valueid = {valueid}")
# Find a suitable valueid
cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
try:
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
except:
cursor_zi_new.execute(f"insert into p_value (subtitleid,value) values ({subtitleid}, '{value}')")
cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
else:
for valueid in valueid_list:
cursor_zi_new.execute(f"delete from p_valuemap where spuid = {spuid} and valueid = {valueid}")
# Find a suitable valueid
cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
try:
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
except:
cursor_zi_new.execute(f"insert into p_value (subtitleid,value) values ({subtitleid}, '{value}')")
cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
# Create the SKU
skuname = str(row['new_name']).strip()
# The SKU code is the current timestamp down to milliseconds, digits only
sku = datetime.datetime.now().strftime("%Y-%m-%d%H:%M:%S.%f").replace('-','').replace(':','').replace('.','')[:-3]
# presumably keeps consecutive timestamp-based SKU codes distinct — TODO confirm
time.sleep(0.001)
#spuid = int(row['spuid'])
state = 1
source = '1'
createddate = datetime.datetime.now().strftime("%Y-%m-%d %X")
cursor_zi_new.execute(f"select sku from p_sku where skuname = '{skuname}' and spuid = {spuid}")
data = cursor_zi_new.fetchall()
sku_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
param_deal_flag = False# whether to process parameters
if sku_df.empty:
cursor_zi_new.execute(f"insert into p_sku (skuname,sku,spuid,state,source,createddate) values ('{skuname}','{sku}',{spuid},{state},'{source}','{createddate}')")
cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{sku}',new_name = '{row['new_name']}',remark = Null,pic_state = '1' where id = {row['id']}")
param_deal_flag = True
else:
cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{sku_df['sku'].tolist()[0]}',new_name = '{row['new_name']}',remark = Null,pic_state = '0' where id = {row['id']}")
sku = sku_df['sku'].tolist()[0]
cursor_zi_new.execute(f"select id from p_sku where sku = '{sku}'")
data = cursor_zi_new.fetchall()
skuid = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0] #skuid
sku_list.append(sku)
if param_deal_flag:
# Record the product's detailed parameters
for index,sku_param_row in cat_subtitle_df[cat_subtitle_df['skuorspu'] == 'sku'].iterrows():
subtitle = sku_param_row['subtitle'].strip()
subtitleid = sku_param_row['subtitleid']
if subtitle == '产品品牌':
continue
try:
param_ = subtitle
value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
except:
continue
cursor_zi_new.execute(f"select a.valueid from p_skuvaluemap a \
left join p_skuvalue b \
on a.valueid = b.id \
left join p_skusubtitle c \
on b.subtitleid = c.id \
where a.skuid = {skuid} and c.name = '{subtitle}'")
data = cursor_zi_new.fetchall()
valueid_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
if valueid_df.empty:
cursor_zi_new.execute(f"insert into p_skuvalue (subtitleid,value) values ({subtitleid}, '{value}')")
cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"select id from p_skuvaluemap where skuid = {skuid} and valueid = {valueid}")
data = cursor_zi_new.fetchall()
check_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
if check_df.empty:
cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
else:
valueid_list = valueid_df['valueid'].tolist()
if len(valueid_list) == 1:
valueid = valueid_list[0]
cursor_zi_new.execute(f"select value from p_skuvalue where id = {valueid}")
find_value = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['value'].tolist()[0]
if value == find_value:
continue
else:
cursor_zi_new.execute(f"delete from p_skuvaluemap where skuid = {skuid} and valueid = {valueid}")
# Find a suitable valueid
cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
try:
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
except:
cursor_zi_new.execute(f"insert into p_skuvalue (subtitleid,value) values ({subtitleid}, '{value}')")
cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
else:
for valueid in valueid_list:
cursor_zi_new.execute(f"delete from p_skuvaluemap where skuid = {skuid} and valueid = {valueid}")
# Find a suitable valueid
cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
try:
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
except:
cursor_zi_new.execute(f"insert into p_skuvalue (subtitleid,value) values ({subtitleid}, '{value}')")
cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
index_+=1
print(f"已处理完'{index_}'件产品")
print(f"完成{channel_alias}{category}数据建库")
# Compute price
new_df = new_df.drop(index= delete_index_list)
new_df['sku'] = sku_list
new_df['spuid'] = spuid_list
export_df = new_df[['id','sku','spuid','zi_subcategoryname','zi_brandname','new_name','channel_alias']]
new_df_combine_list.append(export_df)
new_df_combine = pd.concat(new_df_combine_list)
mssql_new.Close()
mssql_price.Close()
mssql_service.Close()
return new_df_combine
def check_configure(new_df_all):
    """Run ``check_configure_price`` once per category of ``new_df_all``.

    For each distinct ``zi_subcategoryname`` the distinct ``sku`` values of
    that category are collected and handed to ``check_configure_price``
    together with the category name. "检查完毕" is printed once all
    categories are done.
    """
    for category in new_df_all['zi_subcategoryname'].unique().tolist():
        in_category = new_df_all['zi_subcategoryname'] == category
        category_skus = new_df_all[in_category]['sku'].unique().tolist()
        check_configure_price(category,category_skus)
    print("检查完毕")
def calculate_spu_price(new_df_all):
    """Derive SKU prices from SPU prices and store them in sku_calculate_price.

    Per category of ``new_df_all``:
      1. ``create_price_difference_single`` is called with the category and
         its distinct SKUs.
      2. spu_calculate_price and configure_price_difference are read.
      3. For each of zc/ds/st the SKU price is 0 when the SPU price is 0,
         otherwise SPU price plus the SKU's price difference.
      4. ``cal_price`` is the weighted mean (st 0.7, zc 0.2, ds 0.1) over the
         non-zero prices; when it is NaN it becomes "SPU下无可用价格".
      5. One row per product is inserted into sku_calculate_price.
    """
    price_db = MSSQL('123.57.45.119','price_calculate')
    price_cursor = price_db._cur
    for category in new_df_all['zi_subcategoryname'].unique().tolist():
        new_df = new_df_all[new_df_all['zi_subcategoryname'] == category]
        category_skus = new_df['sku'].unique().tolist()
        # Record the accessory price differences of the newly added products
        create_price_difference_single(category,category_skus)
        # Read the SPU price table and the accessory price-difference table
        price_cursor.execute("select spuid,zc_price,ds_price,st_price from spu_calculate_price")
        spu_rows = price_cursor.fetchall()
        spu_price_df = pd.DataFrame(spu_rows, columns=[col[0] for col in price_cursor.description])
        price_cursor.execute("select sku,price_difference from configure_price_difference")
        difference_rows = price_cursor.fetchall()
        price_difference_df = pd.DataFrame(difference_rows, columns=[col[0] for col in price_cursor.description])
        spuid_values = new_df['spuid'].tolist()
        sku_values = new_df['sku'].tolist()
        for price_col in ('zc_price','ds_price','st_price'):
            restored_prices = []
            for spuid,sku in zip(spuid_values,sku_values):
                spu_price = spu_price_df[spu_price_df['spuid'] == spuid][price_col].tolist()[0]
                if spu_price == 0:
                    # No SPU price on this channel: keep 0 so it gets weight 0 below
                    restored_prices.append(0)
                    continue
                difference = price_difference_df[price_difference_df['sku'] == sku]['price_difference'].tolist()[0]
                restored_prices.append(spu_price + float(difference))
            new_df[price_col] = restored_prices
        for price_col,share in (('zc_price',0.2),('ds_price',0.1),('st_price',0.7)):
            new_df[price_col + '_weight'] = new_df[price_col].apply(lambda x, share=share: 0 if x == 0 else share)
        new_df['weight'] = new_df['zc_price_weight']+ new_df['ds_price_weight'] + new_df['st_price_weight']
        new_df['cal_price'] = (new_df['st_price']*0.7 + new_df['zc_price']*0.2 + new_df['ds_price']*0.1)/new_df['weight']
        new_df['cal_price'] = new_df['cal_price'].fillna("SPU下无可用价格")
        createddate = datetime.datetime.now().strftime("%Y-%m-%d %X")
        # Store the prices in the temporary table
        insert_list = [
            (row['spuid'],row['sku'],row['new_name'],row['zi_brandname'],category,row['zc_price'],row['ds_price'],row['st_price'],str(row['cal_price']),createddate)
            for _,row in new_df.iterrows()
        ]
        sql = "insert into sku_calculate_price (spuid,sku,skuname,brand,category,zc_price,ds_price,st_price,sku_price,create_time) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        price_cursor.executemany(sql,insert_list)
        print(f"完成{category}价格还原")
    price_db.Close()
def _update_product_all_from_sheet(cursor_zi_service,sheet_df,is_match_sheet,done_label):
    """Apply the confirmed rows of one result sheet to product_all, channel by channel.

    ``is_match_sheet`` selects the two statement variants that differ between
    the '匹配到的数据' sheet and the '未匹配数据处理' sheet; ``done_label`` is
    the text printed after each channel.
    """
    columns = ['id','remark','fuzzy_match_code','zi_brandname','zi_brandcode','new_name']
    for channel_alias in sheet_df['channel_alias'].unique().tolist():
        channel_df = sheet_df[sheet_df['channel_alias'] == channel_alias]
        # Instantiate the progress bar
        index_ = Index()
        counter = 1
        for _id,remark,matchcode,brand,brandcode,newname in zip(*(channel_df[column].tolist() for column in columns)):
            try:
                print(index_(counter, len(channel_df)-1), end='%')
                counter += 1
            except Exception:
                print(index_(counter,1), end='%')
            row_id = str(_id)
            if remark != 'nan':
                # A remark was entered: mark the row as rejected
                cursor_zi_service.execute("update product_all set state = '2',remark = %s where id = %s",(remark,row_id))
            elif matchcode == 'nan':
                # No remark and no product code: record as unmatched
                sql = "update product_all set state = '2',remark = '未匹配上产品',productcode = Null,zi_brandname = %s,zi_brandcode = %s,new_name = %s where id = %s"
                if is_match_sheet:
                    sql += " and productcode is Null"
                cursor_zi_service.execute(sql,(str(brand),str(brandcode),str(newname),row_id))
            elif is_match_sheet:
                cursor_zi_service.execute("update product_all set state = '9',remark = Null,productcode = %s,new_name = %s,pic_state = '0' where id = %s",(str(matchcode).strip(),str(newname),row_id))
            else:
                cursor_zi_service.execute("update product_all set state = '9',remark = Null,productcode = %s,pic_state = '0' where id = %s",(str(matchcode).strip(),row_id))
        print(f"完成{channel_alias}{done_label}")


def update_non_point_to_db(path,mode):
    """Write the reviewed matching results of non-key products to product_all.

    The sheet '匹配到的数据' is always processed. Unless ``mode`` is
    'match_data', the sheet '未匹配数据处理' is processed afterwards.

    For each row:
      * a filled ``remark`` sets state '2' with that remark;
      * no remark and no ``fuzzy_match_code`` sets state '2' with the remark
        '未匹配上产品' and stores brand, brand code and new name (on the
        first sheet only when productcode is still Null);
      * no remark and a ``fuzzy_match_code`` sets state '9' with that
        product code (the first sheet also stores the new name).

    All values are bound as query parameters. Previously ``remark`` and the
    match code were interpolated into the SQL unescaped, so an apostrophe in
    either broke the statement. The connection is now closed even when an
    update fails.

    Args:
        path: Path of the reviewed Excel workbook.
        mode: 'match_data' to stop after the first sheet.

    Returns:
        "完成数据更新" when ``mode`` is 'match_data', otherwise None.
    """
    # Create the database connection
    mssql = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql._cur
    try:
        df_match = pd.read_excel(path,sheet_name = '匹配到的数据',converters = {'fuzzy_match_code':str,'zi_subcategorycode':str})
        # Stringify so empty cells become the literal 'nan' tested by the helper
        df_match['remark'] = df_match['remark'].apply(lambda x : str(x))
        df_match['fuzzy_match_code'] = df_match['fuzzy_match_code'].apply(lambda x : str(x))
        del df_match['Unnamed: 0']
        del df_match['warranty']
        del df_match['state']
        df_match = df_match.drop_duplicates()
        _update_product_all_from_sheet(cursor_zi_service,df_match,True,'匹配数据更新')
        if mode == 'match_data':
            return "完成数据更新"
        df_unmatch = pd.read_excel(path,sheet_name = '未匹配数据处理',converters = {'fuzzy_match_code':str,'zi_subcategorycode':str})
        df_unmatch['remark'] = df_unmatch['remark'].apply(lambda x : str(x))
        df_unmatch['fuzzy_match_code'] = df_unmatch['fuzzy_match_code'].apply(lambda x : str(x))
        _update_product_all_from_sheet(cursor_zi_service,df_unmatch,False,'未匹配数据更新')
    finally:
        mssql.Close()
def get_data_all():
    """Return the product_all rows in state '9' that lack cal_price or new_name.

    The result is a DataFrame whose columns are named after the columns
    reported by the cursor.
    """
    # Create the database connection
    service_db = MSSQL('123.57.45.119','ZI_Service')
    service_cursor = service_db._cur
    service_cursor.execute("select * from product_all where state = '9' and (cal_price is Null or new_name is Null)")
    rows = service_cursor.fetchall()
    column_names = [column[0] for column in service_cursor.description]
    pending_df = pd.DataFrame(rows, columns=column_names)
    service_db.Close()
    return pending_df
def get_price(df):
    """Look up a price for every row of ``df`` and store it on product_all.

    For each row the stripped ``productcode`` is used as SKU. The price is
    taken from zd_week_price (``index_price_wave`` of the latest period found
    in zd_entry_goods_price); when that has no row, from
    sku_calculate_price (``sku_price``); when neither has one, the text
    "无法获取价格,请核查" is stored instead. The stripped SKU is written back
    as productcode in the same update.
    """
    # Create the connections
    index_db = MSSQL('123.56.115.207','zdindex')
    index_cursor = index_db._cur
    price_db = MSSQL('123.57.45.119','price_calculate')
    price_cursor = price_db._cur
    service_db = MSSQL('123.57.45.119','ZI_Service')
    service_cursor = service_db._cur
    # Instantiate the progress bar
    progress = Index()
    for position,(_,row) in enumerate(df.iterrows(), start=1):
        print(progress(position, len(df)-1), end='%')
        sku = str(row['productcode']).strip()
        index_cursor.execute(f"select index_price_wave from zd_week_price where periods in (select top 1 max(periods) from zd_entry_goods_price) and goods_id = '{sku}'")
        index_rows = index_cursor.fetchall()
        index_df = pd.DataFrame(index_rows, columns=[column[0] for column in index_cursor.description])
        if not index_df.empty:
            cal_price = index_df['index_price_wave'].tolist()[0]
        else:
            # Fall back to the calculated SKU price
            price_cursor.execute(f"select sku_price from sku_calculate_price where sku = '{sku}'")
            price_rows = price_cursor.fetchall()
            calculated_df = pd.DataFrame(price_rows, columns=[column[0] for column in price_cursor.description])
            if not calculated_df.empty:
                cal_price = calculated_df['sku_price'].tolist()[0]
            else:
                cal_price = "无法获取价格,请核查"
        service_cursor.execute(f"update product_all set cal_price = '{cal_price}',productcode='{sku}' where id = {row['id']}")
    index_db.Close()
    price_db.Close()
    service_db.Close()
def get_name(df):
    """Write the library product name into ``product_all`` for each row.

    For each row of ``df`` the name is read from ``p_sku.skuname`` in the
    ``ZI_NEW`` database and stored as ``new_name`` (``remark`` is reset to
    NULL).  When the SKU is unknown, a placeholder message is stored.

    Args:
        df: DataFrame with at least the columns ``id`` and ``productcode``.
    """
    mssql_new = MSSQL('123.56.115.207','ZI_NEW')
    mssql_service = MSSQL('123.57.45.119','ZI_Service')
    try:
        cursor_zi_new = mssql_new._cur
        cursor_zi_service = mssql_service._cur
        # Progress bar.  The denominator is clamped to 1 because a
        # single-row frame would otherwise divide by zero.
        index_ = Index()
        total = max(len(df) - 1, 1)
        counter = 1
        for index, row in df.iterrows():
            print(index_(counter, total), end='%')
            counter += 1
            sku = row['productcode']
            cursor_zi_new.execute(f"select skuname from p_sku where sku = '{sku}'")
            data = cursor_zi_new.fetchall()
            # An empty result means the SKU is unknown.  This explicit check
            # replaces a bare except that also hid unrelated errors.
            if data:
                name = data[0][0]
            else:
                name = "无法获取产品名称"
            # Double any apostrophe so the name cannot break the literal.
            name = str(name).replace("'", "''")
            cursor_zi_service.execute(f"update product_all set new_name = '{name}',remark = Null where id = {row['id']}")
    finally:
        # Release the connections even when a query fails part-way through.
        mssql_new.Close()
        mssql_service.Close()
def save_sku_relationship(filter,channel_alias):
    """Send the customer-SKU to library-SKU pairs of one batch to the API.

    Selects the ``product_all`` rows with ``state = '9'`` for the given
    channel and batch, then posts their ``sku`` / ``productcode`` /
    ``channel_alias`` columns to ``Stock-InSkuRelationshipInfo`` and prints
    the reply.

    Args:
        filter: batch identifier compared against the ``batch`` column.
        channel_alias: channel identifier compared against ``channel_alias``.
    """
    service_db = MSSQL('123.57.45.119','ZI_Service')
    cur = service_db._cur
    query = f"select * from product_all where state = '9' and channel_alias = '{channel_alias}' and batch = '{filter}'"
    cur.execute(query)
    records = cur.fetchall()
    headers = [column[0] for column in cur.description]
    confirmed = pd.DataFrame(records, columns=headers)
    payload = {
        "params_info": {
            "cust_sku_list": confirmed['sku'].tolist(),
            "sku_list": confirmed['productcode'].tolist(),
            "channelAlias_list": confirmed['channel_alias'].tolist(),
        }
    }
    reply = zgc_api("Stock-InSkuRelationshipInfo", payload)
    print(reply)
    service_db.Close()
# Manual processing workflow for one customer batch.  The steps below run in
# order and re-bind the module-level names `path` and `df` as they go; every
# function called here is defined elsewhere in this file.
# NOTE(review): the paths point at the author's local machine and refer to
# several different customers (CD / HB / hn) while channel_alias is 'AH' --
# presumably each step is edited by hand before it is run; confirm.
path = '/Users/rico/project/客户数据处理(调接口)/原始客户数据/成都CD/2020-10-21/客户数据整理模板-成都电子商城.xlsx'
channel_alias = 'AH'
# Initialise the data
InitializeData(path,channel_alias)
# Load the data
batch = '2020-10-19'
df = LoadData(batch,channel_alias,'deal')
# SKU de-duplication
SkuMatch(df,channel_alias)
# Export the SKU de-duplication result
SkuMatchResult(batch,channel_alias)
# Import the confirmed SKU de-duplication result
# NOTE(review): path is an empty string here -- looks like a placeholder to be
# filled in before running; confirm.
path = ''
UpdateSkuMatchResult(path)
# Load the data
df = LoadData(batch,channel_alias,'deal')
# Analyse the data to obtain its basic information (brand, category, model)
AnalyseBasicInfo(df)
# Export the data (the file is written to the current working directory)
ExportToExcelBasicConfirm(batch,channel_alias)
# Read the returned data and update
path = "/Users/rico/project/客户数据处理(调接口)/原始客户数据/成都CD/2020-10-21/汇总-CD基础信息确认2020-10-21(1).xlsx"
UpdateBasicData(path)
# Get the key-category data to confirm (the file is written to the current working directory)
df = GetPointCategoryData(batch,channel_alias)
GetPointCategoryDataDetail(df,channel_alias)
# Get the non-key-category data to confirm (the file is written to the current working directory)
df = GetNonpointCategoryData(batch,channel_alias)
match_result = GetNonpointCategoryDataDetail(df)
# Split the match result into two sheets by fuzzy_match_result (1 / 0).
writer = pd.ExcelWriter(f"{channel_alias}非重点类匹配结果确认及命名.xlsx")
export_excel_1 = match_result[match_result['fuzzy_match_result'] == 1]
export_excel_1.to_excel(writer,'匹配到的数据')
export_excel_2 = match_result[match_result['fuzzy_match_result'] == 0]
export_excel_2.to_excel(writer,'未匹配数据处理')
writer.save()
# Process the key-category data
path = "/Users/rico/Downloads/反馈-HB重点类参数确认-1112.xlsx"
## Validate the parameter data
std_value_dict = CheckPointParamsData(path)
## Import the supplementary data dictionary
data_dict_path = "/Users/rico/project/客户数据处理(调接口)/原始客户数据/湖北HB/2020-10-19/反馈-HB重点类参数确认(数据字典补充)(1)(2)(1).xlsx"
ComplicatedDataDict(data_dict_path)
## Store in the database (do not continue when std_value_dict is empty)
new_df_all = create_to_db(path,std_value_dict)
## Record the standard parameter names and values
save_standard_params_info(std_value_dict)
## Check the accessory prices
check_configure(new_df_all)
## Calculate the restored prices
calculate_spu_price(new_df_all)
# Process the non-key-category data
## Update the non-key-category data
path = "/Users/rico/Downloads/000001hn非重点类匹配结果确认及命名-汇总(1).xlsx"
mode = 'all_data'
update_non_point_to_db(path,mode)
'''
mode = 'match_data'
update_non_point_to_db(path,mode)
df = pd.read_excel(path,sheet_name = '未匹配数据处理')
match_second_result = GetNonpointCategoryDataDetail(df)
writer = pd.ExcelWriter(f"{channel_alias}非重点类匹配结果确认及命名(二次匹配).xlsx")
export_excel_1 = match_second_result[match_second_result['fuzzy_match_result'] == 1]
export_excel_1.to_excel(writer,'匹配到的数据')
export_excel_2 = match_second_result[match_second_result['fuzzy_match_result'] == 0]
export_excel_2.to_excel(writer,'未匹配数据处理')
writer.save()
path = "/Users/rico/project/客户数据处理(调接口)/原始客户数据/安徽AH/20201018/反馈-AH非重点类匹配结果确认及命名(二次匹配)(1).xlsx"
mode = 'all_data'
update_non_point_to_db(path,mode)
'''
# Store the SKU relationships (run only after both the key and the non-key
# categories have been fully processed)
save_sku_relationship(batch,channel_alias)
# Fill in the missing data (prices first, then names, for the same rows)
df = get_data_all()
get_price(df)
get_name(df)
# Export the processing result
df = LoadData(batch,channel_alias,'result')
df.to_excel(f"{channel_alias}-{batch}结果数据.xlsx")
'''
path = '/Users/rico/Downloads/ZSYH-2020-09-29结果数据.xlsx'
def update_non_point_to_db(path,mode):
#创建数据库链接
mssql = MSSQL('123.57.45.119','ZI_Service')
cursor_zi_service = mssql._cur
df_match = pd.read_excel(path,sheet_name = 'Sheet1',converters = {'productcode':str,'zi_subcategorycode':str})
df_match['remark'] = df_match['remark'].apply(lambda x : str(x))
channel_alias = 'ZSYH'
df_match_channel = df_match[df_match['channel_alias'] == channel_alias]
#实例化进度条
index_ = Index()
counter = 1
for _id,remark,matchcode in zip(df_match_channel['id'].tolist(),df_match_channel['remark'].tolist(),df_match_channel['productcode'].tolist()):
print(index_(counter, len(df_match_channel)-1), end='%')
counter += 1
if remark == 'nan':
if matchcode == 'nan':
pass
else:
matchcode = str(matchcode).strip()
cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{matchcode}',pic_state = '0',remark = Null where id = '{_id}'")
else:
cursor_zi_service.execute(f"update product_all set state = '2',remark = '{remark}' where id = '{_id}'")
print(f"完成{channel_alias}匹配数据更新")
'''
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 18 11:11:31 2020
@author: rico
"""
from db import MSSQL
from API import zgc_api
import math
import pandas as pd
import numpy as np
from public_function import *
# Match the supplier SKUs of the CITIC test workbook against the library, one
# API call per supplier, and export the combined result.
df = pd.read_excel("/Users/rico/project/中信银行/5000条/中信5000测试数据.xlsx",converters = {'供应商SKU':str})
supplier_list = df['供应商名称'].unique().tolist()
engine = 'SQL'
df_list = []
for supplier in supplier_list:
    # Take an independent copy: adding a column to a filtered slice of `df`
    # is a chained assignment and is not guaranteed to behave as intended.
    sup_df = df[df['供应商名称'] == supplier].copy()
    sku_list = sup_df['供应商SKU'].tolist()
    # Fetch the SKU match result for this supplier.
    data = {
        "params_info": {
            "channelAlias": f"{supplier}",
            "cust_sku_list": sku_list,
            "engine": f"{engine}"
        }
    }
    res = zgc_api("GetSKUMatchResInfo",data)
    if res['code'] == 0:
        # Code 0: mark every row of this supplier as unmatched.
        res_list = [False] * len(sup_df)
    else:
        res_dict = res['res_dict']
        res_list = [res_dict[sku] for sku in sku_list]
    sup_df['productcode'] = res_list
    df_list.append(sup_df)
result_df = pd.concat(df_list)
result_df.to_excel("中信银行5000条.xlsx")
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 16 14:27:52 2020
@author: rico
"""
import pandas as pd
from API import zgc_api
import pymssql
from public_function import *
# Step 1: push the price information of the retained JD links to the API.
df = pd.read_excel("/Users/rico/Downloads/京东重复链接(修正后链接).xls",sheet_name = "保留",converters = {'productcode':str})
sku_list = df['productcode'].tolist()
source_name_list = df['productname'].tolist()
price_list = df['参考价'].tolist()
url_list = df['goods_url'].tolist()
channelId_list = ['DS-JD'] * len(df)
# Store the price relationship.
data = {
    "params_info": {
        "sku_list": sku_list,
        "url_name_list": source_name_list,
        "url_price_list": price_list,
        "url_list": url_list,
        "channelId_list": channelId_list
    }
}
res = zgc_api("Stock-InPriceInfo",data)
# Step 2: back-fill goods_url for the 20200917 / 20200921 JD rows that have
# none, copying it from the 20200914 row of the same goods_id.
conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database= 'zdindex',autocommit=True)
try:
    cursor = conn.cursor()
    cursor.execute("select id,goods_id from zd_electricity_price where mall_id ='DS-JD' and (periods = '20200917' or periods = '20200921') and goods_url is Null")
    data = cursor.fetchall()
    df = pd.DataFrame(data, columns=[col[0] for col in cursor.description])
    index_ = Index()
    # Clamp the denominator: a single-row frame would divide by zero.
    total = max(len(df) - 1, 1)
    counter = 1
    for index,row in df.iterrows():
        print(index_(counter, total), end='%')
        counter += 1
        id_ = row['id']
        code = row['goods_id']
        cursor.execute(f"select goods_url from zd_electricity_price where goods_id = '{code}' and periods = '20200914' and mall_id = 'DS-JD'")
        found = cursor.fetchone()
        # Skip rows with no 20200914 record.  Also skip a NULL url there,
        # which would otherwise be written back as the text 'None'.
        if found is None or found[0] is None:
            continue
        # Double any apostrophe so the url cannot break the literal.
        url = str(found[0]).replace("'", "''")
        cursor.execute(f"update zd_electricity_price set goods_url = '{url}' where id = {id_}")
finally:
    conn.close()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 14 10:11:38 2020
@author: rico
"""
import pymssql
import pandas as pd
from API import zgc_api
# Load the confirmed basic information and open the project database.
df = pd.read_excel("/Users/rico/project/路桥项目/JD数据/反馈-JD未匹配数据处理基本信息确认0909.xlsx",sheet_name = '建库')
conn_zi_lq = pymssql.connect(host='123.57.45.119', user='zgcprice', password='zgcprice20200708',database= "SXLQ_JD",autocommit=True)
cursor_zi_lq = conn_zi_lq.cursor()
# Create the new brands: rows whose brand code is "空" carry a name with the
# prefix "新建品牌:", which is removed before the name is sent to the API.
newbrand_df = df[df['zi_brandcode'] == "空"][['id','zi_brandname']]
newbrand_list = newbrand_df['zi_brandname'].apply(lambda x : x.replace("新建品牌:","").strip()).unique().tolist()
data = {
    "params_info": {
        "brand_list": newbrand_list
    }
}
res = zgc_api("Stock-InbrandInfo",data)
brand_dict = res['res_dict']
# Update product_all with the brand name and code returned by the API.
for index,row in newbrand_df.iterrows():
    _id = row['id']
    zi_brand_dict = brand_dict[row['zi_brandname'].replace("新建品牌:","").strip()]
    # The first key of the reply entry is the brand name; its value is the code.
    zi_brand = list(zi_brand_dict)[0]
    zi_brandcode = zi_brand_dict[zi_brand]
    # Double any apostrophe so the brand name cannot break the literal.
    zi_brand_sql = str(zi_brand).replace("'", "''")
    cursor_zi_lq.execute(f"update product_all set zi_brandname = '{zi_brand_sql}',zi_brandcode = {zi_brandcode} where id = {_id}")
# Collect the columns needed for storing the products.
id_list = df['id'].tolist()
cust_sku_list = df['sku'].tolist()
channelAlias_list = df['channel_alias'].apply(lambda x:x.replace("SXLQ_","")).tolist()
source_name_list = df['name'].tolist()
price_list = df['price'].apply(str).tolist()
url_list = df['url'].tolist()
channelId_list = ["DS-JD"] * len(df)
brand_list = df['zi_brandname'].tolist()
brandId_list = df['zi_brandcode'].apply(str).tolist()
category_list = df['zi_subcategoryname'].tolist()
categoryId_list = df['zi_subcategorycode'].apply(str).tolist()
# SECURITY NOTE(review): eval() executes whatever text is in the spreadsheet's
# `params` column.  Only run this on trusted workbooks; ast.literal_eval is a
# safer choice if the cells only ever hold plain dict literals -- confirm.
params_list = df['params'].apply(lambda x: eval(x)).tolist()
name_list = df['productname'].apply(lambda x: x.replace("'","''")).tolist()
'''
for params,category in zip(params_list,category_list):
cust_paramsAttr_list = [key for key in params]
data = {
"params_info": {
"category": category,
"cust_paramsAttr_list": cust_paramsAttr_list
}
}
res = zgc_api("GetZGCParamsInfo",data)
res['paramsAttr_dict']
'''
# Store the products; the API replies with one library SKU per product.
data = {
    "params_info": {
        "brand_list": brand_list,
        "brandId_list": brandId_list,
        "category_list": category_list,
        "categoryId_list": categoryId_list,
        "params_list": params_list,
        "name_list": name_list
    }
}
res = zgc_api("Stock-InProductInfo",data)
sku_list = res['sku_list']
# zip() below stops at the shorter list, so a short reply would silently
# leave rows un-updated.  Make that visible.
if len(sku_list) != len(id_list):
    print(f"warning: Stock-InProductInfo returned {len(sku_list)} SKUs for {len(id_list)} rows")
# Update product_all.  The connection is not used after this loop, so it is
# released here whether or not the loop succeeds.
try:
    for _id,sku in zip(id_list,sku_list):
        cursor_zi_lq.execute(f"update product_all set productcode = '{sku}',remark = Null,state = '9' where id = {_id}")
finally:
    conn_zi_lq.close()
# Store the price relationship.
data = {
    "params_info": {
        "sku_list": sku_list,
        "url_name_list": source_name_list,
        "url_price_list": price_list,
        "url_list": url_list,
        "channelId_list": channelId_list
    }
}
res = zgc_api("Stock-InPriceInfo",data)
# Store the customer-SKU to library-SKU relationship.
data = {
    "params_info": {
        "cust_sku_list": cust_sku_list,
        "sku_list": sku_list,
        "channelAlias_list": channelAlias_list
    }
}
res = zgc_api("Stock-InSkuRelationshipInfo",data)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 25 11:07:18 2020
@author: rico
"""
import requests
def zgc_api(func,data):
    """POST ``data`` as JSON to one endpoint of the ZGC service.

    Args:
        func: endpoint name; it becomes the path segment of the URL.
        data: JSON-serialisable dict.  The API key is added to a copy of it,
            so the caller's dict is left unchanged.

    Returns:
        The decoded JSON body of the response.
    """
    headers = {
        'Connection': 'Keep-Alive'
    }
    key = 'eRo1#ZFHY5N&GEzV'
    api = f"http://59.110.219.171:8000/{func}/"
    print(api)
    # Build the payload separately so the key is not written into the
    # caller's dict.
    payload = {**data, 'key': key}
    # The context manager closes the session's pooled connections, which
    # were previously left open after every call.
    with requests.session() as session:
        result = session.post(api,json=payload,headers=headers,timeout=600).json()
    return result
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 7 23:08:30 2020
@author: rico
"""
import pymssql
class MSSQL:
    """Small convenience wrapper around a ``pymssql`` connection and cursor.

    The login is chosen from the host address.  After construction,
    ``_conn`` holds the connection (``None`` when connecting failed) and
    ``_cur`` a cursor on it (``None`` when there is no connection).
    """

    # Login (user, password) for each known server, keyed by host address.
    _CREDENTIALS = {
        '123.57.45.119': ('zgcprice', 'zgcprice20200708'),
        '123.56.115.207': ('zgcindex', 'jiayou202006'),
        '10.0.120.131': ('sa', '1qaz@WSX'),
        '10.0.120.79': ('sa', '1qaz@WSX'),
    }

    def __init__(self,host,db):
        """Connect to database ``db`` on server ``host``.

        Raises:
            ValueError: if ``host`` is not one of the known servers.
        """
        self.host = host
        self.db = db
        self.user = None
        self._cur = None
        self._conn = self.GetConnect()
        if self._conn:
            self._cur = self._conn.cursor()

    # Connect to the database.
    def GetConnect(self):
        """Open and return an autocommit connection, or ``None`` on failure.

        A failed connection attempt is reported on stdout, not raised.

        Raises:
            ValueError: if ``self.host`` has no configured login.  This used
                to surface as an UnboundLocalError.
        """
        try:
            user, pwd = self._CREDENTIALS[self.host]
        except KeyError:
            raise ValueError("no credentials configured for host %r" % (self.host,)) from None
        # Remember the login so GetConnectInfo can report it.
        self.user = user
        try:
            conn = pymssql.connect(
                host=self.host,
                user=user,
                password=pwd,
                database=self.db,
                autocommit=True
            )
        except Exception as err:
            print("连接数据库失败, %s" % err)
            return None
        return conn

    # Print the connection information.
    def GetConnectInfo(self):
        """Print the server, user name and database of this wrapper."""
        print( "连接信息:" )
        print( "服务器:%s , 用户名:%s , 数据库:%s " % (self.host,self.user,self.db))

    def Close(self):
        """Close the cursor and the connection, if they were opened."""
        cur = getattr(self, '_cur', None)
        conn = getattr(self, '_conn', None)
        if cur is not None:
            cur.close()
        if conn:
            conn.close()
'''
ms = MSSQL('123.56.115.207','zdindex')
conn = ms._conn
cursor = ms._cur
cursor.execute(f"select top 10 * from zd_week_price")
cursor.fetchall()
ms.Close()
cursor.close()
conn.close()
'''
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 19 22:26:55 2020
@author: rico
"""
import pymssql
import pandas as pd
import time
import warnings
from public_function import Index
warnings.filterwarnings("ignore")
def delete_spu_price():
    """Delete the restored SPU prices of the latest period from ``zdindex``.

    The period is the maximum ``periods`` value of ``zd_entry_goods_price``.
    Rows of that period are removed from the e-commerce, entry-goods and
    purchase price tables for the ``*-SPU-HY`` channel ids.
    """
    create_time = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime())
    conn_zdindex = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database= 'zdindex',autocommit=True)
    try:
        cursor_zdindex = conn_zdindex.cursor()
        cursor_zdindex.execute("select top 1 max(periods) from zd_entry_goods_price")
        periods = str(cursor_zdindex.fetchone()[0])
        cursor_zdindex.execute(f"delete from zd_electricity_price where periods = '{periods}' and mall_id = 'DS-SPU-HY'")
        cursor_zdindex.execute(f"delete from zd_entry_goods_price where periods = '{periods}' and shop_id = 'ST-SPU-HY'")
        cursor_zdindex.execute(f"delete from zd_purchase_price where periods = '{periods}' and purchase_id = 'ZC-SPU-HY'")
    finally:
        # Release the connection even when one of the statements fails.
        conn_zdindex.close()
    print(f"SPU还原价删除完毕 -- {create_time}")
def get_configure_price():
    """Load the accessory price table from ``price_calculate``.

    Returns:
        pandas.DataFrame: columns ``category_code``, ``category_name``,
        ``part``, ``detail`` and ``price`` (converted to numeric).

    Any failure is reported on stdout and the whole load is retried by
    recursion, as before.
    """
    # Load the accessory prices.
    try:
        conn = pymssql.connect(host='123.57.45.119', user='zgcprice', password='zgcprice20200708', database='price_calculate',autocommit=True)
        try:
            cursor = conn.cursor()
            cursor.execute("select category_code,category_name,part,detail,price from configure_price")
            configure_price = pd.DataFrame(list(cursor.fetchall()),columns=[col[0] for col in cursor.description])
        finally:
            # Close the connection on failure too, so retries do not pile
            # up open connections.
            conn.close()
        configure_price['price'] = pd.to_numeric(configure_price['price'])
        return configure_price
    except Exception:
        # `except Exception` instead of a bare except, so Ctrl-C and
        # SystemExit are no longer turned into a retry.
        print('连接失败,重新连接')
        return get_configure_price()
def get_product_code(category_list):
    """Load the SPU/SKU mapping of the given categories from ``ZI_NEW``.

    Args:
        category_list: category names matched against ``p_category.name``.

    Returns:
        pandas.DataFrame: columns ``spuid``, ``spuname``, ``sku``,
        ``skuname``, ``categoryname``, ``categoryid`` and ``brandid`` for
        SKUs whose ``state`` is 1 or 4.

    Any failure is reported on stdout and the load is retried by recursion.
    """
    # One "%s" placeholder per category; the values are passed separately so
    # the driver does the quoting.
    placeholders = ','.join(['%s'] * len(category_list))
    query = (
        "select a.spuid,b.spuname,a.sku,a.skuname,c.name as categoryname,b.categoryid,b.brandid from p_sku a "
        "left join p_spu b "
        "on a.spuid = b.id "
        "left join p_category c "
        "on b.categoryid = c.id "
        "where a.state in (1,4) and c.name in (%s)" % placeholders
    )
    try:
        conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='ZI_NEW',autocommit=True)
        try:
            cursor = conn.cursor()
            cursor.execute(query,tuple(category_list))
            spu_sku_df = pd.DataFrame(cursor.fetchall(),columns=[col[0] for col in cursor.description])
        finally:
            # Close the connection on failure too.
            conn.close()
        return spu_sku_df
    except Exception:
        print('连接失败,重新连接')
        # The retry must pass the argument on; calling without it raised
        # TypeError instead of retrying.
        return get_product_code(category_list)
def get_attr_data(category_list):
    """Load the parameter rows of the given categories from ``vw_sku_params``.

    Args:
        category_list: category names matched against ``categoryname``.

    Returns:
        pandas.DataFrame: every column of the view for the matching rows.

    Any failure is reported on stdout and the load is retried by recursion.
    """
    # One "%s" placeholder per category; the values are passed separately.
    placeholders = ','.join(['%s'] * len(category_list))
    try:
        conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='ZI_NEW',autocommit=True)
        try:
            cursor = conn.cursor()
            cursor.execute("select * from vw_sku_params where categoryname in (%s)" % placeholders,tuple(category_list))
            attr_data = pd.DataFrame(cursor.fetchall(),columns=[col[0] for col in cursor.description])
        finally:
            # Close the connection on failure too.
            conn.close()
        return attr_data
    except Exception:
        # `except Exception` instead of a bare except, so Ctrl-C and
        # SystemExit are no longer turned into a retry.
        print('连接失败,重新连接')
        return get_attr_data(category_list)
def calculate_configure_price(cat_params_df,productCode,configure_price_df):
    """Sum the accessory prices of one SKU.

    Args:
        cat_params_df: parameter rows with columns ``sku``, ``name``, ``value``.
        productCode: the SKU whose parameters are priced.
        configure_price_df: price rows with columns ``part``, ``detail``,
            ``price``.

    Returns:
        The sum over the SKU's parameters of the first price whose ``part``
        equals the parameter name and whose ``detail`` equals its value.
        Parameters whose name is not a priced part are ignored.  A priced
        part with no row for the value counts as 0 and is reported on stdout.
    """
    filter_df = cat_params_df[cat_params_df['sku'] == productCode]
    # Build the membership set once instead of rebuilding a list per parameter.
    priced_parts = set(configure_price_df['part'])
    price = 0
    for name,value in zip(filter_df['name'],filter_df['value']):
        if name not in priced_parts:
            continue
        is_match = (configure_price_df['part'] == name) & (configure_price_df['detail'] == value)
        matches = configure_price_df[is_match]['price'].tolist()
        if matches:
            current_price = matches[0]
        else:
            # No price row for this part/value combination.
            current_price = 0
            print("无该配件价格:",name,value)
        price += current_price
    return price
def create_price_difference():
    """Rebuild the SKU accessory price-difference table for all categories.

    Empties ``configure_price_difference`` and then inserts one row per SKU
    of the three hard-coded categories.  Each row holds the SKU's accessory
    price as computed by ``calculate_configure_price``.
    """
    category_list = ['笔记本','台式机','一体电脑']
    category_id_list = ['672','673','12798']
    category_dict = dict(zip(category_id_list,category_list))
    configure_price_df = get_configure_price()
    spu_sku_df = get_product_code(category_list)
    params_df = get_attr_data(category_list)
    conn = pymssql.connect(host='123.57.45.119', user='zgcprice', password='zgcprice20200708', database='price_calculate',autocommit=True)
    try:
        cursor = conn.cursor()
        # Empty the existing table.
        cursor.execute("delete from configure_price_difference")
        create_time = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime())
        for category_id in category_id_list:
            category = category_dict[category_id]
            cat_params_df = params_df[params_df['categoryname'] == category]
            cat_configure_price_df = configure_price_df[configure_price_df['category_name'] == category]
            sku_source_list = spu_sku_df[spu_sku_df['categoryname'] == category]['sku'].unique().tolist()
            # Clamp the denominator: a category with exactly one SKU would
            # otherwise divide by zero in the progress bar.
            total = max(len(sku_source_list) - 1, 1)
            index = Index()
            for process_index, sku in enumerate(sku_source_list):
                print(index(process_index, total), end='%')
                # Filter the mapping once per SKU instead of once per column.
                sku_rows = spu_sku_df[spu_sku_df['sku'] == sku]
                skuname = str(sku_rows['skuname'].tolist()[0]).replace("'","''")
                categoryid = sku_rows['categoryid'].tolist()[0]
                brandid = sku_rows['brandid'].tolist()[0]
                spuid = sku_rows['spuid'].tolist()[0]
                configure_difference = calculate_configure_price(cat_params_df,sku,cat_configure_price_df)
                cursor.execute(f"insert into configure_price_difference (sku,skuname,categoryid,brandid,spuid,price_difference,create_time) values ('{sku}','{skuname}','{categoryid}','{brandid}','{spuid}',{configure_difference},'{create_time}')")
            print(f"{category}计算完成")
    finally:
        # Release the connection even when an insert fails.
        conn.close()
def create_price_difference_single(category,sku_list):
    """Insert or refresh the accessory price difference of selected SKUs.

    For each SKU the accessory price is recomputed.  A missing row is
    inserted into ``configure_price_difference``; an existing row gets its
    ``price_difference`` and ``update_time`` updated.

    Args:
        category: category name the SKUs belong to.
        sku_list: SKUs to process; each must be present in the SPU/SKU
            mapping returned by ``get_product_code``.
    """
    category_list = [category]
    configure_price_df = get_configure_price()
    spu_sku_df = get_product_code(category_list)
    params_df = get_attr_data(category_list)
    cat_params_df = params_df[params_df['categoryname'] == category]
    cat_configure_price_df = configure_price_df[configure_price_df['category_name'] == category]
    conn = pymssql.connect(host='123.57.45.119', user='zgcprice', password='zgcprice20200708', database='price_calculate',autocommit=True)
    try:
        cursor = conn.cursor()
        create_time = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime())
        for sku in sku_list:
            # Filter the mapping once per SKU instead of once per column.
            sku_rows = spu_sku_df[spu_sku_df['sku'] == sku]
            skuname = str(sku_rows['skuname'].tolist()[0]).replace("'","''")
            categoryid = sku_rows['categoryid'].tolist()[0]
            brandid = sku_rows['brandid'].tolist()[0]
            spuid = sku_rows['spuid'].tolist()[0]
            configure_difference = calculate_configure_price(cat_params_df,sku,cat_configure_price_df)
            cursor.execute(f"select sku from configure_price_difference where sku = '{sku}'")
            already_stored = bool(cursor.fetchall())
            if not already_stored:
                cursor.execute(f"insert into configure_price_difference (sku,skuname,categoryid,brandid,spuid,price_difference,create_time) values ('{sku}','{skuname}','{categoryid}','{brandid}','{spuid}',{configure_difference},'{create_time}')")
            else:
                cursor.execute(f"update configure_price_difference set price_difference = {configure_difference} , update_time = '{create_time}' where sku = '{sku}'")
        print(f"{category}新增配件差价计算完成")
    finally:
        # Release the connection even when a statement fails.
        conn.close()
def check_configure_price(category,sku_list):
    """Report the accessory part/value pairs of the SKUs that have no price.

    For every parameter of every SKU whose name is a priced part of the
    category, a message is printed when no price row exists for the
    parameter value.  Nothing is returned or stored.

    Args:
        category: category name used to filter parameters and prices.
        sku_list: SKUs to check.
    """
    category_list = [category]
    configure_price_df = get_configure_price()
    params_df = get_attr_data(category_list)
    cat_params_df = params_df[params_df['categoryname'] == category]
    cat_configure_price_df = configure_price_df[configure_price_df['category_name'] == category]
    # Build the membership set once instead of rebuilding a list per parameter.
    priced_parts = set(cat_configure_price_df['part'])
    for sku in sku_list:
        filter_df = cat_params_df[cat_params_df['sku'] == sku]
        for name,value in zip(filter_df['name'],filter_df['value']):
            if name not in priced_parts:
                continue
            is_match = (cat_configure_price_df['part'] == name) & (cat_configure_price_df['detail'] == value)
            # An explicit emptiness check replaces indexing under a bare except.
            if cat_configure_price_df[is_match].empty:
                print("无该配件价格:",category,name,value)
This source diff could not be displayed because it is too large. You can view the blob instead.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 13 11:42:04 2020
@author: rico
"""
import hashlib
from API import zgc_api
from db import MSSQL
import pandas as pd
from public_function import *
'''
#加载数据
def LoadData():
#创建数据库链接
mssql = MSSQL('123.57.45.119','ZI_Service')
cursor_zi_service = mssql._cur
cursor_zi_service.execute("select * from product_all where sku is Null")
df = pd.DataFrame(cursor_zi_service.fetchall(), columns=[tuple[0] for tuple in cursor_zi_service.description])
mssql.Close()
return df
'''
def hash_name(name):
    """Return the hexadecimal MD5 digest of ``name`` encoded as UTF-8."""
    return hashlib.md5(name.encode("utf8")).hexdigest()
def get_pre_deal():
    """Pre-fill brand, category and a hashed SKU for rows without a SKU.

    For every ``product_all`` row whose ``sku`` is NULL:

    * the SKU becomes the MD5 hash of the product name;
    * a predicted category is requested from ``GetPredictCategory``;
    * the brand is extracted by searching the first space-separated token of
      the upper-cased name for a leaf brand's English name (no Chinese
      characters in the token) or Chinese name (otherwise);
    * the category is extracted by searching the name for any
      ``/``-separated part of a leaf category name, falling back to the
      predicted category.

    The results are written back to ``product_all``.
    """
    mssql = MSSQL('123.57.45.119','ZI_Service')
    mssql_new = MSSQL('123.56.115.207','ZI_NEW')
    try:
        cursor_zi_service = mssql._cur
        cursor_zi_service.execute("select * from product_all where sku is Null")
        df = pd.DataFrame(cursor_zi_service.fetchall(), columns=[col[0] for col in cursor_zi_service.description])
        cursor_zi_new = mssql_new._cur
        # Leaf brands / categories: ids that are nobody's parent.
        cursor_zi_new.execute("select * from p_brand where id not in (select pid from p_brand)")
        brand_df = pd.DataFrame(cursor_zi_new.fetchall(), columns=[col[0] for col in cursor_zi_new.description]).fillna('无品牌信息')
        cursor_zi_new.execute("select * from p_category where id not in (select pid from p_category)")
        category_df = pd.DataFrame(cursor_zi_new.fetchall(), columns=[col[0] for col in cursor_zi_new.description])
        # Hash the product name into the SKU.
        df['sku'] = df['name'].apply(lambda x: hash_name(x))
        name_list = df['name'].tolist()
        # Request the predicted categories.
        data = {
            "category_info": {
                "cust_name_list": name_list,
                "url_name_list": name_list
            }
        }
        res = zgc_api("GetPredictCategory",data)
        df['predict_cat'] = res['category_list']
        df['predict_cat_code'] = res['categoryCode_list']
        # Progress bar.  The denominator is clamped to 1 because a
        # single-row frame would otherwise divide by zero.
        index_ = Index()
        total = max(len(df) - 1, 1)
        counter = 1
        for index,row in df.iterrows():
            id_ = row['id']
            sku = row['sku']
            name = row['name'].strip().upper()
            print(index_(counter, total), end='%')
            counter += 1
            pre_cat = row['predict_cat']
            pre_cat_code = row['predict_cat_code']
            zi_brandname = '未抽取到数据'
            zi_brandcode = '未抽取到数据'
            zi_categoryname = '未抽取到数据'
            zi_categorycode = '未抽取到数据'
            # Extract the brand from the first token of the name.
            search_name = name.split(' ')[0]
            # Computed once per product; it does not depend on the brand row.
            has_chinese = len(re.findall(r"[\u4e00-\u9fa5]", search_name)) != 0
            for index_b,row_brand in brand_df.iterrows():
                cname = row_brand['cname']
                ename = row_brand['ename'].upper()
                # One-character names are skipped.
                # NOTE(review): an empty ename is not skipped and is
                # contained in every string -- confirm that cannot occur.
                if len(cname) == 1 or len(ename) == 1:
                    continue
                candidate = cname if has_chinese else ename
                if candidate in search_name:
                    zi_brandname = row_brand['name']
                    zi_brandcode = row_brand['id']
                    break
            # Extract the category.
            # NOTE(review): the break only leaves the inner loop, so the LAST
            # matching category row wins -- confirm this is intended.
            for index_c,row_cat in category_df.iterrows():
                categoryname = row_cat['name']
                for category in categoryname.split('/'):
                    if category in name:
                        zi_categoryname = categoryname
                        zi_categorycode = row_cat['id']
                        break
            if zi_categoryname == '未抽取到数据':
                zi_categoryname = pre_cat
                zi_categorycode = pre_cat_code
            # NOTE(review): when no brand was extracted, zi_brandcode is the
            # placeholder text and is embedded unquoted below -- verify how
            # the server treats that statement.
            cursor_zi_service.execute(
                f"update product_all set zi_brandname = '{zi_brandname}',zi_brandcode = {zi_brandcode},"
                f"zi_subcategoryname = '{zi_categoryname}',zi_subcategorycode = {zi_categorycode}, "
                f"predict_category = '{pre_cat}',predict_category_code = {pre_cat_code},sku = '{sku}' where id = {id_}"
            )
    finally:
        # Neither connection was closed before; release both here.
        mssql.Close()
        mssql_new.Close()
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 9 23:37:15 2020
@author: rico
"""
import pymssql
import pandas as pd
import os
import requests
import tensorflow as tf
from lxml import etree
import re
import datetime
import json
import time
#from text_moudle.run_cnn import name2subcategory as generl_name2subcategory
#from text_moudle_LXWL.run_cnn import name2subcategory as LXWL_name2subcategory
#print (os.getcwd())
class Index(object):
    """Text progress bar.

    Calling an instance with ``(now, total)`` returns a string made of a
    carriage return, a bracketed bar of ``#`` characters padded with spaces,
    a space, and the percentage.  The caller is expected to print it.
    """

    def __init__(self, number=50, decimal=2):
        """
        :param number: width of the bar, i.e. the number of '#' at 100 %
        :param decimal: number of decimal places kept in the percentage
        """
        self.decimal = decimal
        self.number = number
        self.a = 100/number  # percentage represented by one '#'

    def __call__(self, now, total):
        """Return the progress-bar string for ``now`` out of ``total``."""
        # 1. Current percentage.
        percentage = self.percentage_number(now, total)
        # 2. Number of '#' for that percentage.
        well_num = int(percentage / self.a)
        # 3. Bar followed by the percentage.
        return "\r%s %s" % (self.progress_bar(well_num), percentage)

    def percentage_number(self, now, total):
        """
        Compute the percentage.

        :param now: current count
        :param total: total count
        :return: ``now / total * 100`` rounded to ``decimal`` places; 100.0
            when ``total`` is 0
        """
        if total == 0:
            # Callers pass len(data) - 1, which is 0 for a single item.
            # Report completion instead of dividing by zero.
            return round(100.0, self.decimal)
        return round(now / total * 100, self.decimal)

    def progress_bar(self, num):
        """
        Render the bar.

        :param num: number of '#' characters to draw
        :return: the bar, padded with spaces up to the configured width
        """
        filled = "#" * num
        padding = " " * (self.number - num)
        return '[%s%s]' % (filled, padding)
def match_sku(sku_list,frm):
    '''
    De-duplicate supplier SKUs against the SKUs already in the library.

    The known (productcode, sku) pairs are read from ``Productcode_Sku``
    (ZI_NEW) and ``op_product_sku`` (ZI_DataBase) for the given source.
    Product codes starting with 78 are excluded.

    :param sku_list: supplier SKUs (strings) to look up
    :param frm: source id: DL/DW/DZ/GM/JD/LXWL/OFS/SN/YHD/ZCSM
    :return: dict mapping each stripped SKU to the first matching product
        code, or to '0' when the SKU is unknown.  For SN / SN-NEW a second
        lookup is made with the prefix '0000000000/'.
    '''
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database='ZI_NEW',autocommit=True)
    conn_zi_database = None
    try:
        cursor = conn.cursor()
        conn_zi_database = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database='ZI_DataBase',autocommit=True)
        cursor_zi_database = conn_zi_database.cursor()
        # Fetch every SKU currently in the library.
        print('正在获取当前库中所有sku,请稍等.....')
        try:
            # Works when frm is already a SQL list such as "('JD','SN')".
            cursor.execute(f"select productcode,sku from Productcode_Sku where frm in {frm} and productcode not like '78%'")
            data_sku_file = pd.DataFrame(cursor.fetchall(),columns=['productcode','sku'])
        except Exception:
            # Otherwise frm is a single source id.
            cursor.execute(f"select productcode,sku from Productcode_Sku where frm='{frm}' and productcode not like '78%'")
            data_sku_file = pd.DataFrame(cursor.fetchall(),columns=['productcode','sku'])
        # op_product_sku stores Suning under the id 'SN-NEW'.
        if frm == 'SN':
            frm = 'SN-NEW'
        cursor_zi_database.execute(f"select productcode,sku from op_product_sku where frm='{frm}' and productcode not like '78%' ")
        data_sku_file_op = pd.DataFrame(cursor_zi_database.fetchall(),columns=['productcode','sku'])
    finally:
        # Release both connections even when a query fails.
        conn.close()
        if conn_zi_database is not None:
            conn_zi_database.close()
    data_sku_file = pd.concat([data_sku_file,data_sku_file_op]).drop_duplicates()
    data_sku_file['productcode'] = data_sku_file['productcode'].apply(lambda x:x.strip())
    data_sku_file['sku'] = data_sku_file['sku'].apply(lambda x:x.strip())
    print('sku获取完毕')
    # Map each known SKU to its FIRST product code (row order), built once.
    # The old code rebuilt and scanned the full list for every queried SKU.
    code_by_sku = {}
    for known_sku, code in zip(data_sku_file['sku'], data_sku_file['productcode']):
        code_by_sku.setdefault(known_sku, code)
    is_suning = frm == 'SN' or frm == 'SN-NEW'
    sku_check = {}
    for sku in set(sku_list):
        sku = str(sku.strip())
        code = code_by_sku.get(sku)
        if code is None and is_suning:
            # Suning SKUs may be stored with a shop prefix.
            code = code_by_sku.get('0000000000/' + sku)
        # The old '2' ("matched several SKUs") result could never be
        # produced: the first match was always taken.
        sku_check[sku] = '0' if code is None else code
    return sku_check
def supporturlDataDeal(source,batch):
    '''
    Review and de-duplicate the crawled product rows of one batch.

    Rows of ``product`` whose ``channel_url_validate`` is NULL are loaded for
    the given source and batch.  Rows from channels other than JD / GM / SN
    are approved directly.  JD / GM / SN rows are reviewed by
    ``check_reverse_data`` and their SKUs are matched against the library
    with ``match_sku``.

    :param source: value compared against the ``来源`` column (embedded in
        the SQL as given)
    :param batch: value compared against the ``batch`` column (embedded in
        the SQL as given)
    :return: None
    '''
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database= 'reverse_data',autocommit=True)
    try:
        cursor = conn.cursor()
        cursor.execute(f'select * from product where channel_url_validate is Null and 来源={source} and batch={batch}')
        data = cursor.fetchall()
        df = pd.DataFrame(data, columns=[col[0] for col in cursor.description])[['product_id', 'channel_sku', 'product_name', 'channel_id', 'channel_product_id']]
        df.rename(columns={'product_id': 'id', 'channel_sku': 'sku', 'product_name': 'name', 'channel_id': 'source','channel_product_id': 'url'}, inplace=True)
        df['price'] = '0'
        is_big_three = df['source'].isin(['JD', 'GM', 'SN'])
        # Other channels are approved without review.
        df_otherDS = df[~is_big_three]
        for id_ in df_otherDS['id'].tolist():
            cursor.execute(f"update product set channel_url_validate='通过',zgc_productcode='无' where product_id='{id_}'")
        df_DS = df[is_big_three].reset_index(drop=True)
        # Review the three big e-commerce channels.
        check_data = check_reverse_data(df_DS)
        # For an empty frame check_reverse_data returns its input unchanged,
        # which has no review columns.  Skip the update to avoid a KeyError.
        if not df_DS.empty:
            for id_, suggestion in zip(check_data['update_id'], check_data['审核意见']):
                cursor.execute(f"update product set channel_url_validate='{suggestion}' where product_id='{id_}'")
        # De-duplicate the SKUs against the library, one channel at a time.
        # The loop variable has its own name so it no longer shadows `source`.
        for channel in list(df_DS['source'].unique()):
            print(channel + 'sku排重中')
            sku_list = df_DS[df_DS['source'] == channel]['sku'].tolist()
            sku_check = match_sku(sku_list, channel)
            for k, v in sku_check.items():
                if v == '0':
                    cursor.execute(f"update product set zgc_productcode='无' where channel_sku='{k}'")
                else:
                    cursor.execute(f"update product set zgc_productcode='{v}' where channel_sku='{k}'")
            print(channel + 'sku排重完毕')
    finally:
        # Release the connection even when a statement fails.
        conn.close()
def _rev_flags_stopword(name, word_df):
    '''
    Tell whether ``name`` trips a stop word that is not excused by its white words.

    A row of ``word_df`` flags the name when the stop word occurs in
    ``str(name)`` and at least one of its ``/``-separated white words does not.

    NOTE(review): a stop word whose white-word cell is empty/NULL never flags
    anything here (there is no white word to be missing). That mirrors the
    previous behaviour; confirm it is intended.
    '''
    text = str(name)
    for stopword, whitewords in zip(word_df['stopword'], word_df['whiteword']):
        try:
            whitelist = whitewords.split('/')
        except AttributeError:
            # NULL / non-string white-word cell.
            whitelist = []
        if any(stopword in text and white not in text for white in whitelist):
            return True
    return False


def _rev_jd_sold_out(stock_info):
    '''Read the JD stock payload; the fields live either under ``stock`` or at top level.'''
    try:
        stock = stock_info['stock']
        return "无货" in stock['stockDesc'] or "无货" in stock['StockStateName']
    except Exception:
        return "无货" in stock_info['StockStateName']


def _rev_audit_jd(session, headers, word_df, rec):
    '''Audit one JD link; updates ``rec`` (sku/name/price) and returns the verdict text.'''
    sku = rec['url'].split('/')[-1].split('.')[0]
    rec['sku'] = sku
    # Price lookup.
    quote = get_response(session, "https://p.3.cn/prices/mgets?skuIds=" + str(sku), headers).json()
    jd_price = quote[0]['p']
    if len(jd_price) == 0:
        jd_price = rec['price']
    if jd_price == '-1.00':
        # JD reports -1.00 for items it does not sell; keep the submitted price.
        return "无货,请按要求提供在销渠道证明"
    rec['price'] = jd_price
    # Product page: self-operated flag and title.
    page = get_response(session, "https://item.jd.com/" + sku + ".html", headers)
    html = etree.HTML(page.text)
    ziying = html.xpath("//div[@class='name goodshop EDropdown']/em/text()")
    if "自营" not in str(ziying):
        return "非自营,请按要求提供在销渠道证明"
    rec['name'] = html.xpath("//div[@class='sku-name']/text()")
    if _rev_flags_stopword(rec['name'], word_df):
        return "定制/专用/书籍类产品暂不通过"
    # Stock lookup; an unreachable stock service counts as a pass.
    stock_resp = get_response(session, "https://c0.3.cn/stock?skuId=" + str(sku) + "&area=1_2901_2906_0&cat=9987,653,655", headers)
    if stock_resp == -1:
        return "通过"
    stock_resp.encoding = 'gbk'
    if _rev_jd_sold_out(json.loads(stock_resp.text)):
        return "无货,请按要求提供在销渠道证明"
    return "通过"


def _rev_audit_gome(session, headers, word_df, rec):
    '''Audit one Gome link; updates ``rec`` (sku/name/price) and returns the verdict text.'''
    item_path = re.findall(".cn/(.*?).html", rec['url'])[0]
    page_url = 'https://item.gome.com.cn/' + item_path + '.html'
    page = get_response(session, page_url, headers)
    html = etree.HTML(page.text)
    script = html.xpath("//script[contains(text(),'gomePrice')]/text()")[0]
    rec['price'] = script.split('gomePrice:"')[1].split('"')[0]
    ziying = html.xpath("//span[@class='identify']/text()")
    if len(ziying) != 1:
        return "非自营,请按要求提供在销渠道证明"
    rec['name'] = html.xpath("//*[@id='gm-prd-main']/div[1]/h1/text()")
    if _rev_flags_stopword(rec['name'], word_df):
        return "定制/专用/书籍类产品暂不通过"
    # Stock lookup (JSONP answer wrapped in allStores(...)).
    rec['sku'] = page_url.split('.html')[0].split('/')[-1].replace('-', '/')
    stock_url = "https://ss.gome.com.cn/item/v1/d/m/store/unite/" + str(
        rec['sku']) + "/N/11010200/110102002/1/null/flag/item/allStores?callback=allStores"
    stock_resp = get_response(session, stock_url, headers)
    payload = json.loads(stock_resp.text.replace('allStores(', '').replace(')', ''))
    if payload['result']['stock']['status'] == False:
        return "无货,请按要求提供在销渠道证明"
    return "通过"


def _rev_audit_suning(session, headers, word_df, rec):
    '''Audit one Suning link; updates ``rec`` (sku/name/price) and returns the verdict text.'''
    rec['sku'] = re.findall(".com/(.*?).html", rec['url'])[0]
    page = get_response(session, 'https://product.suning.com/' + rec['sku'] + '.html', headers)
    html = etree.HTML(page.text)
    part_number = html.xpath("//input[@id='curPartNumber']/@value")[0]
    ziying1 = html.xpath("//div[@class='proinfo-title']/h1/span/i/text()")
    ziying2 = html.xpath("//h1[@id='itemDisplayName']/span/text()")
    on_sale = re.findall("id=\"ie7_onsale\" >(.*?)<i", page.text)
    if not ("自营" in ziying1 or "自营" in ziying2):
        return '非自营,请按要求提供在销渠道证明'
    info = get_response(session, f'https://product.suning.com/pds-web/ajax/itemUniqueInfo_{str(part_number)}_0000000000.html', headers)
    item_detail = json.loads(info.text)["itemDetail"]
    try:
        is_published = item_detail["isPublished"]
    except Exception:
        is_published = '0'
    rec['name'] = item_detail["cmmdtyTitle"]
    if is_published != '1' or '此款有货' not in str(on_sale):
        return '无货,请按要求提供在销渠道证明'
    if _rev_flags_stopword(rec['name'], word_df):
        return "定制/专用/书籍类产品暂不通过"
    # Three page fields are needed to build the price endpoint URL.
    cart_data = html.xpath("//a[@id='addCart2']/@sa-data")
    prd_id = re.findall("'prdid':'(.*?)','", str(cart_data))[0]
    shop_id = re.findall("'shopid':'(.*?)','", str(cart_data))[0]
    cate_code = html.xpath("//input[@name='procateCode']/@value")[0]
    real_url = f'https://pas.suning.com/nspcsale_0_{prd_id}_{prd_id}_{shop_id}_10_010_0100100_157122_1000000_9017_10106_Z001___{cate_code}.html?callback=pcData'
    try:
        price_response = requests.get(real_url, timeout=5)
        sn_price = re.findall('"promotionPrice":"(.*?)",', price_response.text)[0]
    except Exception:
        # Price endpoint unavailable: fall back to the submitted price.
        sn_price = rec['price']
    if len(sn_price) != 0:
        rec['price'] = sn_price
        return '通过'
    return '该地区不销售(北京市丰台区)'


def check_reverse_data(check_data):
    '''
    Audit crawled product rows against the live retailer pages.

    Each row is routed by its URL (``jd`` / ``gome`` / ``suning``, in that
    order) to the matching auditor; any other URL is rejected as not being one
    of the three retailers. An exception anywhere in an auditor yields the
    "bad link" verdict with the originally submitted price. Exactly one
    verdict/price/sku/name/url/source record is produced per row, so the
    result columns always have the frame's length.

    :param check_data: DataFrame with columns ``id``, ``sku``, ``name``,
        ``url``, ``source`` and ``price`` and a default 0..n-1 index.
        It is modified in place.
    :return: the same DataFrame with ``审核意见`` and ``update_id`` added and
        ``sku``/``name``/``url``/``source``/``price`` overwritten with the
        audited values; an empty frame is returned untouched.
    '''
    if check_data.empty:
        # Nothing to audit: skip the database round trip entirely.
        print('今日无新增数据')
        return check_data

    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='zi_zh',autocommit=True)
    try:
        cursor = conn.cursor()
        cursor.execute('select stop_word,white_word from Stopwords')
        word_df = pd.DataFrame(cursor.fetchall(), columns=['stopword', 'whiteword'])
    finally:
        # The stop-word table is the only thing read from this connection.
        conn.close()

    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'}
    session = requests.Session()
    print('共' + str(len(check_data)) + '条数据待审核')

    # Checked in this order, like the original if/elif chain.
    auditors = (("jd", _rev_audit_jd), ("gome", _rev_audit_gome), ("suning", _rev_audit_suning))

    verdicts = []
    records = []
    for i in range(len(check_data)):
        row = check_data.loc[i]
        rec = {
            'id': row['id'],
            'sku': row['sku'],
            'name': row['name'],
            'url': row['url'].strip(),
            'source': row['source'],
            'price': row['price'],
        }
        submitted_price = rec['price']
        print(rec['url'])
        auditor = next((fn for key, fn in auditors if key in str(rec['url'])), None)
        if auditor is None:
            verdict = "非三大电商,请按要求提供在销渠道证明"
        else:
            try:
                verdict = auditor(session, headers, word_df, rec)
            except Exception:
                # Unparseable link, blocked page, unexpected payload, ...
                verdict = "链接有误,请按要求提供在销渠道证明"
                rec['price'] = submitted_price
        print(verdict)
        verdicts.append(verdict)
        records.append(rec)
        print(f"已经处理{i + 1}条数据")

    check_data['审核意见'] = verdicts
    check_data['update_id'] = [rec['id'] for rec in records]
    check_data['sku'] = [rec['sku'] for rec in records]
    check_data['name'] = [rec['name'] for rec in records]
    check_data['url'] = [rec['url'] for rec in records]
    check_data['source'] = [rec['source'] for rec in records]
    check_data['price'] = [rec['price'] for rec in records]
    return check_data
def get_response(session, url, headers):
    '''
    GET ``url`` through ``session``, retrying when the request itself fails.

    The first attempt decides the mode: if it reaches the server, a 200 answer
    is returned and any other status is final (``-1``, no retry). If it raises
    (timeout, connection error, ...), up to 9 more attempts are made; during
    those, both exceptions and non-200 answers lead to the next attempt.

    Only ``Exception`` subclasses are treated as request failures, so
    ``KeyboardInterrupt`` / ``SystemExit`` still stop the program.

    :param session: object exposing ``get(url, headers=..., timeout=...)``
        (e.g. ``requests.Session``).
    :param url: address to fetch.
    :param headers: request headers passed through unchanged.
    :return: the response whose ``status_code`` is 200, otherwise ``-1``.
    '''
    try:
        response = session.get(url, headers=headers, timeout=5)
    except Exception:
        # Request never completed: fall through to the retry loop.
        pass
    else:
        return response if response.status_code == 200 else -1

    for attempt in range(1, 10):
        print('请求超时,第%s次重复请求' % attempt)
        try:
            response = session.get(url, headers=headers, timeout=5)
        except Exception:
            continue
        if response.status_code == 200:
            return response
    return -1
def _cd_name_rejected(name, stop_rules, whitelist_wins=False):
    '''
    Scan ``(stop_word, white_word)`` rules in order and tell whether ``name`` is rejected.

    A rule rejects when its stop word occurs in ``str(name)`` and the rule has
    no white word, or the white word does not occur. With ``whitelist_wins``
    the first rule whose stop word and white word both occur ends the scan
    with "not rejected" (later rules are not consulted). An empty rule list
    rejects nothing.
    '''
    text = str(name)
    for stop_word, white_word in stop_rules:
        if str(stop_word) not in text:
            continue
        if white_word is not None and str(white_word) in text:
            if whitelist_wins:
                return False
            continue
        return True
    return False


def _cd_jd_sold_out(stock_info):
    '''Read the JD stock payload; the fields live either under ``stock`` or at top level.'''
    try:
        stock = stock_info['stock']
        return "无货" in stock['stockDesc'] or "无货" in stock['StockStateName']
    except Exception:
        return "无货" in stock_info['StockStateName']


def _cd_price_or_zero(submitted_price):
    '''Return the submitted price, or ``'0'`` when it is an empty string/sequence.'''
    try:
        return submitted_price if len(submitted_price) != 0 else '0'
    except TypeError:
        # Numeric price: has no length, use it as is.
        return submitted_price


def _cd_audit_jd(session, headers, stop_rules, main_url, submitted_price):
    '''Audit one JD link; returns ``(verdict, price)``.'''
    if "i-item" in str(main_url):
        # i-item links are accepted with the submitted price, no page check.
        return "通过", submitted_price
    session.get(main_url, headers=headers)  # probe: a bad link raises here
    sku = main_url.split('/')[-1].split('.')[0]
    # Price lookup.
    quote = session.get("https://p.3.cn/prices/mgets?skuIds=" + str(sku), headers=headers).json()
    jd_price = quote[0]['p']
    if len(jd_price) == 0:
        print('未获取到价格,使用供应商提交价格!!!')
        jd_price = submitted_price
    if jd_price == '-1.00':
        return "无货,请按要求提供在销渠道证明", submitted_price
    print('价格为:', jd_price)
    # Product page: self-operated flag and title.
    page = session.get("https://item.jd.com/" + sku + ".html", headers=headers)
    html = etree.HTML(page.text)
    ziying = html.xpath("//div[@class='name goodshop EDropdown']/em/text()")
    if "自营" not in str(ziying):
        return "非自营,请按要求提供在销渠道证明", jd_price
    name = html.xpath("//div[@class='sku-name']/text()")
    if _cd_name_rejected(name, stop_rules):
        return "定制/专用/含禁止上架关键词,产品暂不通过", jd_price
    # Stock lookup; an unreachable stock service counts as a pass.
    stock_resp = get_response(session, "https://c0.3.cn/stock?skuId=" + str(sku) + "&area=1_2901_2906_0&cat=9987,653,655", headers)
    if stock_resp == -1:
        return "通过", jd_price
    stock_resp.encoding = 'gbk'
    if _cd_jd_sold_out(json.loads(stock_resp.text)):
        return "无货,请按要求提供在销渠道证明", jd_price
    return "通过", jd_price


def _cd_audit_gome(session, headers, stop_rules, main_url, submitted_price):
    '''Audit one Gome link; returns ``(verdict, price)``.'''
    session.get(main_url, headers=headers)  # probe: a bad link raises here
    item_path = re.findall(".cn/(.*?).html", main_url)[0]
    page_url = 'https://item.gome.com.cn/' + item_path + '.html'
    page = session.get(page_url, headers=headers)
    html = etree.HTML(page.text)
    script = html.xpath("//script[contains(text(),'gomePrice')]/text()")[0]
    gm_price = script.split('gomePrice:"')[1].split('"')[0]
    ziying = html.xpath("//span[@class='identify']/text()")
    if len(ziying) != 1:
        return "非自营,请按要求提供在销渠道证明", gm_price
    name = html.xpath("//*[@id='gm-prd-main']/div[1]/h1/text()")
    if _cd_name_rejected(name, stop_rules):
        return "定制/专用/含禁止上架关键词,产品暂不通过", gm_price
    # Stock lookup (JSONP answer wrapped in allStores(...)).
    sku = page_url.split('.html')[0].split('/')[-1].replace('-', '/')
    stock_url = "https://ss.gome.com.cn/item/v1/d/m/store/unite/" + str(
        sku) + "/N/11010200/110102002/1/null/flag/item/allStores?callback=allStores"
    stock_resp = session.get(stock_url, headers=headers)
    payload = json.loads(stock_resp.text.replace('allStores(', '').replace(')', ''))
    if payload['result']['stock']['status'] == False:
        return "无货,请按要求提供在销渠道证明", gm_price
    return "通过", gm_price


def _cd_audit_suning(session, headers, stop_rules, main_url, submitted_price):
    '''Audit one Suning link; returns ``(verdict, price)``.'''
    print(f'苏宁:{main_url}')
    session.get(main_url, headers=headers)  # probe: a bad link raises here
    sku = re.findall(".com/(.*?).html", main_url)[0]
    main_url_ = 'https://product.suning.com/' + sku + '.html'
    page = session.get(main_url_, headers=headers)
    html = etree.HTML(page.text)
    part_number = html.xpath("//input[@id='curPartNumber']/@value")[0]
    ziying1 = html.xpath("//div[@class='proinfo-title']/h1/span/i/text()")
    ziying2 = html.xpath("//h1[@id='itemDisplayName']/span/text()")
    on_sale = re.findall("id=\"ie7_onsale\" >(.*?)<i", page.text)
    if not ("自营" in ziying1 or "自营" in ziying2):
        return '非自营,请按要求提供在销渠道证明', submitted_price
    info = session.get(f'https://product.suning.com/pds-web/ajax/itemUniqueInfo_{str(part_number)}_0000000000.html', headers=headers)
    item_detail = json.loads(info.text)["itemDetail"]
    try:
        is_published = item_detail["isPublished"]
    except Exception:
        is_published = '0'
    name = item_detail["cmmdtyTitle"]
    if is_published not in ('1', '0') or '此款有货' not in str(on_sale):
        return '无货,请按要求提供在销渠道证明', submitted_price
    if _cd_name_rejected(name, stop_rules):
        return '定制/专用/含禁止上架关键词,产品暂不通过', submitted_price
    # Price endpoint: a missing shop code switches to the platform-shop URL
    # and prefers refPrice over promotionPrice.
    shop_code = html.xpath("//input[@id='shop_code']/@value")[0]
    cate_code = html.xpath("//input[@name='procateCode']/@value")[0]
    if len(shop_code) == 0:
        real_url = f'https://pas.suning.com/nspcsale_0_{part_number}_{part_number}_0000000000_10_010_0100100_501126_1000000_9017_10106_Z001___{cate_code}_1.0_0___000321NJB____0__.html?callback=pcData'
        price_text = requests.get(real_url).text
        sn_price = re.findall('"refPrice":"(.*?)",', price_text)[0]
        if len(sn_price) == 0:
            sn_price = re.findall('"promotionPrice":"(.*?)",', price_text)[0]
    else:
        real_url = f'https://pas.suning.com/nspcsale_0_{part_number}_{part_number}_{shop_code}_10_010_0100100_157122_1000000_9017_10106_Z001___{cate_code}.html?callback=pcData'
        price_text = requests.get(real_url).text
        sn_price = re.findall('"promotionPrice":"(.*?)",', price_text)[0]
    if len(sn_price) != 0:
        try:
            return '通过', float(sn_price)
        except ValueError:
            # e.g. a price range such as "58.00-558.00": several variants share the link.
            print(f'该链接获取的价格有问题:{sn_price},{main_url_}')
    return '该链接无法定位到唯一商品', _cd_price_or_zero(submitted_price)


def _cd_audit_other(reverse_cursor, stop_rules, df, main_url):
    '''
    Audit a link that is not JD/Gome/Suning using the crawled copy in reverse_data.

    Rows that carry an ``otherurl`` field are looked up by ``id`` (first
    restricted to the link, then by id alone); other rows are looked up by
    ``sku`` restricted to the link. A row that cannot be found is rejected
    with the submitted price. Returns ``(verdict, price)``.
    '''
    by_id = ("select a.product_name,b.price from product a left join product_price b "
             "on a.product_id=b.product_id where a.id=%s")
    by_id_and_link = by_id + " and CHARINDEX(a.channel_product_id,%s)>0"
    not_found = ("非自营,请按要求提供在销渠道证明", df['price'])

    if 'otherurl' in df.index.tolist():
        hit = None
        try:
            reverse_cursor.execute(by_id_and_link, (str(df['id']), str(main_url)))
            hit = reverse_cursor.fetchone()
        except Exception:
            hit = None
        if hit is None:
            reverse_cursor.execute(by_id, (str(df['id']),))
            hit = reverse_cursor.fetchone()
        if hit is None:
            # Previously this subscripted None and aborted the whole run.
            return not_found
        name, found_price = hit[0], hit[1]
    else:
        reverse_cursor.execute(by_id_and_link, (str(df['sku']), str(main_url)))
        hit = reverse_cursor.fetchone()
        if not hit:
            return not_found
        print(f'name_price:{hit},价格:{str(hit[1])}')
        name, found_price = hit[0], str(hit[1])

    if _cd_name_rejected(name, stop_rules, whitelist_wins=True):
        return "定制/专用/含禁止上架关键词,产品暂不通过", found_price
    return "通过", found_price


def _cd_resolve_main_url(df):
    '''
    Pick the link to audit for one row.

    The row's ``url`` is used when it points at JD/Gome/Suning; otherwise the
    alternative link (``otherurl`` or ``supporturl``, whichever field the row
    has) is used, which may be empty.
    '''
    # SECURITY(review): eval() runs on text read from the database. It is kept
    # because the stored format (a Python list literal, apparently) cannot be
    # confirmed from here; ast.literal_eval would be the safe replacement.
    fields = df.index.tolist()
    if 'otherurl' in fields:
        if df['otherurl']:
            try:
                otherurl = eval(df['otherurl'])[0].strip()
            except Exception:
                otherurl = df['otherurl'].strip()
        else:
            otherurl = ''
    elif 'supporturl' in fields:
        other = eval(df['supporturl'].strip())
        if isinstance(other, list) and len(other) != 0:
            otherurl = other[0]
        else:
            otherurl = other
    else:
        otherurl = ''
        print('该渠道为API流程中的没有提供其他链接的渠道,其product_all表既没有other字段,也没有supporturl字段')

    if df['url']:
        try:
            main_url = eval(df['url'])[0].strip()
        except Exception:
            main_url = df['url'].strip()
    else:
        main_url = otherurl
    if "jd" not in str(main_url) and "gome" not in str(main_url) and "suning" not in str(main_url):
        main_url = otherurl
    return main_url


def _cd_flag_duplicate_links(fileName):
    '''Overwrite the verdict of every row whose retailer item key was already seen on an earlier row.'''
    seen = set()
    for i in range(len(fileName)):
        row = fileName.loc[i]
        if not row.url:
            continue
        url_u = row.url.strip()
        if len(url_u) == 0:
            continue
        if not ("jd" in str(url_u) or "gome" in str(url_u) or "suning" in str(url_u)):
            continue
        print(url_u)
        try:
            if "jd" in str(url_u):
                item_key = url_u.split('/')[-1].split('.')[0]
            elif "suning" in str(url_u):
                item_key = re.findall(".com/(.*?).html", url_u)[0]
            else:
                item_key = re.findall(".cn/(.*?).html", url_u)[0]
            if str(item_key) in seen:
                fileName.loc[i, '审核意见'] = '该链接与其他供应商提供链接重复,暂定不通过,正在处理,请稍等'
            else:
                seen.add(item_key)
        except Exception:
            fileName.loc[i, '审核意见'] = '链接有误,请按要求提供在销渠道证明'


def _cd_write_back(conn, cursor, fileName, source):
    '''
    Persist the verdicts.

    For ``source == 'zi_zh'`` both ``product_zh`` (keyed by ``id``) and
    ``product_all`` (keyed by ``product_zh_id``) are updated from the frame's
    ``product_zh_id`` column; otherwise ``product_all_api`` is updated from the
    frame's ``id`` column. Verdicts are applied group by group in the fixed
    order below; verdicts not listed are left alone.
    '''
    passed = '通过'
    # verdict -> (state written to product_zh, whether product_zh.isdo is set to '1')
    zh_rules = {
        '无货,请按要求提供在销渠道证明': ('2', True),
        '非自营,请按要求提供在销渠道证明': ('2', True),
        '定制/专用/含禁止上架关键词,产品暂不通过': ('1', False),
        '该链接无法定位到最终产品,存在多个价格': ('1', False),
        '链接有误,请按要求提供在销渠道证明': ('1', False),
        '其他销售渠道证明': ('1', False),
        '该链接与其他供应商提供链接重复,暂定不通过,正在处理,请稍等': ('2', False),
        '该链接无法定位到唯一商品': ('1', False),
    }
    # The stored remark equals the verdict except for this one.
    remarks = {'其他销售渠道证明': '其他销售渠道证明,需人工审核'}

    to_zi_zh = source == 'zi_zh'
    id_column = 'product_zh_id' if to_zi_zh else 'id'
    pairs = list(zip(fileName[id_column], fileName['审核意见']))

    print('更新数据库~')
    for row_id in (rid for rid, verdict in pairs if verdict == passed):
        key = str(row_id)
        if to_zi_zh:
            cursor.execute("update product_zh set state='1' where id=%s", (key,))
            cursor.execute("update product_all set state='1' where product_zh_id=%s", (key,))
        else:
            cursor.execute("update product_all_api set state='1' where id=%s", (key,))

    for verdict, (zh_state, mark_done) in zh_rules.items():
        remark = remarks.get(verdict, verdict)
        for row_id in (rid for rid, v in pairs if v == verdict):
            key = str(row_id)
            if to_zi_zh:
                if mark_done:
                    cursor.execute("update product_zh set state=%s,remark=%s,isdo='1' where id=%s", (zh_state, remark, key))
                else:
                    cursor.execute("update product_zh set state=%s,remark=%s where id=%s", (zh_state, remark, key))
                cursor.execute("update product_all set state='2',remark=%s where product_zh_id=%s", (remark, key))
            else:
                cursor.execute("update product_all_api set state='2',remark=%s where id=%s", (remark, key))
    conn.commit()


def check_data(fileName,source):
    '''
    Audit supplier-submitted product links and write the outcome to the database.

    For every row the link to audit is resolved, then checked against the
    retailer page (JD / Gome / Suning) or, for other channels, against the
    crawled copy in ``reverse_data``. Exactly one verdict and one price are
    recorded per row. The frame gets the columns ``审核意见`` and ``price``,
    rows sharing a retailer item with an earlier row are marked as duplicates,
    and finally the verdicts are written to ``product_zh``/``product_all``
    (``source == 'zi_zh'``) or ``product_all_api`` (any other source).

    :param fileName: DataFrame of rows to audit (despite the name, not a path);
        needs ``url``, ``price`` and a default 0..n-1 index, plus
        ``product_zh_id`` or ``id`` depending on ``source``. Modified in place.
    :param source: ``'zi_zh'`` selects the product_zh/product_all tables,
        anything else selects product_all_api.
    :return: None
    '''
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'}
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='zi_zh',autocommit=True)
    try:
        reverse_conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='reverse_data',autocommit=True)
        try:
            cursor = conn.cursor()
            reverse_cursor = reverse_conn.cursor()
            session = requests.Session()
            cursor.execute("select stop_word,white_word from stopwords")
            stop_rules = cursor.fetchall()  # (stop_word, white_word) tuples

            # Checked in this order, like the original if/elif chain.
            auditors = (("jd", _cd_audit_jd), ("gome", _cd_audit_gome), ("suning", _cd_audit_suning))

            result = []
            price = []
            for i in range(len(fileName)):
                df = fileName.loc[i]
                main_url = _cd_resolve_main_url(df)
                print(f'开始处理:{main_url}')
                if len(main_url) == 0:
                    verdict, row_price = "其他销售渠道证明", "其他销售渠道价格"
                else:
                    auditor = next((fn for key, fn in auditors if key in str(main_url)), None)
                    if auditor is None:
                        # Not one of the three retailers: judge by the crawled copy.
                        verdict, row_price = _cd_audit_other(reverse_cursor, stop_rules, df, main_url)
                    else:
                        try:
                            verdict, row_price = auditor(session, headers, stop_rules, main_url, df['price'])
                        except Exception:
                            # Unparseable link, blocked page, unexpected payload, ...
                            verdict, row_price = "链接有误,请按要求提供在销渠道证明", df['price']
                print(verdict)
                result.append(verdict)
                price.append(row_price)
                print(f"已经处理{i + 1}条数据\n")

            print(f'审核意见:{len(result)}')
            print(f'price:{len(price)}')
            print(f'总条数:{len(fileName)}')
            fileName['审核意见'] = result
            fileName['price'] = price

            _cd_flag_duplicate_links(fileName)

            print('更新状态中。。。')
            _cd_write_back(conn, cursor, fileName, source)
            print('更新完成~')
        finally:
            reverse_conn.close()
    finally:
        conn.close()
def get_zgc_brand_info(url_brand_list,brand_list):
# Purpose: for every (url brand, source brand) pair, look both raw brand strings up in the
# p_brand table and collect the matching brand names and ids.
# Returns (zgc_brand_list, zgc_brand_code_list): one inner list per input pair, holding the
# matched names / ids (ids as str), or the sentinel '无对应' where nothing matched.
# NOTE(review): this copy of the file has lost all indentation, so the nesting is not
# visible. In particular it is unclear where the last `if findres:` block (after the
# branches that end in `continue`) sat; confirm against an indented copy before refactoring.
# NOTE(review): database credentials are hard-coded in the connect() call below.
conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',database='ZI_NEW',autocommit=True)
cursor = conn.cursor()
cursor.execute('select id,name,cname,ename from p_brand where id not in (select distinct(pid) from p_brand where pid <> 0)') # do not match parent (main) brands
data = (cursor.fetchall())
# Column names come from cursor.description; duplicate rows are dropped and NULLs become 'EMPTY'.
# (The comprehension variable `tuple` shadows the builtin inside the comprehension.)
brand_table = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor.description]).drop_duplicates().fillna('EMPTY')
# Normalise the three name columns: str(), strip, upper-case.
brand_table['name'] = brand_table['name'].apply(lambda x : str(x).strip().upper())
brand_table['cname'] = brand_table['cname'].apply(lambda x : str(x).strip().upper())
brand_table['ename'] = brand_table['ename'].apply(lambda x : str(x).strip().upper())
zgc_brand_list = []
zgc_brand_code_list = []
for brand,source_brand in zip(url_brand_list,brand_list):
combine_list = [brand,source_brand]
# Matches for both strings of the pair are accumulated in the same two lists.
temp_name = []
temp_code = []
# The loop variable reuses the name `brand` from the outer loop.
for brand in combine_list:
if brand:
# Any brand text containing 联想 is collapsed to exactly 联想.
if "联想" in str(brand):
brand = "联想"
#if "国产" in str(brand):
# brand = "错误信息"
else:
# Falsy brand value -> placeholder text.
brand = '无信息'
brand = str(brand)
# Remove the text found between '[' and ']' (held in `country`) from the brand string;
# strings without such a bracketed part raise IndexError and are left unchanged.
try:
country = brand.split('[')[1].split(']')[-2]
brand = brand.replace(country, '')
except IndexError:
pass
# Chinese part: digits and CJK ideographs only; discarded when it is digits alone.
cn_res = ''.join(re.findall(r'[0-9\u4E00-\u9FA5]', brand))
if cn_res.isdigit():
cn_res = ''
cnbrand = cn_res
# English part: ASCII letters, digits and '-', upper-cased; discarded when it is digits alone.
en_res = (''.join(re.findall(r'[a-zA-Z0-9-]', brand))).upper()
if en_res.isdigit():
en_res = ''
enbrand = en_res
# Empty parts (and single-character English parts) become the '无对应' sentinel.
if cnbrand == '':
cnbrand = '无对应'
if enbrand == '' or len(enbrand) == 1:
enbrand = '无对应'
# First attempt: exact match of the English part against p_brand.ename.
findres = [[brandcode, name] for brandcode, enname,name in
zip(brand_table['id'].tolist(), brand_table['ename'].tolist(),brand_table['name'].tolist()) if enbrand == str(enname)]
if findres:
for find in findres:
temp_name.append(find[1])
temp_code.append(str(find[0]))
else:
# Second attempt: the Chinese part against p_brand.cname; for 联想 any ename that
# contains THINKPAD also counts as a match.
if brand == "联想":
findres = [[brandcode, name] for brandcode, cnname,enname,name in
zip(brand_table['id'].tolist(), brand_table['cname'].tolist(),brand_table['ename'].tolist(),brand_table['name'].tolist()) if cnbrand == str(cnname) or "THINKPAD" in str(enname)]
else:
findres = [[brandcode, name] for brandcode, cnname,name in
zip(brand_table['id'].tolist(), brand_table['cname'].tolist(),brand_table['name'].tolist()) if cnbrand == str(cnname)]
if findres:
for find in findres:
temp_name.append(find[1])
temp_code.append(str(find[0]))
continue
else:
# Third attempt: the whole (cleaned) brand string against p_brand.name.
findres = [[brandcode, name] for brandcode,name in
zip(brand_table['id'].tolist(), brand_table['name'].tolist()) if brand == str(name)]
if findres:
for find in findres:
temp_name.append(find[1])
temp_code.append(str(find[0]))
continue
else:
# Nothing matched in any of the three attempts.
temp_name.append('无对应')
temp_code.append('无对应')
continue
# NOTE(review): placement of this block is ambiguous without indentation (see header note).
if findres:
for find in findres:
temp_name.append(find[1])
temp_code.append(str(find[0]))
else:
temp_name.append('无对应')
temp_code.append('无对应')
zgc_brand_list.append(temp_name)
zgc_brand_code_list.append(temp_code)
conn.close()
return zgc_brand_list, zgc_brand_code_list
def get_zgc_classify_info(classify_list,source):
    """Translate source category names into index sub-category names and codes.

    Args:
        classify_list: iterable of category names as used by ``source``.
        source: ``'zi_zh'`` selects the ``op_zh_zd_type`` mapping; any other
            value is looked up in ``Product_Relation_Attribute_SubTitle``
            after ``'_'`` and ``'ZH'`` have been removed from it.

    Returns:
        ``(zd_category, zd_category_code)``: two lists aligned with
        ``classify_list``. Each element is the list of matched names / codes.
        ``['无总后类别对应']`` is used when the query returned no rows and,
        for ``'zi_zh'`` only, also when every row was filtered out.
    """
    no_match = '无总后类别对应'
    is_zh = source == 'zi_zh'
    if is_zh:
        sql = ("select a.subcategoryname,b.SubCategoryCode from op_zh_zd_type a "
               "left join VW_Relation_Property b "
               "on a.subcategoryname = b.SubCategoryName "
               "where a.second = %s")
    else:
        source = source.replace('_', '').replace('ZH', '')
        sql = ("select b.name 'subcategoryname',a.ZI_SubCategoryCode 'SubCategoryCode' "
               "from Product_Relation_Attribute_SubTitle a LEFT JOIN ZI_SubCategory b "
               "on a.zi_subcategorycode=b.subcategorycode "
               "where a.source=%s and a.SourceSubCategory=%s")

    zd_category = []
    zd_category_code = []
    # NOTE(review): credentials are hard-coded; consider moving them to configuration.
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',
                           database='ZI_DataBase')
    try:
        cursor = conn.cursor()
        for subcategory_name in classify_list:
            # Values are passed as query parameters so quotes in a category
            # name can no longer break (or inject into) the statement.
            # str() mirrors how the value used to be interpolated.
            if is_zh:
                cursor.execute(sql, (str(subcategory_name),))
            else:
                cursor.execute(sql, (source, str(subcategory_name)))
            # Order-preserving de-duplication of the result rows.
            rows = list(dict.fromkeys(cursor.fetchall()))

            names = []
            codes = []
            for name, code in rows:
                # Rows without a code are skipped; the 'zi_zh' mapping also
                # skips categories whose name is flagged with "错误".
                if code is None or (is_zh and "错误" in name):
                    continue
                names.append(name)
                codes.append(code)

            if not rows or (is_zh and not names):
                zd_category.append([no_match])
                zd_category_code.append([no_match])
            else:
                zd_category.append(names)
                zd_category_code.append(codes)
    finally:
        # The original never closed this connection.
        conn.close()
    return zd_category, zd_category_code
def remove_error_productcode(productcodes):
    """Return the product codes whose brand or sub-category is flagged as an error.

    A code is reported when the brand name or the sub-category name joined to
    it contains the text "错误".

    :param productcodes: de-duplicated product codes to check
    :return: list of the product codes that are flagged (empty for empty input)
    """
    productcodes = list(productcodes)
    if not productcodes:
        # "in ()" is not valid SQL, so an empty input must not reach the database.
        return []

    sql = '''
    select distinct c.productcode,c.zi_brandname,d.name 'zi_subcategoryname' from
    (select a.productcode,a.subcategorycode,b.name 'zi_brandname' from info_product a left join zi_brand b on a.brandcode=b.brandcode) c
    left join zi_subcategory d on c.subcategorycode=d.subcategorycode
    where c.productcode in (%s)
    ''' % ','.join(['%s'] * len(productcodes))

    # NOTE(review): credentials are hard-coded; consider moving them to configuration.
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',database='ZI_DataBase', autocommit=True)
    try:
        cursor = conn.cursor()
        cursor.execute(sql,tuple(productcodes))
        rows = cursor.fetchall()
    finally:
        # The original never closed this connection.
        conn.close()

    # Both names come from left joins and may be NULL; treat NULL as "no flag"
    # instead of failing on `'错误' in None`.
    return [code for code, brand_name, subcategory_name in rows
            if '错误' in (brand_name or '') or '错误' in (subcategory_name or '')]
def remove_old_productcode(productcodes):
    """Return the product codes that have no row in ``p_sku``.

    :param productcodes: iterable of product codes (SKUs) to check
    :return: list of the codes for which ``select sku from p_sku where sku = ?``
        returned nothing, in input order (empty for empty input)
    """
    productcodes = list(productcodes)
    if not productcodes:
        return []

    error_code = []
    # NOTE(review): credentials are hard-coded; consider moving them to configuration.
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',database='ZI_NEW', autocommit=True)
    try:
        cursor = conn.cursor()
        for code in productcodes:
            # Parameterised instead of f-string interpolation; str() mirrors
            # the way the code used to be quoted into the statement.
            cursor.execute("select sku from p_sku where sku = %s", (str(code),))
            if not cursor.fetchall():
                error_code.append(code)
    finally:
        # Close even when a query fails.
        conn.close()
    return error_code
def get_cn_number(char):
    """Count the Chinese characters in a string.

    A character counts when its code point lies in U+4E00..U+9FA5 (inclusive).

    :param char: the string to inspect
    :return: number of characters inside that range
    """
    return sum(1 for symbol in char if 0x4E00 <= ord(symbol) <= 0x9FA5)
def judge_unit(string):
    """Tell whether a token may be a model number rather than a measurement.

    Returns False when the token contains one of the blocked words, holds
    three or more Chinese characters, or starts with a number that is
    followed by a known unit (e.g. ``80G``). Returns True otherwise,
    including for every token that does not start with a digit.

    :param string: non-empty candidate token (callers pass it upper-cased)
    :return: True if the token is acceptable, False if it should be rejected
    """
    known_units = ['MM','CM', 'DM', 'ML', 'W', 'KW','KG','G','M','L','KBPS','MMM','P','V','KM']
    blocked_words = ['公斤','NULL','PVC','MONITOR','QIANG','I3','I5','I7','I9','LED','NO','寸','USB','OPTIX','OSN','ZXMP','BASE','ZFSD']

    if any(word in string for word in blocked_words):
        return False
    if get_cn_number(string) >= 3:
        return False
    if not string[0].isdigit():
        return True

    # Walk the token: count number-like characters and letters until a
    # number-like character shows up after at least one letter was seen.
    number_len = 0
    letter_len = 0
    for symbol in string:
        if symbol.isdigit() or symbol in ('.', '-', 'X'):
            if letter_len != 0:
                break
            number_len += 1
        elif symbol.isalpha():
            letter_len += 1

    suffix = string[number_len:number_len + letter_len].upper()
    return suffix not in known_units
def get_model(name, params, brand):
    """Extract a product model string from a product's parameters or name.

    The model stated in ``params`` (keys 认证型号, 产品型号, 型号, in that order)
    wins when it passes validation; otherwise the longest acceptable
    alphanumeric token of ``name`` is used.

    Args:
        name: product title. A leading ``(...)`` group such as ``(deli)`` is
            cut out before tokenising.
        params: text of a dict literal, e.g. ``"{'产品型号': '得力7757'}"``.
            Anything that is not a dict literal is ignored.
        brand: brand text; its first ASCII run is removed from candidates.

    Returns:
        The upper-cased model, or ``'无型号'`` when none was found.
    """
    import ast

    # literal_eval only accepts Python literals, so parameter text coming from
    # scraped or uploaded data can no longer execute code the way eval() could.
    try:
        params = ast.literal_eval(params)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        params = ''
    if not isinstance(params, dict):
        params = ''

    # Drop the first parenthesised group: text before '(' + text after ')'.
    try:
        name = name.split('(')[0] + name.split(')')[1]
    except (IndexError, AttributeError):
        pass

    brand = str(brand)
    # For these brands a purely numeric model is accepted.
    allow_numeric = any(special in brand for special in ('得力', '世达', '华为'))
    ascii_runs = re.findall(r"[A-Za-z0-9-]+", brand)
    brand_remove = ascii_runs[0].upper() if ascii_runs else '没有英文品牌!'

    param_model = ''
    if params:
        for key in ('认证型号', '产品型号', '型号'):
            if key in params:
                param_model = params[key]
                break
    if param_model is None:
        param_model = ''
    # Parameter values are not guaranteed to be strings.
    param_model = str(param_model).upper().replace(brand_remove, '')
    if not _is_plausible_model(param_model, allow_numeric):
        param_model = ''

    # Candidates from the name: ASCII runs of at least two characters;
    # rejected ones are kept as '?' placeholders.
    candidates = []
    for token in re.findall(r"[A-Za-z0-9-]+", name):
        if len(token) < 2:
            continue
        token = token.upper().replace(brand_remove, '')
        candidates.append(token if _is_plausible_model(token, allow_numeric) else '?')
    return_model = max(candidates, key=len) if candidates else param_model

    res_model = param_model if param_model else return_model
    if res_model in ('?', ''):
        res_model = '无型号'
    # '\\T' is the backslash-T sequence the original spelled as the invalid escape '\T'.
    return res_model.replace('/T', '').replace('\\T', '')


def _is_plausible_model(candidate, allow_numeric):
    """Return True when ``candidate`` is non-empty and passes every model check."""
    if not candidate:
        return False
    if judge_unit(candidate) == False:
        return False
    if '*' in candidate or '×' in candidate:
        return False
    if candidate.isdigit() and not allow_numeric:
        return False
    return True
'''
if len(name_xinghao_lyst) == 0:
#type_lyst.append(param_xinghao.upper())
if param_xinghao.isdigit():
param_xinghao = '无型号'
return param_xinghao
else:
return param_xinghao.upper()
else:
if param_xinghao in name_xinghao_lyst:
#type_lyst.append(param_xinghao.upper())
return param_xinghao.upper()
else:
xinghao_data = max(name_xinghao_lyst, key=len)
for xinghao in name_xinghao_lyst:
if len(xinghao) > 2 and '*' not in xinghao and judge_unit(xinghao):
xinghao_data = xinghao
break
if not judge_unit(xinghao_data):
xinghao_data == '无型号'
#type_lyst.append(xinghao_data.upper())
return xinghao_data
'''
def predict_category(name_list):
    '''
    Predict a category for each product name.

    The model is loaded and run with ./text_moudle_LXWL as the working
    directory; the previous working directory is restored afterwards, even
    when loading or prediction raises.

    :param name_list: original product names
    :return: list of predicted categories
    '''
    previous_dir = os.getcwd()
    os.chdir("./text_moudle_LXWL")
    try:
        tf.reset_default_graph()
        model = LXWL_name2subcategory()
        return model.namelyst_predict(name_list)
    finally:
        # Without this a failure would leave the whole process in the model directory.
        os.chdir(previous_dir)
def fuzzy_match(zgc_brand_code_list,model_list,price_list):
    """Find up to three catalogue SKUs per product by brand id and model text.

    For each (brand code, model, price) triple, the SKUs of that brand whose
    name contains the model (case-insensitive) are ranked by the absolute
    difference between ``price`` and the SKU's ``index_price_wave``.

    Args:
        zgc_brand_code_list: brand ids, one per product.
        model_list: model strings, one per product.
        price_list: prices; values that cannot be converted count as 0.

    Returns:
        ``(res_name_list, res_code_list)``: per product, the names and the
        SKU codes of the (at most three) closest-priced matches.
    """
    sql = ("select a.sku,a.skuname,b.brandid,c.index_price_wave from p_sku a "
           "left join p_spu b "
           "on a.spuid = b.id "
           "left join (select goods_id,index_price_wave from zdindex.dbo.zd_week_price where periods in (select top 1 periods from zdindex.dbo.zd_entry_goods_price)) c "
           "on a.sku = c.goods_id "
           "where a.state in ('1','4')")
    # Load every active product once.
    # NOTE(review): credentials are hard-coded; consider moving them to configuration.
    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou202006','ZI_NEW')
    try:
        cursor = conn.cursor()
        cursor.execute(sql)
        data = cursor.fetchall()
        df_db = pd.DataFrame(data, columns=[column[0] for column in cursor.description])
    finally:
        # The original never closed this connection.
        conn.close()

    res_name_list = []
    res_code_list = []
    for brand_code,model,price in zip(zgc_brand_code_list,model_list,price_list):
        try:
            price = int(float(price))
        except (TypeError, ValueError, OverflowError):
            price = 0
        # Brand ids are compared as int when possible, otherwise as text.
        try:
            brand_df = df_db[df_db['brandid'] == int(brand_code)]
        except (TypeError, ValueError, OverflowError):
            brand_df = df_db[df_db['brandid'] == str(brand_code)]

        wanted = str(model).strip().upper()
        matched_names = []
        matched_codes = []
        price_diffs = []
        for name,product_code,index_price in zip(brand_df['skuname'].tolist(),brand_df['sku'].tolist(),brand_df['index_price_wave'].tolist()):
            if name is None:
                # A SKU without a name cannot match any model text.
                continue
            if index_price is None:
                index_price = 0
            if wanted in name.upper() and product_code not in matched_codes:
                matched_names.append(name)
                matched_codes.append(product_code)
                price_diffs.append(abs(price-index_price))

        ranked = pd.DataFrame({'name': matched_names, 'code': matched_codes, 'diff': price_diffs})
        ranked = ranked.sort_values('diff',ascending=True).head(3)
        res_name_list.append(ranked['name'].tolist())
        res_code_list.append(ranked['code'].tolist())
    return res_name_list,res_code_list
def standard_point_sku_name(category_name,name_rules,cat_subtitle_df):
# Loads the rows of vw_property whose name appears in important_category and splits the
# `identy` value of each row into four single-character flag columns.
# NOTE(review): in this copy the body ends right after those columns are derived: the three
# parameters are never used, nothing is returned and the connection is not closed. The
# function looks truncated at a file boundary; confirm against the full source.
# NOTE(review): database credentials are hard-coded in the connect() call below.
conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database= 'ZI_NEW',autocommit=True)
cursor_zi_new = conn_zi_new.cursor()
cursor_zi_new.execute(f"select * from vw_property where name in (select category_name from important_category)")
data = cursor_zi_new.fetchall()
# Column names are taken from cursor.description.
subtitle_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
# Characters 0..3 of `identy` become the require / match / standard / part flags.
subtitle_df['require_param'] = subtitle_df['identy'].apply(lambda x: x[0])
subtitle_df['match_param'] = subtitle_df['identy'].apply(lambda x: x[1])
subtitle_df['standard_param'] = subtitle_df['identy'].apply(lambda x: x[2])
subtitle_df['part_param'] = subtitle_df['identy'].apply(lambda x: x[3])
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 25 09:59:23 2020
@author: rico
"""
# NOTE: stray SQL statement left in the module header (not Python): update nopoint_match_result set product = '' where productcode is Null
from API import zgc_api
from db import MSSQL
import pandas as pd
import time
from public_function import *
import uuid
import datetime
from delCalPrice_UpdatePriceDiff import check_configure_price,create_price_difference_single
#初始化数据
def InitializeData(path,channel_alias):
# Reads the uploaded Excel sheet at `path`, inserts one product_all row per sheet row and then
# pushes the category / parameter-name mappings to the API.
# NOTE(review): `url`, `price`, `df_category_map` and `df_attr_map` are not assigned anywhere
# in this function; unless they exist as module-level names the function raises NameError.
# Load the uploaded data (the 供应商SKU column is kept as text; empty cells become '无')
df_product = pd.read_excel(path,converters = {'供应商SKU':str})
df_product = df_product.fillna('无')
# Open the database connection
mssql = MSSQL('123.57.45.119','ZI_Service')
cursor_zi_service = mssql._cur
insert_data = list()
# Create the progress indicator
index_ = Index()
counter = 1
# Upload the product rows
for index,row in df_product.iterrows():
print(index_(counter, len(df_product)-1), end='%')
counter += 1
category = row['三级品类']
brand = row['品牌']
sku = row['供应商SKU']
productname = row['商品名称']
channel = row['客户名称']
# Self-assignment: has no effect.
channel_alias = channel_alias
params_ori = str(row['参数值'])
# The batch label is today's date (YYYY-MM-DD).
batch = time.strftime("%Y-%m-%d",time.localtime())
# Normalise the parameters
if channel_alias == 'CQ':
# For the CQ channel the cell is evaluated (twice) as a sequence of {'key': ..., 'value': ...}
# items and turned into the text of a {key: value} dict, with ':' stripped from the keys.
# Any failure yields an empty string.
# NOTE(review): eval() runs on spreadsheet content; a bare except hides every error.
try:
params = str(dict(zip([kv['key'].replace(':','') for kv in eval(row['参数值'])],[kv['value'] for kv in eval(row['参数值'])])))
except:
params = ""
else:
params = str(row['参数值'])
# The fillna placeholder means "no parameters": store an empty dict literal instead.
if params == "无":
params = "{}"
# channel_alias is written to both channel_id and channel_alias.
insert_data.append((sku,productname,brand,category,params,url,price,channel,channel_alias,channel_alias,batch,params_ori))
cursor_zi_service.executemany(f"insert into product_all (sku,name,brand,category,params,url,price,channel,channel_id,channel_alias,batch,params_ori) \
values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",insert_data)
mssql.Close()
if df_category_map.empty:
pass
else:
# Upload the category mapping
data = {
"params_info": {
"channelAlias": f"{channel_alias}",
"cust_category_list": df_category_map['三级类别'].tolist(),
"category_list": df_category_map['指数末级类'].tolist(),
"categoryId_list": df_category_map['指数末级类编码'].tolist()
}
}
print(zgc_api("Stock-InCategoryRelationshipToServerInfo",data))
if df_attr_map.empty:
pass
else:
# Upload the parameter-name mapping
data = {
"params_info": {
"channelAlias": f"{channel_alias}",
"category_list": df_attr_map['指数末级类'].tolist(),
"paramsAttr": df_attr_map['指数参数项'].tolist(),
"cust_paramsAttr_list": df_attr_map['参数项'].tolist()
}
}
print(zgc_api("Stock-InAttrRelationshipInfo",data))
#加载数据
def LoadData(batch,channel_alias,mode):
    """Load rows of ``product_all`` into a DataFrame.

    Args:
        batch: batch label to load, or ``"all"`` for every batch (in which
            case ``channel_alias`` is not used as a filter).
        channel_alias: channel to load when ``batch`` is not ``"all"``.
        mode: ``'deal'`` returns only unprocessed rows (``state is Null``)
            with the working columns; any other value returns every column.

    Returns:
        pandas.DataFrame with the selected rows.
    """
    deal_mode = mode == 'deal'
    columns = "id,sku,name,brand,category,params,price,channel_alias,batch" if deal_mode else "*"

    conditions = []
    sql_params = []
    if batch != "all":
        # Passed as query parameters instead of being formatted into the SQL.
        conditions.extend(["batch = %s", "channel_alias = %s"])
        sql_params.extend([str(batch), str(channel_alias)])
    if deal_mode:
        conditions.append("state is Null")

    sql = f"select {columns} from product_all"
    if conditions:
        sql += " where " + " and ".join(conditions)

    # Open the database connection
    mssql = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql._cur
    try:
        if sql_params:
            cursor_zi_service.execute(sql, tuple(sql_params))
        else:
            cursor_zi_service.execute(sql)
        df = pd.DataFrame(cursor_zi_service.fetchall(), columns=[column[0] for column in cursor_zi_service.description])
    finally:
        # Close even when the query fails.
        mssql.Close()
    return df
#SKU排重
def SkuMatch(df,channel_alias):
    """De-duplicate uploaded SKUs against the catalogue and record the outcome.

    The SKU list is sent to the ``GetSKUMatchResInfo`` API. For every row:

    * no catalogue match: if the same SKU exists in an earlier batch of this
      channel, its brand / category / model (and standard parameters, when
      present) are copied over and the row gets state ``'8'``;
    * catalogue match: the matched product code and catalogue name are stored
      with state ``'9'``; ``is_different`` is ``'0'`` when the uploaded name
      equals the name used in the earlier batch, else ``'1'``.

    Args:
        df: rows of one batch with the columns ``id``, ``sku``, ``name``, ``batch``.
        channel_alias: channel the rows belong to.

    Returns:
        The API's ``msg`` when the API reports failure (``code == 0``),
        otherwise ``None``.
    """
    # Initialise the inputs
    id_list = df['id'].tolist()
    sku_list = df['sku'].tolist()
    name_list = df['name'].tolist()
    batch = df['batch'].unique().tolist()[0]

    # Ask the API for the match result before any connection is opened, so the
    # failure path below has nothing to clean up (it used to leak both connections).
    data = {
        "params_info": {
            "channelAlias": f"{channel_alias}",
            "cust_sku_list": sku_list
        }
    }
    res = zgc_api("GetSKUMatchResInfo",data)
    if res['code'] == 0:
        return res['msg']
    res_dict = res['res_dict']

    history_sql = ("select zi_brandname,zi_brandcode,zi_subcategoryname,zi_subcategorycode,model,params_standard,name "
                   "from product_all where id = (select max(id) from product_all "
                   "where batch != %s and channel_alias = %s and sku = %s)")
    copy_sql = ("update product_all set zi_brandname = %s,zi_brandcode = %s,zi_subcategoryname = %s,"
                "zi_subcategorycode = %s,model = %s,state = '8' where id = %s")
    copy_with_params_sql = ("update product_all set zi_brandname = %s,zi_brandcode = %s,zi_subcategoryname = %s,"
                            "zi_subcategorycode = %s,model = %s,params_standard = %s,state = '8' where id = %s")
    matched_sql = ("update product_all set state = '9',productcode = %s,new_name = %s ,"
                   "pic_state = '0',is_different = %s where id = %s")

    # Open the database connections
    mssql = MSSQL('123.57.45.119','ZI_Service')
    try:
        cursor_zi_service = mssql._cur
        mssql_new = MSSQL('123.56.115.207','ZI_NEW')
        try:
            cursor_zi_new = mssql_new._cur
            # Create the progress indicator
            index_ = Index()
            counter = 1
            for id_,sku,name in zip(id_list,sku_list,name_list):
                print(index_(counter, len(id_list)-1), end='%')
                counter += 1

                # Most recent row of the same SKU in an earlier batch of this channel.
                cursor_zi_service.execute(history_sql, (str(batch), str(channel_alias), str(sku)))
                history_rows = cursor_zi_service.fetchall()
                history = history_rows[0] if history_rows else None

                if res_dict[sku] == False:
                    if history is None:
                        continue
                    zi_brandname, zi_brandcode, zi_subcategoryname, zi_subcategorycode, model, params_standard, _ = history
                    # Values are bound as parameters, so no manual quote doubling is needed.
                    base_values = (zi_brandname, str(zi_brandcode), str(zi_subcategoryname),
                                   zi_subcategorycode, str(model))
                    copied = False
                    if isinstance(params_standard, str):
                        try:
                            cursor_zi_service.execute(copy_with_params_sql, base_values + (params_standard, id_))
                            copied = True
                        except Exception:
                            # Best effort, as before: fall back to copying
                            # everything except the standard parameters.
                            copied = False
                    if not copied:
                        cursor_zi_service.execute(copy_sql, base_values + (id_,))
                else:
                    code = res_dict[sku]
                    name_his = "往期该渠道无数据" if history is None else history[6]
                    cursor_zi_new.execute("select skuname from p_sku where sku = %s", (str(code),))
                    sku_row = cursor_zi_new.fetchone()
                    input_name = sku_row[0] if sku_row else '未查询到数据'
                    is_different = '0' if name == name_his else '1'
                    cursor_zi_service.execute(matched_sql, (str(code), str(input_name), is_different, id_))
        finally:
            mssql_new.Close()
    finally:
        mssql.Close()
def SkuMatchResult(batch,channel_alias):
    """Export the SKU de-duplication matches of one batch for manual confirmation.

    Rows of ``product_all`` with state ``'9'`` for the given batch and channel
    are written to ``{channel_alias}-{batch}sku排重结果确认.xlsx`` in the
    current working directory.

    Args:
        batch: batch label to export.
        channel_alias: channel to export.
    """
    sql = ("select id,name,brand,category,productcode,new_name,is_different,state from product_all "
           "where batch = %s and channel_alias = %s and state = '9'")
    # Open the database connection
    mssql = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql._cur
    try:
        # Bound as parameters instead of being formatted into the SQL text.
        cursor_zi_service.execute(sql, (str(batch), str(channel_alias)))
        df = pd.DataFrame(cursor_zi_service.fetchall(), columns=[column[0] for column in cursor_zi_service.description])
    finally:
        mssql.Close()
    df.to_excel(f"{channel_alias}-{batch}sku排重结果确认.xlsx")
def UpdateSkuMatchResult(path):
    """Write the manually confirmed SKU de-duplication sheet back to ``product_all``.

    For every sheet row (columns ``id``, ``productcode``, ``new_name``,
    ``state``): a blank state (empty cell, ``''``, ``'None'``, ``'null'`` or
    ``'nan'``) resets state, productcode and new_name to NULL; any other state
    is stored together with the row's productcode and new_name.

    Args:
        path: path of the confirmed Excel file.
    """
    def _text_or_none(value):
        # Empty cells arrive as NaN; store NULL rather than the text 'nan'.
        return None if pd.isna(value) else str(value)

    # Read the sheet first so a bad path does not leave a connection open.
    df = pd.read_excel(path,converters = {'productcode':str,'state':str})

    # The original statements were spelled "upadte" and could never execute.
    reset_sql = "update product_all set state = Null,productcode = Null,new_name = Null where id = %s"
    store_sql = "update product_all set state = %s,productcode = %s,new_name = %s where id = %s"

    # Open the database connection
    mssql = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql._cur
    try:
        for _, row in df.iterrows():
            id_ = int(row['id'])
            state = _text_or_none(row['state'])
            if state is None or state in ('', 'None', 'null', 'nan'):
                cursor_zi_service.execute(reset_sql, (id_,))
            else:
                cursor_zi_service.execute(
                    store_sql,
                    (state, _text_or_none(row['productcode']), _text_or_none(row['new_name']), id_))
    finally:
        mssql.Close()
#分析获取数据基本信息
def AnalyseBasicInfo(df):
    """Derive category, brand and model information for a batch and store it.

    Calls the ``GetPredictCategory``, ``GetZGCCategoryInfo``,
    ``GetZGCBrandInfo`` and ``GetModelInfo`` APIs, adds their results to ``df``
    as new columns (``predict_cat``, ``predict_cat_code``, ``zi_category``,
    ``zi_category_code``, ``zi_brandname``, ``zi_brandcode``, ``model``,
    ``model_flag``) and updates the matching ``product_all`` rows by ``id``.

    Args:
        df: DataFrame with the columns ``id``, ``name``, ``brand``,
            ``category``, ``params`` and ``channel_alias``. Modified in place.
    """
    import re

    # Open the database connection
    mssql = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql._cur
    try:
        # Initialise the inputs (the uploaded name/brand double as the "url" ones).
        name_list = df['name'].tolist()
        url_name_list = df['name'].tolist()
        brand_list = df['brand'].tolist()
        url_brand_list = df['brand'].tolist()
        category_list = df['category'].tolist()
        params_list = df['params'].tolist()
        channelAlias_list = df['channel_alias'].tolist()

        # Predicted category
        res = zgc_api("GetPredictCategory", {
            "category_info": {
                "cust_name_list": name_list,
                "url_name_list":url_name_list
            }
        })
        df['predict_cat'] = res['category_list']
        df['predict_cat_code'] = res['categoryCode_list']

        # Index category
        res = zgc_api("GetZGCCategoryInfo", {
            "category_info": {
                "cust_category_list": category_list,
                "pre_category_list":df['predict_cat'].tolist()
            }
        })
        df['zi_category'] = res['category_list']
        df['zi_category_code'] = res['categoryCode_list']

        # Index brand
        res = zgc_api("GetZGCBrandInfo", {
            "brand_info": {
                "cust_brand_list":brand_list,
                "url_brand_list":url_brand_list,
                "cust_name_list":name_list,
                "url_name_list": url_name_list
            }
        })
        df['zi_brandname'] = res['brand_list']
        df['zi_brandcode'] = res['brandId_list']

        # Model: only the model-bearing parameter of each row is sent. Rows are
        # parsed one by one, so a single malformed value no longer blanks the
        # hint of every other row.
        params_list_ = [str(_extract_model_hint(params)) for params in params_list]
        res = zgc_api("GetModelInfo", {
            "model_info": {
                "channelAlias_list": channelAlias_list,
                "cust_category_list":category_list,
                "cust_name_list": name_list,
                "url_name_list": url_name_list,
                "cust_brand_list": brand_list,
                "cust_params_list": params_list_
            }
        })
        df['model'] = res['model_list']
        df['model_flag'] = res['modelFlag_list']

        insert_data = list()
        # Create the progress indicator
        index_ = Index()
        counter = 1
        # Build one update tuple per row
        for _, row in df.iterrows():
            print(index_(counter, len(df)-1), end='%')
            counter += 1
            predict_category = row['predict_cat']
            zi_subcategoryname = row['zi_category']
            predict_result = 1 if predict_category == zi_subcategoryname else 0
            zi_brandname = row['zi_brandname']

            # Brand check: '2' = newly created brand, '1' = the Chinese or the
            # English part of the index brand occurs in both the uploaded brand
            # and the uploaded name, '0' = otherwise.
            cust_name = str(row['name']).upper()
            cust_brand = str(row['brand']).upper()
            cn_name = ''.join(re.findall(r'[0-9\u4E00-\u9FA5]', zi_brandname)) or '无数据'
            en_name = ''.join(re.findall(r'[a-zA-Z0-9-]', zi_brandname)).upper() or '无数据'
            if '新建品牌' in zi_brandname:
                brand_match_result = '2'
            elif (cn_name in cust_brand and cn_name in cust_name) or (en_name in cust_brand and en_name in cust_name):
                brand_match_result = '1'
            else:
                brand_match_result = '0'

            insert_data.append((predict_category,row['predict_cat_code'],zi_subcategoryname,row['zi_category_code'],predict_result,zi_brandname,row['zi_brandcode'],row['model'],row['model_flag'],brand_match_result,row['id']))
        cursor_zi_service.executemany("update product_all set predict_category = (%s) ,predict_category_code = (%s) ,zi_subcategoryname = (%s) ,zi_subcategorycode = (%s) ,predict_result = (%s) ,zi_brandname = (%s) ,zi_brandcode = (%s) ,model = (%s),model_flag = (%s),brand_match_result = (%s) where id = (%d)",insert_data)
    finally:
        # Close even when an API call or the update fails.
        mssql.Close()


def _extract_model_hint(params_text):
    """Return ``{key: value}`` for the first model key found in a params dict literal, else ``{}``."""
    import ast

    # literal_eval accepts literals only; eval() would execute uploaded text.
    try:
        params = ast.literal_eval(params_text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return {}
    if not isinstance(params, dict):
        return {}
    for key in ('认证型号', '产品型号', '型号'):
        if key in params:
            return {key: params[key]}
    return {}
#导出基础信息至excel
def ExportToExcelBasicConfirm(batch,channel_alias):
    """Export the unprocessed rows of one batch for manual confirmation.

    Rows of ``product_all`` with ``state is Null`` for the given batch and
    channel are written to ``{channel_alias}基础信息确认{batch}.xlsx`` in the
    current working directory.

    Args:
        batch: batch label to export.
        channel_alias: channel to export.
    """
    sql = ("select id,name,brand,zi_brandname,zi_brandcode,category,zi_subcategoryname,zi_subcategorycode,"
           "predict_category,predict_category_code,predict_result,model,model_flag,params,price,url,"
           "channel,channel_alias,productcode,remark from product_all "
           "where batch = %s and channel_alias = %s and state is Null")
    # Open the database connection
    mssql = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql._cur
    try:
        # Bound as parameters instead of being formatted into the SQL text.
        cursor_zi_service.execute(sql, (str(batch), str(channel_alias)))
        df = pd.DataFrame(cursor_zi_service.fetchall(), columns=[column[0] for column in cursor_zi_service.description])
    finally:
        mssql.Close()
    df.to_excel(f"{channel_alias}基础信息确认{batch}.xlsx")
#更新基础信息确认后数据
def UpdateBasicData(path):
    """Write the manually confirmed basic-information sheet back to ``product_all``.

    Rows with a remark are rejected (state ``'2'`` plus the remark). For the
    other rows the confirmed brand, category and model are stored; a missing
    brand or category code is looked up by name in ``p_brand`` /
    ``p_category``, and ``point_category_flag`` is ``'1'`` when the category
    is listed in ``important_category``, else ``'0'``.

    Args:
        path: path of the confirmed Excel file (sheet ``Sheet1``).

    Raises:
        TypeError: when a missing code has to be looked up and no row with
            that name exists (unchanged from the previous behaviour).
    """
    # Read the Excel data; the three text columns are forced to str so empty
    # cells show up as the text 'nan'.
    df = pd.read_excel(path,sheet_name = "Sheet1",converters={'zi_brandcode':str,'zi_subcategorycode':str})
    for column in ('zi_brandcode', 'zi_subcategorycode', 'remark'):
        df[column] = df[column].apply(lambda x:str(x))

    reject_sql = "update product_all set state = '2',remark = %s where id = %s"
    confirm_sql = ("update product_all set zi_brandname = %s,zi_brandcode = %s,zi_subcategoryname = %s,"
                   "zi_subcategorycode = %s,point_category_flag = %s,model = %s where id = %s")

    # Open the database connections
    mssql_service = MSSQL('123.57.45.119','ZI_Service')
    try:
        cursor_zi_service = mssql_service._cur
        mssql_new = MSSQL('123.56.115.207','ZI_NEW')
        try:
            cursor_zi_new = mssql_new._cur
            # Names of the key categories
            cursor_zi_new.execute("select category_name from important_category")
            important_cat_list = [category_row[0] for category_row in cursor_zi_new.fetchall()]

            # Create the progress indicator
            index_ = Index()
            counter = 1
            for _, row in df.iterrows():
                print(index_(counter, len(df)-1), end='%')
                counter += 1
                id_ = int(row['id'])
                remark = row['remark']
                if remark != 'nan':  # rejected row
                    cursor_zi_service.execute(reject_sql, (remark, id_))
                    continue

                # Values are bound as parameters, so quotes need no manual doubling.
                zi_brandname = row['zi_brandname'].strip().replace("[","").replace("]","")
                zi_brandcode = row['zi_brandcode'].strip().replace("?","").replace("[","").replace("]","")
                zi_subcategoryname = row['zi_subcategoryname'].strip()
                zi_subcategorycode = row['zi_subcategorycode'].strip().replace("?","")
                model = str(row['model']).strip()

                if zi_brandcode == 'nan':
                    cursor_zi_new.execute("select id from p_brand where name = %s", (zi_brandname,))
                    zi_brandcode = str(int(cursor_zi_new.fetchone()[0]))
                if zi_subcategorycode == 'nan' and zi_subcategoryname != '该类别非中电类别':
                    cursor_zi_new.execute("select id from p_category where name = %s", (zi_subcategoryname,))
                    zi_subcategorycode = str(int(cursor_zi_new.fetchone()[0]))

                point_category_flag = '1' if zi_subcategoryname in important_cat_list else '0'
                cursor_zi_service.execute(
                    confirm_sql,
                    (zi_brandname, zi_brandcode, zi_subcategoryname, zi_subcategorycode,
                     point_category_flag, model, id_))
        finally:
            mssql_new.Close()
    finally:
        mssql_service.Close()
#获取重点类产品数据
def GetPointCategoryData(batch,channel_alias):
    """Load the key-category rows of one batch that still need parameter work.

    Selects the ``product_all`` rows of the given batch and channel whose
    state is NULL or ``'8'`` and whose ``zi_subcategoryname`` is one of
    笔记本, 台式机, 一体电脑, 复印纸.

    Args:
        batch: batch label to load.
        channel_alias: channel to load.

    Returns:
        pandas.DataFrame with every column of the selected rows.
    """
    sql = ("select * from product_all where (state is Null or state = '8') "
           "and zi_subcategoryname in ('笔记本','台式机','一体电脑','复印纸') "
           "and batch = %s and channel_alias = %s")
    # Open the database connection
    mssql = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql._cur
    try:
        # Bound as parameters instead of being formatted into the SQL text.
        cursor_zi_service.execute(sql, (str(batch), str(channel_alias)))
        data = cursor_zi_service.fetchall()
        df_db = pd.DataFrame(data, columns=[column[0] for column in cursor_zi_service.description])
    finally:
        mssql.Close()
    return df_db
#解析重点类产品数据并导出
def GetPointCategoryDataDetail(df,channel_alias):
#创建数据库链接
mssql = MSSQL('123.56.115.207','ZI_NEW')
cursor_zi_new = mssql._cur
#获取属性规格项数据 并提取参数项自身属性(必填项、匹配项、标准项、配件项)
cursor_zi_new.execute(f"select * from vw_property")
data = cursor_zi_new.fetchall()
subtitle_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
subtitle_df['subtitle'] = subtitle_df['subtitle'].apply(lambda x: x.strip())
subtitle_df['require_param'] = subtitle_df['identy'].apply(lambda x: x[0])
subtitle_df['match_param'] = subtitle_df['identy'].apply(lambda x: x[1])
subtitle_df['standard_param'] = subtitle_df['identy'].apply(lambda x: x[2])
subtitle_df['part_param'] = subtitle_df['identy'].apply(lambda x: x[3])
#获取属性规格项对应关系
cursor_zi_new.execute(f"select * from p_skusubtitle_out_map")
data = cursor_zi_new.fetchall()
subtitle_map_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
#获取数据字典
cursor_zi_new.execute("select * from ShuJuZiDian_Cfg")
data = cursor_zi_new.fetchall()
data_dict = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
#获取系列子系列关系
cursor_zi_new.execute("select * from series_relationship")
data = cursor_zi_new.fetchall()
series_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
'''
#依据品牌+型号获取ZOL参数信息(标准参数项信息)
zol_params_list = []
for index,row in df.iterrows():
print(index)
category = row['zi_subcategoryname']
brand = row['zi_brandname']
model = row['model']
data = {
"params_info": {
"category": category,
"brand": brand,
"model": model
}
}
res = zgc_api("GetZOLParamsInfo",data)
zol_params_list.append(res['params_dict'])
df['ZOL_params'] = zol_params_list
'''
#df['ZOL_params'] = []
#标准化链接爬取参数项
zi_category_list = df['zi_subcategoryname'].unique().tolist()
params_res = list()
for zi_category in zi_category_list:
cat_df = df[df['zi_subcategoryname'] == zi_category]
params_list_temp = list()
for params in cat_df['params'].tolist():
params = params.replace('nan',"无")
for key in eval(params):
params_list_temp.append(key)
params_list = list(set(params_list_temp))
data = {
"params_info": {
"category": zi_category,
"cust_paramsAttr_list": params_list
}
}
res = zgc_api("GetZGCParamsInfo",data)
params_res.append(res['paramsAttr_dict'])
params_dict = dict(zip(zi_category_list,params_res))
params_standard_list = []
for index,row in df.iterrows():
category = row['zi_subcategoryname']
name = row['name']
brand = row['zi_brandname']
try:
params = eval(row['params'])
except:
params = {}
key_list = []
value_list = []
for key in params:
if params_dict[category][key] == '无参数项对应关系':
continue
else:
transform_key = params_dict[category][key]
value = params[key]
key_list.append(transform_key)
value_list.append(value)
params_std_dict = dict(zip(key_list, value_list))
#处理产品子系列问题
if category in ['笔记本','台式机','一体电脑']:
flag = False
temp_series_list = []
for child_series,series in zip(series_df[(series_df['子类名称'] == category) & (series_df['品牌名称'] == brand)]['产品子系列'].tolist(),series_df[(series_df['子类名称'] == category) & (series_df['品牌名称'] == brand)]['产品系列'].tolist()):
if child_series.upper() in name.upper():
temp_dict = {"产品系列":series,"产品子系列":child_series}
flag = True
break
elif series.upper() in name.upper():
temp_series_list.append(series)
if flag:
params_std_dict.update(temp_dict)
else:
if temp_series_list:
params_std_dict.update({'产品系列':temp_series_list[0]})
params_standard_list.append(params_std_dict)
print(index+1)
df['params_standard_create'] = params_standard_list
writer = pd.ExcelWriter(f"{channel_alias}重点类参数确认.xlsx")
index = 0
for category in df['zi_subcategoryname'].unique().tolist():
#获取每一个品类的dataframe
cat_df = df[df['zi_subcategoryname'] == category]
cat_df['remark'] = cat_df['remark'].apply(lambda x:str(x))
#获取这个类的属性规格项
single_subtitle_df = subtitle_df[subtitle_df['name'] == category]
#获取这个类的必填属性规格(CPU属性无需填写,系统在建库时自动给出)
requier_param_list = single_subtitle_df[single_subtitle_df['require_param'] == '1']['subtitle'].tolist()
try:
requier_param_list.remove('CPU属性')
except:
pass
#获取这个类的标准属性规格
#standard_param_list = single_subtitle_df[single_subtitle_df['standard_param'] == '1']['subtitle'].tolist()
#获取这个类别的数据字典
cat_data_dict = data_dict[data_dict['categoryname'] == category]
cat_data_dict['stdvalue'] = cat_data_dict['stdvalue'].apply(lambda x:str(x).strip())
cat_data_dict['primitive'] = cat_data_dict['primitive'].apply(lambda x:str(x).strip().upper())
requier_param_var_list = []
for requier_param in requier_param_list:
requier_param_var_list.append(requier_param.replace('/','_').replace('(','_').replace(')','').replace('(','_').replace(')',''))
for requier_param_var in requier_param_var_list:
exec("%s_list=[]"%requier_param_var)
error_flag_list = [0,"0",1,"1","-","/","\\","无","其他",'--','---']
id_list = list()
url_params_list = list()
#zol_params_list = list()
for index,row in cat_df.iterrows():
url_params_dict = row['params_standard_create']
#zol_params_dict = row['ZOL_params']
if row['remark'] != 'nan' and row['remark'] != 'None' :
continue
id_ = row['id']
#category = row['zi_subcategoryname']
id_list.append(id_)
temp_key_list = list()
temp_url_value_list = list()
#temp_zol_value_list = list()
for db_param in requier_param_list:
if db_param in ['产品型号','CPU属性']:
continue
temp_key_list.append(db_param)
try:
url_value = url_params_dict[db_param]
except:
url_value = ''
'''
try:
zol_value = zol_params_dict[db_param]
except:
zol_value = ''
'''
temp_url_value_list.append(url_value)
#temp_zol_value_list.append(zol_value)
url_params_list.append(dict(zip(temp_key_list,temp_url_value_list)))
#zol_params_list.append(dict(zip(temp_key_list,temp_zol_value_list)))
print(f"{category}数据准备完成")
data ={
"params_info": {
"category": category,
"id_list":id_list,
"params_dict_list": url_params_list
}
}
res = zgc_api("GetZGCParamValuesInfo",data)
url_res_dict = res['paramsValue_dict']
'''
data ={
"params_info": {
"category": category,
"id_list":id_list,
"params_dict_list": zol_params_list
}
}
res = zgc_api("GetZGCParamValuesInfo",data)
zol_res_dict = res['paramsValue_dict']
'''
for index,row in cat_df.iterrows():
_id = str(row['id'])
try:
params_standard = eval(row['params_standard'])
except:
params_standard = '无历史参数信息'
if params_standard == '无历史参数信息':
for requier_param,requier_param_var in zip(requier_param_list,requier_param_var_list):
try:
url_value,flag = tuple(url_res_dict[_id][requier_param].items())[0]
url_value = url_value.replace("\t"," ").replace("\n"," ")
except:
url_value = ''
#if url_value == '' or flag == False:
if url_value == '':
exec("%s_list.append('%s')"%(requier_param_var,"暂无数据"))
'''
不使用zol数据
try:
zol_value,flag = tuple(zol_res_dict[_id][requier_param].items())[0]
zol_value = zol_value.replace("\t"," ").replace("\n"," ")
except:
zol_value = ''
if zol_value == '' or flag == False or zol_value == '爬取不到数据':
exec("%s_list.append('%s')"%(requier_param_var,"暂无数据"))
else:
exec("%s_list.append('%s')"%(requier_param_var,zol_value))
'''
else:
exec("%s_list.append('%s')"%(requier_param_var,url_value))
else:
for requier_param,requier_param_var in zip(requier_param_list,requier_param_var_list):
exec("%s_list.append('%s')"%(requier_param_var,params_standard[requier_param]))
for requier_param,requier_param_var in zip(requier_param_list,requier_param_var_list):
exec("cat_df['%s'] = %s_list"%(requier_param,requier_param_var))
columns_list = ['id','name','url_name','brand','url_brand','zi_brandname','zi_brandcode','zi_subcategoryname','zi_subcategorycode','params','model','url','channel','channel_alias','remark','productcode','new_name'] + requier_param_list
export_df = cat_df[columns_list]
#export_df['产品详细参数'] = detail_filled_params_list
category = category.replace('/','_')
export_df.to_excel(writer,f"{category}参数数据")
writer.save()
mssql.Close()
#获取非重点类产品数据
def GetNonpointCategoryData(batch,channel_alias):
    """Fetch the pending non-key-category products of one batch and channel.

    Selects every ``product_all`` row whose ``state`` is NULL or ``'8'``,
    whose ``zi_subcategoryname`` is not one of 笔记本 / 台式机 / 一体电脑 /
    复印纸, and whose ``batch`` and ``channel_alias`` equal the given values.

    Args:
        batch: Batch identifier; compared as text against ``batch``.
        channel_alias: Channel alias; compared as text against
            ``channel_alias``.

    Returns:
        pandas.DataFrame holding the fetched rows, with the column names
        reported by the cursor description.
    """
    # The two caller-supplied values are bound as parameters instead of being
    # formatted into the statement, so quotes in them cannot alter the SQL.
    query = (
        "select * from product_all where (state is Null or state = '8') "
        "and zi_subcategoryname not in ('笔记本','台式机','一体电脑','复印纸') "
        "and batch = %s and channel_alias = %s"
    )
    mssql = MSSQL('123.57.45.119', 'ZI_Service')
    try:
        cursor_zi_service = mssql._cur
        # str() keeps the text comparison the quoted literals used to perform.
        cursor_zi_service.execute(query, (str(batch), str(channel_alias)))
        rows = cursor_zi_service.fetchall()
        column_names = [column[0] for column in cursor_zi_service.description]
        return pd.DataFrame(rows, columns=column_names)
    finally:
        # Release the connection even when the query or fetch fails.
        mssql.Close()
#获取非重点类数据详情
def _fuzzy_match_outcome(name_list, code_list, warranty_list, state_list):
    """Turn one product's fuzzy-match candidates into export rows and DB text.

    Returns:
        ``(rows, stored_name, stored_code)``. Each row is the tuple
        ``(fuzzy_match_result, fuzzy_match_name, fuzzy_match_code, warranty,
        state)``. ``stored_name`` / ``stored_code`` are the texts to write to
        ``product_all``; both are ``None`` when there is no candidate.
    """
    if len(name_list) == 1:
        row = (1, name_list[0], code_list[0], warranty_list[0], state_list[0])
        return [row], str(name_list[0]), str(code_list[0])
    if len(name_list) > 1:
        if len(name_list) > 3:
            # Only the first three candidates are kept.
            name_list = name_list[:3]
            code_list = code_list[:3]
        rows = [
            (1, match_name, match_code, warranty, state)
            for match_name, match_code, warranty, state
            in zip(name_list, code_list, warranty_list, state_list)
        ]
        # Several candidates are stored as the list's text form, quotes removed.
        return rows, str(name_list).replace("'", ""), str(code_list).replace("'", "")
    return [(0, '', '', '', '')], None, None


def GetNonpointCategoryDataDetail(df_):
    """Fuzzy-match non-key-category products and record the outcome.

    For every distinct ``channel_alias`` in ``df_`` the products' category,
    brand code, model and price lists are posted to the
    ``GetModelMatchResInfo`` API. Each product's candidates are then written
    to ``product_all`` (``fuzzy_match_result`` plus, when matched,
    ``fuzzy_match_name`` / ``fuzzy_match_code``) and expanded into export rows:
    one row per kept candidate (at most three), or a single row with
    ``fuzzy_match_result`` 0 and empty match fields when there is none.

    Args:
        df_: DataFrame providing the columns id, name, url_name, zi_brandname,
            zi_brandcode, params, model, category, zi_subcategoryname,
            zi_subcategorycode, url, channel, channel_alias and price.

    Returns:
        pandas.DataFrame with those columns followed by fuzzy_match_result,
        fuzzy_match_name, fuzzy_match_code, warranty, state, new_name and
        remark (the last two filled with the text ``'null'``).
    """
    base_columns = [
        'id', 'name', 'url_name', 'zi_brandname', 'zi_brandcode', 'params',
        'model', 'category', 'zi_subcategoryname', 'zi_subcategorycode',
        'url', 'channel', 'channel_alias', 'price',
    ]
    match_columns = [
        'fuzzy_match_result', 'fuzzy_match_name', 'fuzzy_match_code',
        'warranty', 'state',
    ]
    all_columns = base_columns + match_columns
    collected = {column: [] for column in all_columns}

    matched_sql = ("update product_all set fuzzy_match_result = '1',"
                   "fuzzy_match_name = %s,fuzzy_match_code = %s where id = %s")
    unmatched_sql = "update product_all set fuzzy_match_result = '0' where id = %s"

    # Open the database connection.
    mssql = MSSQL('123.57.45.119','ZI_Service')
    try:
        cursor_zi_service = mssql._cur
        for channel_alias in df_['channel_alias'].unique().tolist():
            df = df_[df_['channel_alias'] == channel_alias]
            source = {column: df[column].tolist() for column in base_columns}
            # NOTE(review): an earlier revision also built an int-converted
            # brand-code list and a url list zipped with itself; neither was
            # ever used, so the API still receives the raw zi_brandcode values.
            # Confirm whether the converted codes were meant to be sent.

            # Brand + model + category fuzzy matching.
            data = {
                "params_info": {
                    "cust_category_list": source['category'],
                    "brandId_list": source['zi_brandcode'],
                    "model_list": source['model'],
                    "cust_price_list": source['price']
                }
            }
            res = zgc_api("GetModelMatchResInfo",data)
            candidates = zip(
                zip(*[source[column] for column in base_columns]),
                res['res_name_list'],
                res['res_sku_list'],
                res['res_warranty_list'],
                res['res_state_list'],
            )

            # Progress indicator.
            index_ = Index()
            for counter, (base_values, name_list, code_list, warranty_list, state_list) in enumerate(candidates, start=1):
                print(index_(counter, len(df)-1), end='%')
                rows, stored_name, stored_code = _fuzzy_match_outcome(
                    name_list, code_list, warranty_list, state_list)
                # Values are bound as parameters so quotes inside a matched
                # name cannot break the statement; the id is still compared
                # as text, as the quoted literal did.
                product_id = str(base_values[0])
                if stored_name is None:
                    cursor_zi_service.execute(unmatched_sql, (product_id,))
                else:
                    cursor_zi_service.execute(
                        matched_sql, (stored_name, stored_code, product_id))
                for match_values in rows:
                    for column, value in zip(all_columns, base_values + match_values):
                        collected[column].append(value)
            print(channel_alias,"完成")
    finally:
        # Release the connection even when the API call or an update fails.
        mssql.Close()

    export_excel = pd.DataFrame()
    for column in all_columns:
        export_excel[column] = collected[column]
    # Strip the list-literal wrapper from url values stored as "['...']".
    export_excel['url'] = export_excel['url'].apply(lambda x:str(x).replace('[\'','').replace('\']',''))
    export_excel['new_name'] = 'null'
    export_excel['remark'] = 'null'
    return export_excel
#校验重点类参数数据,若有问题,添加数据字典
def CheckPointParamsData(path):
# Validate the key-category parameter sheets of the workbook at `path`.
# For each category listed in important_category, the sheet named
# "<category with '/' replaced by '_'>参数数据" is read, the parameter values of
# its rows are posted to the GetZGCParamValuesInfo API, and values the API
# flags as False are looked up in (and, if absent, inserted into)
# ShuJuZiDian_Cfg.
# Returns the merged API result dict while `flag` is still True; otherwise
# exports the collected ShuJuZiDian_Cfg rows whose stdvalue is NULL to
# "<path without .xlsx>(数据字典补充).xlsx" and returns {}.
# NOTE(review): indentation is not visible in this copy of the file, so the
# exact nesting of the statements below must be confirmed against the original.
# Open the database connection
mssql = MSSQL('123.56.115.207','ZI_NEW')
cursor_zi_new = mssql._cur
# Load the list of key categories
cursor_zi_new.execute(f"select category_name from important_category")
point_category_list = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['category_name'].tolist()
# Load the parameter definitions stored in the database
cursor_zi_new.execute("select * from vw_property")
db_params = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])
# Drop definitions whose identy is '0000' and trim whitespace around subtitle
db_params = db_params[db_params['identy'] != '0000']
db_params['subtitle'] = db_params['subtitle'].apply(lambda x: x.strip())
# Accumulates the paramsValue_dict of every category that was processed
return_dict = dict()
# Ids of ShuJuZiDian_Cfg rows selected after an insert (see below)
complicated_data_dict_id_list = list()
# Set to False as soon as a dictionary entry turns out to be missing
flag = True
for category in point_category_list:
try:
# Sheet names use '_' where the category name contains '/'
point_category = category.replace('/','_')
cat_df = pd.read_excel(path,sheet_name = f"{point_category}参数数据",converters = {'productcode':str})
except:
# Any failure while reading the sheet skips this category
continue
# remark is compared as text below ('nan' / 'None' mean "no remark")
cat_df['remark'] = cat_df['remark'].apply(lambda x:str(x))
# Instantiate the progress indicator
index_ = Index()
counter = 1
id_list = list()
params_list = list()
# First pass: collect ids and raw parameter values for the API request
for index,row in cat_df.iterrows():
try:
print(index_(counter, len(cat_df)-1), end='%')
counter += 1
except:
# Fallback progress call when the one above raises
print(index_(counter, 1), end='%')
# Rows that carry a remark are skipped
if row['remark'] != 'nan' and row['remark'] != 'None':
continue
id_ = row['id']
# NOTE: rebinds the outer loop variable `category` to the row's own value
category = row['zi_subcategoryname']
id_list.append(id_)
temp_key_list = list()
temp_value_list = list()
for db_param in db_params[db_params['name'] == category]['subtitle'].tolist():
# 产品型号 and CPU属性 are excluded from the request
if db_param in ['产品型号','CPU属性']:
continue
temp_key_list.append(db_param)
temp_value_list.append(row[db_param])
params_list.append(dict(zip(temp_key_list,temp_value_list)))
print(f"{category}数据准备完成")
# Request the standard parameter values
data ={
"params_info": {
"category": category,
"id_list": id_list,
"params_dict_list": params_list
}
}
res = zgc_api("GetZGCParamValuesInfo",data)
res_value_dict = res['paramsValue_dict']
return_dict.update(res_value_dict)
# Instantiate the progress indicator
index_ = Index()
counter = 1
# Second pass: inspect the flag the API returned for every value
for index,row in cat_df.iterrows():
try:
print(index_(counter, len(cat_df)-1), end='%')
counter += 1
except:
print(index_(counter, 1), end='%')
if row['remark'] != 'nan' and row['remark'] != 'None':
continue
# The API result is indexed by the id as text
id_ = str(row['id'])
category = row['zi_subcategoryname']
for db_param in db_params[db_params['name'] == category]['subtitle'].tolist():
if db_param in ['产品型号','CPU属性']:
continue
pri_value = row[db_param]
# Each entry is a one-item mapping; its first (value, flag) pair is used
value,value_flag = tuple(res_value_dict[id_][db_param].items())[0]
if value_flag == False:
# Look for an existing dictionary row with this primitive value
cursor_zi_new.execute(f"select * from ShuJuZiDian_Cfg where categoryname = '{category}' and subtitle = '{db_param}' and primitive = '{pri_value}'")
check_df = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])
if check_df.empty:
# No such row yet: mark the batch as failed and insert a placeholder
flag = False
cursor_zi_new.execute(f"insert into ShuJuZiDian_Cfg (categoryname,subtitle,primitive) values ('{category}','{db_param}','{pri_value}')")
# NOTE(review): whether the id lookup below runs only after an insert or for
# every flagged value depends on indentation that is not visible here
cursor_zi_new.execute(f"select id from ShuJuZiDian_Cfg where categoryname = '{category}' and subtitle = '{db_param}' and primitive = '{pri_value}'")
complicated_data_dict_id = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
complicated_data_dict_id_list.append(complicated_data_dict_id)
print(f"完成{category}参数校验")
if flag:
mssql.Close()
print("本批数据校验通过!,返回标准值字典")
return return_dict
else:
# Export the collected dictionary rows that still lack a stdvalue
data_dict_path = path.split(".xlsx")[0] + '(数据字典补充).xlsx'
if len(complicated_data_dict_id_list) == 1:
_id = complicated_data_dict_id_list[0]
cursor_zi_new.execute(f"select * from ShuJuZiDian_Cfg where stdvalue is Null and id = {_id}")
else:
# One %s placeholder per id; the ids are passed as query parameters
cursor_zi_new.execute("select * from ShuJuZiDian_Cfg where stdvalue is Null and id in (%s)" % ','.join(['%s'] * len(complicated_data_dict_id_list)),tuple(complicated_data_dict_id_list))
fill_shujuzidian_df = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])
fill_shujuzidian_df.to_excel(data_dict_path)
mssql.Close()
return {}
#上传数据字典补充数据
def ComplicatedDataDict(path):
    """Upload a filled-in data-dictionary supplement workbook.

    Reads the Excel file at ``path`` and, for each of its rows, writes the
    row's ``stdvalue`` and ``simplevalue`` to the ``ShuJuZiDian_Cfg`` record
    with the same ``id``. The row's index label is printed after each update.

    Args:
        path: Location of the workbook; it must provide the columns ``id``,
            ``stdvalue`` and ``simplevalue``.

    Raises:
        ValueError: If an ``id`` cell cannot be converted to an integer.
    """
    update_sql = "update ShuJuZiDian_Cfg set stdvalue = %s,simplevalue = %s where id = %s"
    # Open the database connection.
    mssql = MSSQL('123.56.115.207','ZI_NEW')
    try:
        cursor_zi_new = mssql._cur
        df = pd.read_excel(path)
        for index, row in df.iterrows():
            # Cell text is bound as parameters so quotes typed into the
            # workbook cannot break or alter the statement. str() keeps the
            # text form that string formatting used to store; int() turns the
            # pandas/numpy id into a plain integer the driver accepts.
            cursor_zi_new.execute(
                update_sql,
                (str(row['stdvalue']), str(row['simplevalue']), int(row['id'])),
            )
            print(index)
    finally:
        # Release the connection even when reading or updating fails.
        mssql.Close()
#记录标准参数项参数值
def save_standard_params_info(std_value_dict):
    """Store each product's standard parameter values in ``product_all``.

    For every id in ``std_value_dict`` a ``{subtitle: value}`` dict is built,
    where the value is the first key of that subtitle's mapping. Directly
    after a ``CPU型号`` entry a ``CPU属性`` entry is added: the CPU model itself
    when it contains 飞腾, 龙芯 or 兆芯, otherwise the part of the model before
    the first ``-``. The text form of that dict is written to
    ``params_standard`` of the row with the same id.

    Args:
        std_value_dict: Mapping ``id -> {subtitle -> {value: flag}}``.
    """
    update_sql = "update product_all set params_standard = %s where id = %s"
    domestic_cpu_marks = ('飞腾', '龙芯', '兆芯')

    mssql_service = MSSQL('123.57.45.119','ZI_Service')
    try:
        cursor_zi_service = mssql_service._cur
        # Progress indicator.
        index_ = Index()
        for counter, (id_, subtitle_values) in enumerate(std_value_dict.items(), start=1):
            print(index_(counter, len(std_value_dict)-1), end='%')
            params = {}
            for subtitle, value_map in subtitle_values.items():
                value = list(value_map.keys())[0]
                params[subtitle] = value
                if subtitle == 'CPU型号':
                    if any(mark in value for mark in domestic_cpu_marks):
                        params['CPU属性'] = value
                    else:
                        params['CPU属性'] = value.split('-')[0]
            # Bound as parameters: the driver does the quoting, so no manual
            # doubling of single quotes is needed and the id cannot alter the
            # statement.
            cursor_zi_service.execute(update_sql, (str(params), id_))
    finally:
        # Release the connection even when an update fails.
        mssql_service.Close()
    print("完成记录标准参数项参数值")
def transform_simplevalue(cursor_zi_new,shujuzidiandf,categoryname,subtitle,stdvalue):
    """Map a standard parameter value to the short form used in product names.

    Args:
        cursor_zi_new: Not used by this function.
        shujuzidiandf: Data-dictionary DataFrame with the columns
            ``categoryname``, ``subtitle``, ``stdvalue`` and ``simplevalue``.
        categoryname: Category to look up.
        subtitle: Parameter name to look up.
        stdvalue: Standard value; surrounding whitespace is ignored.

    Returns:
        The stripped ``stdvalue`` when ``subtitle`` is not one of the
        abbreviated parameters; otherwise the unique ``simplevalue`` found in
        the dictionary (a single space when it is ``'无简称'``), or ``False``
        when zero or several distinct short forms exist.
    """
    cleaned = stdvalue.strip()
    abbreviated_subtitles = (
        'CPU型号', '显存容量', '操作系统', '双面器', '双面输稿器',
        '网络打印', '标配外服务及配件', '标配外耗材', '镜头描述', '碎纸效果',
    )
    if subtitle not in abbreviated_subtitles:
        return cleaned

    same_category = shujuzidiandf['categoryname'] == categoryname
    same_subtitle = shujuzidiandf['subtitle'] == subtitle
    same_value = shujuzidiandf['stdvalue'] == cleaned
    matching_rows = shujuzidiandf[same_category & same_subtitle & same_value]
    short_forms = list(set(matching_rows['simplevalue'].tolist()))

    if len(short_forms) != 1:
        if not short_forms:
            print(f"非法值,不存在数据字典中。{subtitle},{cleaned}")
        else:
            print(f"异常数据,具有多个简称。{subtitle},{cleaned}")
        return False

    short_form = short_forms[0]
    # '无简称' marks a value without a short form; a blank is returned instead.
    return " " if short_form == '无简称' else short_form
def dael_name_content(skuname,brand):
    """Clean up the first parenthesised parameter group of a product name.

    Every occurrence of ``brand`` is removed from ``skuname`` and the brand is
    put back at the front. The text between the first ``(`` and the following
    ``)`` is split on ``/``:

    * if every item is exactly one space, the whole group is dropped (head
      and tail are joined with a single extra space when a tail exists);
    * otherwise the items equal to one space are removed and the remaining
      ones are re-joined with ``/`` inside parentheses.

    Args:
        skuname: Product name containing at least one ``(...)`` group.
        brand: Brand text to strip from the name and prepend to the result.

    Returns:
        The rebuilt name with leading and trailing whitespace removed.

    Raises:
        IndexError: If ``skuname`` has no ``(`` or no ``)`` after it.
    """
    remainder = skuname.replace(brand, '')
    pieces = remainder.split('(')
    name_head = pieces[0]
    group_and_rest = pieces[1].split(')')
    name_tail = group_and_rest[1]
    name_content = group_and_rest[0].split('/')

    if set(name_content) == {' '}:
        # Nothing but blank placeholders: drop the parentheses entirely.
        res = brand + name_head
        if name_tail != '':
            res += " " + name_tail
    else:
        kept = [element for element in name_content if element != ' ']
        # strip('/') also removes separators left over from empty items.
        group = "(" + "/".join(kept).strip('/') + ")"
        res = brand + name_head + group + name_tail
    return res.strip()
def create_to_db(path,std_value_dict):
#创建链接
mssql_new = MSSQL('123.56.115.207','ZI_NEW')
cursor_zi_new = mssql_new._cur
mssql_price = MSSQL('123.57.45.119','price_calculate')
cursor_zi_price = mssql_price._cur
mssql_service = MSSQL('123.57.45.119','ZI_Service')
cursor_zi_service = mssql_service._cur
#获取重点类列表
cursor_zi_new.execute(f"select category_name from important_category")
point_category_list = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['category_name'].tolist()
#获取重点类命名规则
cursor_zi_new.execute(f"select * from skuname_named_rule")
named_rules_df = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])
#获取重点类属性规格项数据 并提取参数项自身属性(必填项、匹配项、标准项、配件项)
cursor_zi_new.execute(f"select * from vw_property where name in (select category_name from important_category)")
data = cursor_zi_new.fetchall()
subtitle_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
subtitle_df['subtitle'] = subtitle_df['subtitle'].apply(lambda x: x.strip())
subtitle_df['require_param'] = subtitle_df['identy'].apply(lambda x: x[0])
subtitle_df['match_param'] = subtitle_df['identy'].apply(lambda x: x[1])
subtitle_df['standard_param'] = subtitle_df['identy'].apply(lambda x: x[2])
subtitle_df['part_param'] = subtitle_df['identy'].apply(lambda x: x[3])
#获取重点类SKU数据
cursor_zi_new.execute(f"select * from vw_sku_params where categoryname in (select category_name from important_category)")
data = cursor_zi_new.fetchall()
sku_db_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
#获取数据字典
cursor_zi_new.execute("select * from ShuJuZiDian_Cfg")
data_dict = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])
#获取spu价格黑名单
cursor_zi_price.execute("select spuid from spu_calculate_price where zc_price = 0 and ds_price = 0 and st_price = 0")
hmd_spuid_list = pd.DataFrame(cursor_zi_price.fetchall(), columns=[tuple[0] for tuple in cursor_zi_price.description])['spuid'].to_list()
new_df_combine_list = list()
for category in point_category_list:
try:
point_category = category.replace('/','_')
cat_df = pd.read_excel(path,sheet_name = f"{point_category}参数数据",converters = {'productcode':str})
channel_list = cat_df['channel_alias'].unique().tolist()
except:
continue
for channel_alias in channel_list:
df = cat_df[cat_df['channel_alias'] == channel_alias]
cat_subtitle_df = subtitle_df[subtitle_df['name'] == category]#获取该类别的属性规格项数据
#cat_spu_db_df = spu_db_df[spu_db_df['categoryname'] == point_category]#获取该类别的所有SPU数据
cat_sku_db_df = sku_db_df[sku_db_df['categoryname'] == category]#获取该类别的所有SKU数据
cat_sku_db_df['skuname'] = cat_sku_db_df['skuname'].apply(lambda x :x.upper())
#获取这个类的数据字典
single_data_dict = data_dict[data_dict['categoryname'] == category]
single_data_dict['stdvalue'] = single_data_dict['stdvalue'].apply(lambda x : x.upper())
#该类别命名规则
named_rule = named_rules_df[named_rules_df['categoryname'] == category]['rule'].str.cat()
new_product_flag_list = []#新建产品标识符号 1:需要创建的产品 0:不需要创建的产品
computer_cat = ['笔记本','台式机','一体电脑']#整机类 需要进行匹配
#spu_id = [] #若只匹配到SPU,则记录spuid,方便后续建库使用
df['remark'] = df['remark'].apply(lambda x:str(x))
df['productcode'] = df['productcode'].apply(lambda x:str(x))
#产品命名
new_name_list = []
sku_list = list()
spuid_list = list()
for index,row in df.iterrows():
if (row['remark'] != 'nan' and row['remark'] != 'None') or (row['productcode'] != 'nan' and row['productcode'] != 'None'):
new_name_list.append(" ")
continue
id_ = str(row['id'])
skuname = ''
brandname = row['zi_brandname']
for element in named_rule.split(" "):
if element == '品牌名称':
value = brandname
elif element == '类别名称':
value = category
elif element[0] == "(" and "/" in element:
element = element.replace("(","").replace(")","")
detail_element_list = element.split("/")
for i in range(len(detail_element_list)):
#value = cat_sku_df[(cat_sku_df['sku'] == sku) & (cat_sku_df['name'] == detail_element_list[i])]['value'].tolist()[0]
#获取标准参数值
param_ = detail_element_list[i]
value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
value = transform_simplevalue(cursor_zi_new,single_data_dict,category,detail_element_list[i],value.upper())
#if value:
# pass
#else:
# flag = False
# continue
if i == 0:
skuname += "(" + value + "/"
elif i == len(detail_element_list)-1:
skuname += value + ") "
else:
skuname += value + "/"
continue
elif element[0] == "(" and "/" not in element:
element = element.replace("(","").replace(")","")
param_ = element
value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
value = transform_simplevalue(cursor_zi_new,single_data_dict,category,element,value.upper())
#if value:
# pass
#else:
# flag = False
# continue
skuname += "(" + value + ") "
continue
else:
param_ = element
value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
value = transform_simplevalue(cursor_zi_new,single_data_dict,category,element,value.upper())
#if value:
# pass
#else:
# flag = False
# continue
skuname += value + " "
#if flag == False:
# continue
skuname = skuname.strip()
#处理名称中的空格
if category in ['复印纸','扫描仪']:
skuname = skuname
elif category in ['笔记本','台式机','一体电脑','碎纸机']:
skuname = dael_name_content(skuname,brandname)
elif category in ['复印机','多功能一体机','单反相机']:
skuname_part1 = skuname.split(category)[0]
skuname_part2 = skuname.split(category)[1]
skuname_1 = dael_name_content(skuname_part1,brandname)
skuname_2 = dael_name_content(skuname_part2,brandname).replace(brandname,"")
skuname = skuname_1 + " " + category + skuname_2
elif category in ['投影机','激光打印机','空调']:
if category == '空调':
skuname_part1 = skuname.split("空调 (")[0].strip()
else:
skuname_part1 = skuname.split(category)[0].strip()
skuname_part2 = skuname.replace("中央空调","").replace("空调扇","").split(category)[1]
skuname_2 = dael_name_content(skuname_part2,brandname).replace(brandname,"")
skuname = skuname_part1 + " " + category + skuname_2
elif category in ['单电/微单相机']:
skuname_part1 = skuname.split(category)[0]
skuname_part2 = skuname.split(category)[1]
skuname_1 = dael_name_content(skuname_part1,brandname)
skuname_2 = dael_name_content(skuname_part2,brandname).replace(brandname,"")
skuname = skuname_1 + " " + category + skuname_2
else:
skuname_part1 = skuname.split(category)[0].strip()
skuname_part2 = skuname.split(category)[1]
skuname_2 = dael_name_content(skuname_part2,brandname).replace(brandname,"")
skuname = skuname_part1 + " " +category + skuname_2
print(skuname)
new_name_list.append(skuname)
df['new_name'] = new_name_list
for index,row in df.iterrows():
if row['remark'] != 'nan' and row['remark'] != 'None':#驳回数据
new_product_flag_list.append('0')
cursor_zi_service.execute(f"update product_all set state = '2',new_name = '{row['new_name']}',remark = '{row['remark']}' where id = {row['id']}")
elif row['productcode'] != 'nan' and row['productcode'] != 'None':
new_product_flag_list.append('0')
cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{row['productcode']}' where id = {row['id']}")
else:
newname = row['new_name']
#匹配数据(整机类进行匹配)
if cat_sku_db_df[cat_sku_db_df['skuname'] == row['new_name'].upper()].empty:#未匹配上数据
new_product_flag_list.append('1')
cursor_zi_service.execute(f"update product_all set state = '2',remark = '未匹配上产品',new_name = '{newname}' where id = {row['id']}")
else:#匹配上数据
productcode = cat_sku_db_df[cat_sku_db_df['skuname'] == row['new_name'].upper()]['sku'].tolist()[0]
cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{productcode}',new_name = '{newname}',remark = Null,pic_state = '0' where id = {row['id']}")
new_product_flag_list.append('0')
print('完成名称匹配')
df['new_product_flag'] = new_product_flag_list
new_df = df[df['new_product_flag'] == '1']#需要新建的产品
if category in computer_cat:
cpu_attr_list = []
for cpu in new_df['CPU型号'].tolist():
if '飞腾' in cpu or '龙芯' in cpu or '兆芯' in cpu:
pass
else:
cpu = cpu.split('-')[0]
cpu_attr_list.append(cpu)
new_df['CPU属性'] = cpu_attr_list
else:
continue
#新建产品
print(f"开始新建产品,共计:'{len(new_df)}'件产品")
index_ = 0
delete_index_list = list()
for index,row in new_df.iterrows():
id_ = str(row['id'])
brand = row['zi_brandname'].strip()
brandcode = row['zi_brandcode'] # 建库需要的品牌编码
#创建SPU 获取SPUid
name = str(brand)
for spu_param in cat_subtitle_df[cat_subtitle_df['skuorspu'] == 'spu'].sort_values("Expr1")['subtitle'].tolist():
if spu_param == '产品品牌':
continue
else:
name += " "
name += str(row[spu_param])
name = name + " " +str(row['zi_subcategoryname'])#spu名称
spu = str(uuid.uuid1()).replace('-','')#spu编码
category_code = int(str(row['zi_subcategorycode']).replace("?",""))#类别编码
brand_code = brandcode#品牌编码
cursor_zi_new.execute(f"select id from p_spu where categoryid = {category_code} and brandid = {brand_code} and spuname = '{name}'")
data = cursor_zi_new.fetchall()
spu_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
param_deal_flag = False #是否处理参数
if spu_df.empty:
#cursor_zi_new.execute(f"insert into p_spu (spuname,spu,categoryid,brandid) values ('{name}','{spu}',{category_code},{brand_code})")
#param_deal_flag = True
delete_index_list.append(index)
continue
else:
spuid = spu_df['id'].tolist()[0]
if spuid in hmd_spuid_list:
delete_index_list.append(index)
continue
spuid_list.append(spuid)
#cursor_zi_service.execute(f"update product_all_original_list set spuid = {spuid} where id = {row['id']}")
if param_deal_flag:
#创建SPU属性
for index,spu_param_row in cat_subtitle_df[cat_subtitle_df['skuorspu'] == 'spu'].iterrows():
subtitle = spu_param_row['subtitle'].strip()
subtitleid = spu_param_row['subtitleid']
param_ = subtitle
value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
cursor_zi_new.execute(f"select a.valueid from p_valuemap a \
left join p_value b \
on a.valueid = b.id \
left join p_subtitle c \
on b.subtitleid = c.id \
where a.spuid = {spuid} and c.name = '{subtitle}'")
data = cursor_zi_new.fetchall()
valueid_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
if valueid_df.empty:
cursor_zi_new.execute(f"insert into p_value (subtitleid,value) values ({subtitleid}, '{value}')")
cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"select id from p_valuemap where spuid = {spuid} and valueid = {valueid}")
data = cursor_zi_new.fetchall()
check_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
if check_df.empty:
cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
else:
valueid_list = valueid_df['valueid'].tolist()
if len(valueid_list) == 1:
valueid = valueid_list[0]
cursor_zi_new.execute(f"select value from p_value where id = {valueid}")
find_value = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['value'].tolist()[0]
if value == find_value:
continue
else:
cursor_zi_new.execute(f"delete from p_valuemap where spuid = {spuid} and valueid = {valueid}")
#找一个合适的valueid
cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
try:
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
except:
cursor_zi_new.execute(f"insert into p_value (subtitleid,value) values ({subtitleid}, '{value}')")
cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
else:
for valueid in valueid_list:
cursor_zi_new.execute(f"delete from p_valuemap where spuid = {spuid} and valueid = {valueid}")
#找一个合适的valueid
cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
try:
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
except:
cursor_zi_new.execute(f"insert into p_value (subtitleid,value) values ({subtitleid}, '{value}')")
cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
#创建SKU
skuname = str(row['new_name']).strip()
sku = datetime.datetime.now().strftime("%Y-%m-%d%H:%M:%S.%f").replace('-','').replace(':','').replace('.','')[:-3]
time.sleep(0.001)
#spuid = int(row['spuid'])
state = 1
source = '1'
createddate = datetime.datetime.now().strftime("%Y-%m-%d %X")
cursor_zi_new.execute(f"select sku from p_sku where skuname = '{skuname}' and spuid = {spuid}")
data = cursor_zi_new.fetchall()
sku_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
param_deal_flag = False#是否处理参数
if sku_df.empty:
cursor_zi_new.execute(f"insert into p_sku (skuname,sku,spuid,state,source,createddate) values ('{skuname}','{sku}',{spuid},{state},'{source}','{createddate}')")
cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{sku}',new_name = '{row['new_name']}',remark = Null,pic_state = '1' where id = {row['id']}")
param_deal_flag = True
else:
cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{sku_df['sku'].tolist()[0]}',new_name = '{row['new_name']}',remark = Null,pic_state = '0' where id = {row['id']}")
sku = sku_df['sku'].tolist()[0]
cursor_zi_new.execute(f"select id from p_sku where sku = '{sku}'")
data = cursor_zi_new.fetchall()
skuid = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0] #skuid
sku_list.append(sku)
if param_deal_flag:
#录入产品详细参数
for index,sku_param_row in cat_subtitle_df[cat_subtitle_df['skuorspu'] == 'sku'].iterrows():
subtitle = sku_param_row['subtitle'].strip()
subtitleid = sku_param_row['subtitleid']
if subtitle == '产品品牌':
continue
try:
param_ = subtitle
value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
except:
continue
cursor_zi_new.execute(f"select a.valueid from p_skuvaluemap a \
left join p_skuvalue b \
on a.valueid = b.id \
left join p_skusubtitle c \
on b.subtitleid = c.id \
where a.skuid = {skuid} and c.name = '{subtitle}'")
data = cursor_zi_new.fetchall()
valueid_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
if valueid_df.empty:
cursor_zi_new.execute(f"insert into p_skuvalue (subtitleid,value) values ({subtitleid}, '{value}')")
cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"select id from p_skuvaluemap where skuid = {skuid} and valueid = {valueid}")
data = cursor_zi_new.fetchall()
check_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
if check_df.empty:
cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
else:
valueid_list = valueid_df['valueid'].tolist()
if len(valueid_list) == 1:
valueid = valueid_list[0]
cursor_zi_new.execute(f"select value from p_skuvalue where id = {valueid}")
find_value = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['value'].tolist()[0]
if value == find_value:
continue
else:
cursor_zi_new.execute(f"delete from p_skuvaluemap where skuid = {skuid} and valueid = {valueid}")
#找一个合适的valueid
cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
try:
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
except:
cursor_zi_new.execute(f"insert into p_skuvalue (subtitleid,value) values ({subtitleid}, '{value}')")
cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
else:
for valueid in valueid_list:
cursor_zi_new.execute(f"delete from p_skuvaluemap where skuid = {skuid} and valueid = {valueid}")
#找一个合适的valueid
cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
try:
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
except:
cursor_zi_new.execute(f"insert into p_skuvalue (subtitleid,value) values ({subtitleid}, '{value}')")
cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
index_+=1
print(f"已处理完'{index_}'件产品")
print(f"完成{channel_alias}{category}数据建库")
#计算价格
new_df = new_df.drop(index= delete_index_list)
new_df['sku'] = sku_list
new_df['spuid'] = spuid_list
export_df = new_df[['id','sku','spuid','zi_subcategoryname','zi_brandname','new_name','channel_alias']]
new_df_combine_list.append(export_df)
new_df_combine = pd.concat(new_df_combine_list)
mssql_new.Close()
mssql_price.Close()
mssql_service.Close()
return new_df_combine
def check_configure(new_df_all):
    """Run the accessory-price check once per sub-category.

    For every distinct ``zi_subcategoryname`` in *new_df_all*, the distinct
    ``sku`` values of the matching rows are passed to
    ``check_configure_price``. A completion message is printed at the end.
    """
    subcategory_column = new_df_all['zi_subcategoryname']
    for category in subcategory_column.unique().tolist():
        category_rows = new_df_all[subcategory_column == category]
        # Check the accessory price differences of this category's products.
        check_configure_price(category, category_rows['sku'].unique().tolist())
    print("检查完毕")
def _restore_sku_price(spu_price_df, price_difference_df, spuid, sku, column):
    """Return the SPU price in *column* plus the SKU's accessory price difference.

    An SPU price of 0 is returned as 0 without looking up the difference.
    The first matching row wins in both tables; IndexError is raised when
    *spuid* (or, for a non-zero price, *sku*) has no row.
    """
    spu_price = spu_price_df[spu_price_df['spuid'] == spuid][column].tolist()[0]
    if spu_price == 0:
        return 0
    difference = price_difference_df[price_difference_df['sku'] == sku]['price_difference'].tolist()[0]
    return spu_price + float(difference)


def calculate_spu_price(new_df_all):
    """Restore SKU prices from SPU prices and store them, one sub-category at a time.

    For each distinct ``zi_subcategoryname`` in *new_df_all* the function:

    1. calls ``create_price_difference_single`` for the category's SKUs,
    2. reads ``spu_calculate_price`` and ``configure_price_difference``,
    3. derives ``zc_price`` / ``ds_price`` / ``st_price`` per row and combines
       them with weights 0.2 / 0.1 / 0.7 (a price of 0 contributes no weight),
    4. inserts the rows into ``sku_calculate_price``.

    Rows whose three prices are all 0 get the text "SPU下无可用价格" as price.

    Args:
        new_df_all: DataFrame with the columns ``zi_subcategoryname``, ``sku``,
            ``spuid``, ``new_name`` and ``zi_brandname``. It is not modified.

    Raises:
        IndexError: when a ``spuid`` or ``sku`` has no row in the price tables.
    """
    mssql = MSSQL('123.57.45.119', 'price_calculate')
    cursor_zi_price = mssql._cur
    try:
        for category in new_df_all['zi_subcategoryname'].unique().tolist():
            # Copy the filtered rows: the price columns added below must not be
            # written onto a slice of the caller's frame (chained assignment).
            new_df = new_df_all[new_df_all['zi_subcategoryname'] == category].copy()
            sku_list = new_df['sku'].unique().tolist()
            # Record the accessory price differences of the newly added products.
            create_price_difference_single(category, sku_list)
            # Read the SPU price table and the accessory price-difference table.
            cursor_zi_price.execute("select spuid,zc_price,ds_price,st_price from spu_calculate_price")
            spu_price_df = pd.DataFrame(cursor_zi_price.fetchall(), columns=[column[0] for column in cursor_zi_price.description])
            cursor_zi_price.execute("select sku,price_difference from configure_price_difference")
            price_difference_df = pd.DataFrame(cursor_zi_price.fetchall(), columns=[column[0] for column in cursor_zi_price.description])

            spu_sku_pairs = list(zip(new_df['spuid'].tolist(), new_df['sku'].tolist()))
            for price_column in ('zc_price', 'ds_price', 'st_price'):
                new_df[price_column] = [
                    _restore_sku_price(spu_price_df, price_difference_df, spuid, sku, price_column)
                    for spuid, sku in spu_sku_pairs
                ]

            new_df['zc_price_weight'] = new_df['zc_price'].apply(lambda x: 0 if x == 0 else 0.2)
            new_df['ds_price_weight'] = new_df['ds_price'].apply(lambda x: 0 if x == 0 else 0.1)
            new_df['st_price_weight'] = new_df['st_price'].apply(lambda x: 0 if x == 0 else 0.7)
            new_df['weight'] = new_df['zc_price_weight'] + new_df['ds_price_weight'] + new_df['st_price_weight']
            # With no usable price the weight is 0 and the division yields NaN,
            # which is then replaced by the explanatory text.
            new_df['cal_price'] = (new_df['st_price']*0.7 + new_df['zc_price']*0.2 + new_df['ds_price']*0.1)/new_df['weight']
            new_df['cal_price'] = new_df['cal_price'].fillna("SPU下无可用价格")
            createddate = datetime.datetime.now().strftime("%Y-%m-%d %X")

            # Store the prices in the staging table.
            insert_list = [
                (row['spuid'], row['sku'], row['new_name'], row['zi_brandname'], category,
                 row['zc_price'], row['ds_price'], row['st_price'], str(row['cal_price']), createddate)
                for _, row in new_df.iterrows()
            ]
            sql = "insert into sku_calculate_price (spuid,sku,skuname,brand,category,zc_price,ds_price,st_price,sku_price,create_time) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            cursor_zi_price.executemany(sql, insert_list)
            print(f"完成{category}价格还原")
    finally:
        # Release the connection even when a lookup or insert fails.
        mssql.Close()
def _apply_non_point_sheet(cursor, sheet_df, done_label, is_match_sheet):
    """Write one confirmation sheet back to ``product_all``, channel by channel.

    Per row: a non-empty remark marks the product state '2' with that remark;
    otherwise a missing match code marks it '2' as unmatched (brand and name
    are refreshed), and a present match code marks it '9' with that code.
    On the matched sheet (*is_match_sheet*) the unmatched update only touches
    rows whose ``productcode`` is still Null, and the '9' update also stores
    ``new_name``.

    All values are bound as query parameters, so apostrophes in remarks,
    brands or names no longer break the statement.
    """
    columns = ('id', 'remark', 'fuzzy_match_code', 'zi_brandname', 'zi_brandcode', 'new_name')
    for channel_alias in sheet_df['channel_alias'].unique().tolist():
        channel_df = sheet_df[sheet_df['channel_alias'] == channel_alias]
        # Progress indicator
        index_ = Index()
        counter = 1
        for _id, remark, matchcode, brand, brandcode, newname in zip(*(channel_df[name].tolist() for name in columns)):
            try:
                print(index_(counter, len(channel_df)-1), end='%')
                counter += 1
            except Exception:
                # Fallback total of 1; presumably needed when the channel has
                # a single row and the total would be 0 - confirm against Index.
                print(index_(counter, 1), end='%')
            if remark != 'nan':
                cursor.execute(
                    "update product_all set state = '2',remark = %s where id = %s",
                    (str(remark), str(_id)),
                )
            elif matchcode == 'nan':
                sql = ("update product_all set state = '2',remark = '未匹配上产品',productcode = Null,"
                       "zi_brandname = %s,zi_brandcode = %s,new_name = %s where id = %s")
                if is_match_sheet:
                    sql += " and productcode is Null"
                cursor.execute(sql, (str(brand), str(brandcode), str(newname), str(_id)))
            else:
                matchcode = str(matchcode).strip()
                if is_match_sheet:
                    cursor.execute(
                        "update product_all set state = '9',remark = Null,productcode = %s,new_name = %s,pic_state = '0' where id = %s",
                        (matchcode, str(newname), str(_id)),
                    )
                else:
                    cursor.execute(
                        "update product_all set state = '9',remark = Null,productcode = %s,pic_state = '0' where id = %s",
                        (matchcode, str(_id)),
                    )
        print(f"完成{channel_alias}{done_label}")


def update_non_point_to_db(path,mode):
    """Apply the confirmed non-key-category matching workbook to ``product_all``.

    The sheet '匹配到的数据' is always processed. Unless *mode* is
    ``'match_data'``, the sheet '未匹配数据处理' is processed afterwards.

    Args:
        path: path of the Excel workbook holding both sheets.
        mode: ``'match_data'`` to stop after the matched sheet; any other
            value processes both sheets.

    Returns:
        The string "完成数据更新" when *mode* is ``'match_data'``, else None.
    """
    # Open the database connection
    mssql = MSSQL('123.57.45.119','ZI_Service')
    try:
        cursor_zi_service = mssql._cur
        df_match = pd.read_excel(path,sheet_name = '匹配到的数据',converters = {'fuzzy_match_code':str,'zi_subcategorycode':str})
        # Empty cells become the text 'nan', which the row logic tests for.
        df_match['remark'] = df_match['remark'].apply(str)
        df_match['fuzzy_match_code'] = df_match['fuzzy_match_code'].apply(str)
        for dropped in ('Unnamed: 0', 'warranty', 'state'):
            del df_match[dropped]
        df_match = df_match.drop_duplicates()
        _apply_non_point_sheet(cursor_zi_service, df_match, '匹配数据更新', True)
        if mode == 'match_data':
            return "完成数据更新"

        df_unmatch = pd.read_excel(path,sheet_name = '未匹配数据处理',converters = {'fuzzy_match_code':str,'zi_subcategorycode':str})
        df_unmatch['remark'] = df_unmatch['remark'].apply(str)
        df_unmatch['fuzzy_match_code'] = df_unmatch['fuzzy_match_code'].apply(str)
        _apply_non_point_sheet(cursor_zi_service, df_unmatch, '未匹配数据更新', False)
    finally:
        # Close on every path, including read or update errors.
        mssql.Close()
def get_data_all():
    """Return the ``product_all`` rows in state '9' that lack a price or a name.

    Returns:
        A DataFrame with every column of ``product_all`` for rows where
        ``cal_price`` or ``new_name`` is Null.
    """
    # Open the database connection
    service_db = MSSQL('123.57.45.119','ZI_Service')
    cursor = service_db._cur
    cursor.execute("select * from product_all where state = '9' and (cal_price is Null or new_name is Null)")
    fetched_rows = cursor.fetchall()
    column_names = [column[0] for column in cursor.description]
    pending_df = pd.DataFrame(fetched_rows, columns=column_names)
    service_db.Close()
    return pending_df
def get_price(df):
    """Fill ``cal_price`` in ``product_all`` for every row of *df*.

    For each row the price is taken from ``zd_week_price`` (latest period of
    ``zd_entry_goods_price``); if no row exists there, from
    ``sku_calculate_price``; if neither has one, the text
    "无法获取价格,请核查" is stored instead. The stripped product code is
    written back together with the price.

    Args:
        df: DataFrame with the columns ``id`` and ``productcode``.
    """
    # Open the connections
    mssql_index = MSSQL('123.56.115.207','zdindex')
    mssql_price = MSSQL('123.57.45.119','price_calculate')
    mssql_service = MSSQL('123.57.45.119','ZI_Service')
    try:
        cursor_zdindex = mssql_index._cur
        cursor_zi_price = mssql_price._cur
        cursor_zi_service = mssql_service._cur
        # Progress indicator
        index_ = Index()
        counter = 1
        for index,row in df.iterrows():
            try:
                print(index_(counter, len(df)-1), end='%')
            except Exception:
                # NOTE(review): other progress loops in this file guard
                # `len(...)-1` this way, presumably because a total of 0
                # (single-row input) makes Index fail - confirm.
                print(index_(counter, 1), end='%')
            counter += 1
            sku = str(row['productcode']).strip()
            cursor_zdindex.execute(
                "select index_price_wave from zd_week_price where periods in (select top 1 max(periods) from zd_entry_goods_price) and goods_id = %s",
                (sku,),
            )
            price_rows = cursor_zdindex.fetchall()
            if not price_rows:
                # No index price: fall back to the calculated SKU price.
                cursor_zi_price.execute("select sku_price from sku_calculate_price where sku = %s", (sku,))
                price_rows = cursor_zi_price.fetchall()
            cal_price = price_rows[0][0] if price_rows else "无法获取价格,请核查"
            # Values are bound as parameters instead of being pasted into the SQL text.
            cursor_zi_service.execute(
                "update product_all set cal_price = %s,productcode=%s where id = %s",
                (str(cal_price), sku, int(row['id'])),
            )
    finally:
        # Release all three connections even when a query fails.
        mssql_index.Close()
        mssql_price.Close()
        mssql_service.Close()
def get_name(df):
    """Fill ``new_name`` in ``product_all`` from ``p_sku`` for every row of *df*.

    The name is the ``skuname`` of the row's product code; when ``p_sku`` has
    no such SKU the text "无法获取产品名称" is stored. ``remark`` is reset to
    Null in the same update.

    Args:
        df: DataFrame with the columns ``id`` and ``productcode``.
    """
    mssql_new = MSSQL('123.56.115.207','ZI_NEW')
    mssql_service = MSSQL('123.57.45.119','ZI_Service')
    try:
        cursor_zi_new = mssql_new._cur
        cursor_zi_service = mssql_service._cur
        # Progress indicator
        index_ = Index()
        counter = 1
        for index,row in df.iterrows():
            try:
                print(index_(counter, len(df)-1), end='%')
            except Exception:
                # NOTE(review): same guard as the other progress loops in this
                # file; presumably a total of 0 makes Index fail - confirm.
                print(index_(counter, 1), end='%')
            counter += 1
            sku = row['productcode']
            cursor_zi_new.execute("select skuname from p_sku where sku = %s", (str(sku),))
            name_rows = cursor_zi_new.fetchall()
            name = name_rows[0][0] if name_rows else "无法获取产品名称"
            # Bound as a parameter so names containing apostrophes are stored intact.
            cursor_zi_service.execute(
                "update product_all set new_name = %s,remark = Null where id = %s",
                (str(name), int(row['id'])),
            )
    finally:
        mssql_new.Close()
        mssql_service.Close()
def save_sku_relationship(filter,channel_alias):
    """Send the customer-SKU to product-code relationships of one batch to the API.

    Reads the ``product_all`` rows in state '9' for the given channel and
    batch, then posts their ``sku`` / ``productcode`` / ``channel_alias``
    columns to the "Stock-InSkuRelationshipInfo" endpoint and prints the reply.

    Args:
        filter: batch identifier matched against ``product_all.batch``.
        channel_alias: channel alias matched against ``product_all.channel_alias``.
    """
    mssql = MSSQL('123.57.45.119','ZI_Service')
    try:
        cursor_zi_service = mssql._cur
        cursor_zi_service.execute(
            "select * from product_all where state = '9' and channel_alias = %s and batch = %s",
            (str(channel_alias), str(filter)),
        )
        rows = cursor_zi_service.fetchall()
        df = pd.DataFrame(rows, columns=[column[0] for column in cursor_zi_service.description])
    finally:
        # The data is fully fetched; release the connection before the HTTP
        # call so it is neither held during that call nor leaked if it fails.
        mssql.Close()
    payload = {
        "params_info": {
            "cust_sku_list": df['sku'].tolist(),
            "sku_list": df['productcode'].tolist(),
            "channelAlias_list": df['channel_alias'].tolist()
        }
    }
    res = zgc_api("Stock-InSkuRelationshipInfo",payload)
    print(res)
# Manual driver for the customer-data workflow: each step below calls one
# pipeline function in order. The paths and batch values are hard-coded
# examples, and some steps consume files produced by the previous step.
# NOTE(review): InitializeData is called with two arguments here while the
# definition visible later in this dump takes only `path` - confirm which
# version of the module this script targets.
path = '/Users/rico/project/客户数据处理(调接口)/原始客户数据/成都CD/2020-10-21/客户数据整理模板-成都电子商城.xlsx'
channel_alias = 'AH'
# Initialize data
InitializeData(path,channel_alias)
# Load data
batch = '2020-10-19'
df = LoadData(batch,channel_alias,'deal')
# SKU de-duplication
SkuMatch(df,channel_alias)
# Export the SKU de-duplication result
SkuMatchResult(batch,channel_alias)
# Import the confirmed SKU de-duplication result
path = ''
UpdateSkuMatchResult(path)
# Load data
df = LoadData(batch,channel_alias,'deal')
# Analyse the data to obtain basic information (brand, category, model)
AnalyseBasicInfo(df)
# Export data (the file is written to the path the script is executed from)
ExportToExcelBasicConfirm(batch,channel_alias)
# Read the returned data and update
path = "/Users/rico/project/客户数据处理(调接口)/原始客户数据/成都CD/2020-10-21/汇总-CD基础信息确认2020-10-21(1).xlsx"
UpdateBasicData(path)
# Get key-category confirmation data (the file is written to the path the script is executed from)
df = GetPointCategoryData(batch,channel_alias)
GetPointCategoryDataDetail(df,channel_alias)
# Get non-key-category confirmation data (the file is written to the path the script is executed from)
df = GetNonpointCategoryData(batch,channel_alias)
match_result = GetNonpointCategoryDataDetail(df)
# Split the result by fuzzy_match_result into two sheets of one workbook
writer = pd.ExcelWriter(f"{channel_alias}非重点类匹配结果确认及命名.xlsx")
export_excel_1 = match_result[match_result['fuzzy_match_result'] == 1]
export_excel_1.to_excel(writer,'匹配到的数据')
export_excel_2 = match_result[match_result['fuzzy_match_result'] == 0]
export_excel_2.to_excel(writer,'未匹配数据处理')
writer.save()
# Process key-category data
path = "/Users/rico/Downloads/反馈-HB重点类参数确认-1112.xlsx"
## Validate the parameter data
std_value_dict = CheckPointParamsData(path)
## Import the supplementary data dictionary
data_dict_path = "/Users/rico/project/客户数据处理(调接口)/原始客户数据/湖北HB/2020-10-19/反馈-HB重点类参数确认(数据字典补充)(1)(2)(1).xlsx"
ComplicatedDataDict(data_dict_path)
## Load into the database (do not proceed when std_value_dict is empty)
new_df_all = create_to_db(path,std_value_dict)
## Record the standard parameter names and values
save_standard_params_info(std_value_dict)
## Check accessory prices
check_configure(new_df_all)
## Calculate the restored prices
calculate_spu_price(new_df_all)
# Process non-key-category data
## Update non-key-category data
path = "/Users/rico/Downloads/000001hn非重点类匹配结果确认及命名-汇总(1).xlsx"
mode = 'all_data'
update_non_point_to_db(path,mode)
# The string literal below is a disabled alternative flow (second-pass
# matching); it is evaluated as a no-op expression and never executed.
'''
mode = 'match_data'
update_non_point_to_db(path,mode)
df = pd.read_excel(path,sheet_name = '未匹配数据处理')
match_second_result = GetNonpointCategoryDataDetail(df)
writer = pd.ExcelWriter(f"{channel_alias}非重点类匹配结果确认及命名(二次匹配).xlsx")
export_excel_1 = match_second_result[match_second_result['fuzzy_match_result'] == 1]
export_excel_1.to_excel(writer,'匹配到的数据')
export_excel_2 = match_second_result[match_second_result['fuzzy_match_result'] == 0]
export_excel_2.to_excel(writer,'未匹配数据处理')
writer.save()
path = "/Users/rico/project/客户数据处理(调接口)/原始客户数据/安徽AH/20201018/反馈-AH非重点类匹配结果确认及命名(二次匹配)(1).xlsx"
mode = 'all_data'
update_non_point_to_db(path,mode)
'''
# Store the SKU relationships (run only after both key and non-key categories are finished)
save_sku_relationship(batch,channel_alias)
# Fill in data
df = get_data_all()
get_price(df)
get_name(df)
# Export the processing result
df = LoadData(batch,channel_alias,'result')
df.to_excel(f"{channel_alias}-{batch}结果数据.xlsx")
# The string literal below holds a disabled one-off variant of
# update_non_point_to_db; it is not executed.
'''
path = '/Users/rico/Downloads/ZSYH-2020-09-29结果数据.xlsx'
def update_non_point_to_db(path,mode):
#创建数据库链接
mssql = MSSQL('123.57.45.119','ZI_Service')
cursor_zi_service = mssql._cur
df_match = pd.read_excel(path,sheet_name = 'Sheet1',converters = {'productcode':str,'zi_subcategorycode':str})
df_match['remark'] = df_match['remark'].apply(lambda x : str(x))
channel_alias = 'ZSYH'
df_match_channel = df_match[df_match['channel_alias'] == channel_alias]
#实例化进度条
index_ = Index()
counter = 1
for _id,remark,matchcode in zip(df_match_channel['id'].tolist(),df_match_channel['remark'].tolist(),df_match_channel['productcode'].tolist()):
print(index_(counter, len(df_match_channel)-1), end='%')
counter += 1
if remark == 'nan':
if matchcode == 'nan':
pass
else:
matchcode = str(matchcode).strip()
cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{matchcode}',pic_state = '0',remark = Null where id = '{_id}'")
else:
cursor_zi_service.execute(f"update product_all set state = '2',remark = '{remark}' where id = '{_id}'")
print(f"完成{channel_alias}匹配数据更新")
'''
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 18 11:11:31 2020
@author: rico
"""
from db import MSSQL
from API import zgc_api
import math
import pandas as pd
import numpy as np
from public_function import *
# Look up the SKU match result for every supplier in the workbook and export
# the combined rows with a new `productcode` column.
df = pd.read_excel("/Users/rico/project/中信银行/5000条/中信5000测试数据.xlsx",converters = {'供应商SKU':str})
supplier_list = df['供应商名称'].unique().tolist()
engine = 'SQL'
df_list = []
for supplier in supplier_list:
    # Copy the filtered rows: a column is added below, and assigning onto a
    # slice of `df` is a chained assignment pandas warns about.
    sup_df = df[df['供应商名称'] == supplier].copy()
    sku_list = sup_df['供应商SKU'].tolist()
    # Fetch the SKU match result
    data = {
        "params_info": {
            "channelAlias": str(supplier),
            "cust_sku_list": sku_list,
            "engine": engine
        }
    }
    res = zgc_api("GetSKUMatchResInfo",data)
    if res['code'] == 0:
        # Code 0: no per-SKU result is read; every row is marked False.
        res_list = [False] * len(sup_df)
    else:
        res_list = [res['res_dict'][sku] for sku in sku_list]
    sup_df['productcode'] = res_list
    df_list.append(sup_df)
result_df = pd.concat(df_list)
result_df.to_excel("中信银行5000条.xlsx")
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 16 14:27:52 2020
@author: rico
"""
import pandas as pd
from API import zgc_api
import pymssql
from public_function import *
# Step 1: push the kept JD links of the workbook to the price API.
df = pd.read_excel("/Users/rico/Downloads/京东重复链接(修正后链接).xls",sheet_name = "保留",converters = {'productcode':str})
sku_list = df['productcode'].tolist()
source_name_list = df['productname'].tolist()
price_list = df['参考价'].tolist()
url_list = df['goods_url'].tolist()
channelId_list = ['DS-JD'] * len(df)
# Store the price relationships
data = {
    "params_info": {
        "sku_list": sku_list,
        "url_name_list": source_name_list,
        "url_price_list":price_list,
        "url_list": url_list,
        "channelId_list": channelId_list
    }
}
res = zgc_api("Stock-InPriceInfo",data)

# Step 2: for DS-JD price rows of periods 20200917/20200921 without a URL,
# copy the URL of the same goods_id from period 20200914.
conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database= 'zdindex',autocommit=True)
cursor = conn.cursor()
try:
    cursor.execute("select id,goods_id from zd_electricity_price where mall_id ='DS-JD' and (periods = '20200917' or periods = '20200921') and goods_url is Null")
    data = cursor.fetchall()
    df = pd.DataFrame(data, columns=[column[0] for column in cursor.description])
    index_ = Index()
    counter = 1
    for index,row in df.iterrows():
        try:
            print(index_(counter, len(df)-1), end='%')
        except Exception:
            # NOTE(review): same guard as other progress loops in this dump;
            # presumably a total of 0 makes Index fail - confirm.
            print(index_(counter, 1), end='%')
        counter += 1
        id_ = row['id']
        code = row['goods_id']
        cursor.execute(
            "select goods_url from zd_electricity_price where goods_id = %s and periods = '20200914' and mall_id = 'DS-JD'",
            (str(code),),
        )
        found = cursor.fetchone()
        # Skip when the earlier period has no row, or its URL is NULL too;
        # previously a NULL URL was written back as the literal text 'None'.
        if found is None or found[0] is None:
            continue
        url = found[0]
        cursor.execute("update zd_electricity_price set goods_url = %s where id = %s", (url, int(id_)))
finally:
    # The connection was never closed before.
    cursor.close()
    conn.close()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 14 10:11:38 2020
@author: rico
"""
import pymssql
import pandas as pd
from API import zgc_api
# Loads the confirmed "建库" sheet: registers new brands, creates the products
# through the API, then stores the price links and SKU relationships.
df = pd.read_excel("/Users/rico/project/路桥项目/JD数据/反馈-JD未匹配数据处理基本信息确认0909.xlsx",sheet_name = '建库')
conn_zi_lq = pymssql.connect(host='123.57.45.119', user='zgcprice', password='zgcprice20200708',database= "SXLQ_JD",autocommit=True)
cursor_zi_lq = conn_zi_lq.cursor()
try:
    # Create new brands
    newbrand_df = df[df['zi_brandcode'] == "空"][['id','zi_brandname']]
    newbrand_list = newbrand_df['zi_brandname'].apply(lambda x : x.replace("新建品牌:","").strip()).unique().tolist()
    data = {
        "params_info": {
            "brand_list": newbrand_list
        }
    }
    res = zgc_api("Stock-InbrandInfo",data)
    brand_dict = res['res_dict']
    # Update the product_all table
    for index,row in newbrand_df.iterrows():
        _id = row['id']
        zi_brand_dict = brand_dict[row['zi_brandname'].replace("新建品牌:","").strip()]
        # The API entry is read as a one-item mapping {brand name: brand code}.
        zi_brand = next(iter(zi_brand_dict))
        zi_brandcode = zi_brand_dict[zi_brand]
        # The brand name is free text, so it is bound as a parameter. The brand
        # code stays an unquoted literal exactly as before; it comes from the
        # API reply, not from the spreadsheet.
        cursor_zi_lq.execute(
            f"update product_all set zi_brandname = %s,zi_brandcode = {zi_brandcode} where id = %s",
            (str(zi_brand), int(_id)),
        )
    # Load into the product database
    id_list = df['id'].tolist()
    cust_sku_list = df['sku'].tolist()
    channelAlias_list = df['channel_alias'].apply(lambda x:x.replace("SXLQ_","")).tolist()
    source_name_list = df['name'].tolist()
    price_list = df['price'].apply(str).tolist()
    url_list = df['url'].tolist()
    channelId_list = ["DS-JD"] * len(df)
    brand_list = df['zi_brandname'].tolist()
    brandId_list = df['zi_brandcode'].apply(str).tolist()
    category_list = df['zi_subcategoryname'].tolist()
    categoryId_list = df['zi_subcategorycode'].apply(str).tolist()
    # NOTE(review): eval() executes whatever text the spreadsheet cell holds.
    # If the cells are plain dict literals, ast.literal_eval would be the safe
    # replacement - confirm the cell format before switching.
    params_list = df['params'].apply(lambda x: eval(x)).tolist()
    name_list = df['productname'].apply(lambda x: x.replace("'","''")).tolist()
    '''
    for params,category in zip(params_list,category_list):
    cust_paramsAttr_list = [key for key in params]
    data = {
    "params_info": {
    "category": category,
    "cust_paramsAttr_list": cust_paramsAttr_list
    }
    }
    res = zgc_api("GetZGCParamsInfo",data)
    res['paramsAttr_dict']
    '''
    data = {
        "params_info": {
            "brand_list": brand_list,
            "brandId_list": brandId_list,
            "category_list": category_list,
            "categoryId_list": categoryId_list,
            "params_list": params_list,
            "name_list": name_list
        }
    }
    res = zgc_api("Stock-InProductInfo",data)
    sku_list = res['sku_list']
    # Update the product_all table
    for _id,sku in zip(id_list,sku_list):
        cursor_zi_lq.execute(
            "update product_all set productcode = %s,remark = Null,state = '9' where id = %s",
            (str(sku), _id),
        )
finally:
    # The connection was never closed before; release it even on failure.
    cursor_zi_lq.close()
    conn_zi_lq.close()
# Store the price relationships
data = {
    "params_info": {
        "sku_list": sku_list,
        "url_name_list": source_name_list,
        "url_price_list":price_list,
        "url_list": url_list,
        "channelId_list": channelId_list
    }
}
res = zgc_api("Stock-InPriceInfo",data)
# Store the SKU relationships
data = {
    "params_info": {
        "cust_sku_list": cust_sku_list,
        "sku_list": sku_list,
        "channelAlias_list": channelAlias_list
    }
}
res = zgc_api("Stock-InSkuRelationshipInfo",data)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 25 11:07:18 2020
@author: rico
"""
import requests
def zgc_api(func,data):
    """POST *data* as JSON to the service endpoint *func* and return the decoded reply.

    The API key is added to a copy of *data*, so the caller's dict is left
    untouched (previously the key was written into the caller's object).
    The request URL is printed before the call.

    Args:
        func: endpoint name, used as the path segment of the URL.
        data: JSON-serialisable dict forming the request body.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.RequestException: on connection failure or after the
            14400-second timeout.
        ValueError: when the response body is not valid JSON.
    """
    headers = {
        'Connection': 'Keep-Alive'
    }
    # NOTE(review): the key and host are hard-coded in source; consider
    # loading them from configuration.
    key = 'eRo1#ZFHY5N&GEzV'
    api = f"http://59.110.219.171:8000/{func}/"
    print(api)
    payload = dict(data)
    payload['key'] = key
    # The context manager closes the session (and its pooled connection)
    # once the response has been decoded.
    with requests.Session() as session:
        return session.post(api,json=payload,headers=headers,timeout=14400).json()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 7 23:08:30 2020
@author: rico
"""
import pymssql
class MSSQL:
    """Open a pymssql connection and cursor for one of the known servers.

    Attributes:
        host: server address passed to the constructor.
        db: database name passed to the constructor.
        user: login chosen for *host*, or None when the host is unknown.
        _conn: the open connection, or None when connecting failed.
        _cur: a cursor on ``_conn``, or None when connecting failed.
    """

    # Login per known server.
    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to configuration.
    _CREDENTIALS = {
        '123.57.45.119': ('zgcprice', 'zgcprice20200708'),
        '123.56.115.207': ('zgcindex', 'jiayou202006'),
        '10.0.120.131': ('sa', '1qaz@WSX'),
    }

    def __init__(self,host,db):
        self.host = host
        self.db = db
        # Defined up front so GetConnectInfo() and Close() work even when the
        # connection could not be opened.
        self.user = None
        self._cur = None
        self._conn = self.GetConnect()
        if self._conn:
            self._cur = self._conn.cursor()

    # Connect to the database
    def GetConnect(self):
        """Return an autocommit connection to ``self.db``, or None on failure.

        Failures (unknown host, connection error) are printed, not raised.
        """
        credentials = self._CREDENTIALS.get(self.host)
        if credentials is None:
            print("连接数据库失败, %s" % f"no credentials configured for host {self.host}")
            return None
        self.user, pwd = credentials
        try:
            return pymssql.connect(
                host=self.host,
                user=self.user,
                password=pwd,
                database=self.db,
                autocommit=True
            )
        except Exception as err:
            print("连接数据库失败, %s" % err)
            return None

    # Show the connection information
    def GetConnectInfo(self):
        """Print the server, user name and database of this wrapper."""
        print( "连接信息:" )
        print( "服务器:%s , 用户名:%s , 数据库:%s " % (self.host,self.user,self.db))

    def Close(self):
        """Close the cursor and the connection; a no-op if none was opened."""
        if self._cur is not None:
            self._cur.close()
        if self._conn:
            self._conn.close()
'''
ms = MSSQL('123.56.115.207','zdindex')
conn = ms._conn
cursor = ms._cur
cursor.execute(f"select top 10 * from zd_week_price")
cursor.fetchall()
ms.Close()
cursor.close()
conn.close()
'''
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 25 09:59:23 2020
@author: rico
"""
from API import zgc_api
from db import MSSQL
import pandas as pd
import time
from public_function import *
import os
import base64
import paramiko
import uuid
import datetime
#初始化数据
def InitializeData(path):
    """Upload the product rows of a customer workbook into ``product_all``.

    Reads the sheets '产品表模板' and '价格表模板' from *path*, maps every price
    source alias to its channel code via ``zdindex_channel_rel`` and inserts
    one ``product_all`` row per product. The links, source codes and prices
    of a product are stored as the text form of Python lists.

    Args:
        path: path of the Excel workbook.

    Returns:
        False when the operator column is empty or a price source alias has
        no channel code (nothing is inserted in either case); None otherwise.
    """
    # Load the uploaded data
    df_product = pd.read_excel(path,sheet_name = '产品表模板')
    df_product = df_product.fillna('无')
    if df_product['操作人'].unique().tolist()[0] == '无':
        print("请填写操作人信息,eg:CL-MBJK")
        return False
    df_price = pd.read_excel(path,sheet_name = '价格表模板')
    df_price = df_price.fillna('无')
    # Open the database connections
    mssql = MSSQL('123.57.45.119','ZI_Service')
    mssql_new = MSSQL('123.56.115.207','ZI_NEW')
    try:
        cursor_zi_service = mssql._cur
        cursor_zi_new = mssql_new._cur
        # Price-channel dictionary: alias name -> channel code
        cursor_zi_new.execute("select channel_alias_cn,channel_alias_code from zdindex_channel_rel")
        price_source = pd.DataFrame(cursor_zi_new.fetchall(), columns=[column[0] for column in cursor_zi_new.description])
        price_source_dict = dict(zip(price_source['channel_alias_cn'].tolist(),price_source['channel_alias_code'].tolist()))
        # Progress indicator
        index_ = Index()
        counter = 1
        # One batch label for the whole upload.
        batch = time.strftime("%Y-%m-%d",time.localtime())
        # Collect the product rows
        insert_data = []
        for index,row in df_product.iterrows():
            try:
                print(index_(counter, len(df_product)-1), end='%')
            except Exception:
                # Fallback total of 1; presumably needed when the sheet has a
                # single row and the total would be 0 - confirm against Index.
                print(index_(counter, 1), end='%')
            counter += 1
            id_ = row['序号']
            sku = row['供应商SKU']
            channel_alias = row['供应商简称']
            category = row['指数类别']
            brand = row['指数品牌']
            name = row['指数名称']
            model = row['型号']
            channel = row['操作人']
            # Price rows of this product (filtered once instead of three times).
            price_rows = df_price[df_price['序号'] == id_]
            url = str(price_rows['链接'].tolist())
            # Convert the link source aliases to channel codes
            url_source = []
            for url_source_element in price_rows['来源(简称)'].tolist():
                try:
                    url_source.append(price_source_dict[url_source_element])
                except KeyError:
                    print(f"{url_source_element}无价格系统渠道对应")
                    # The `finally` below now closes both connections on this path.
                    return False
            url_source = str(url_source)
            url_price = str(price_rows['电子商城价'].tolist())
            # `price` and `url_price` both receive the price list, and
            # `channel_id` and `channel_alias` both receive the alias.
            insert_data.append((sku,name,brand,category,model,url_price,url,url_source,url_price,channel,channel_alias,channel_alias,batch))
        cursor_zi_service.executemany(
            "insert into product_all (sku,name,brand,category,model,price,url,url_source,url_price,channel,channel_id,channel_alias,batch) "
            "values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
            insert_data,
        )
    finally:
        mssql.Close()
        mssql_new.Close()
#加载数据
def LoadData(batch,channel_alias,mode):
    """Read rows of ``product_all`` into a DataFrame.

    Args:
        batch: batch label to filter on, or "all" for no batch/channel filter.
        channel_alias: channel alias to filter on (ignored when batch is "all").
        mode: 'deal' selects the working columns of rows whose state is Null;
            'collectDataConfirm' and 'dealPicInfo' select their own column
            sets (only when batch is not "all"); anything else selects ``*``.

    Returns:
        A DataFrame holding the selected rows.
    """
    # Open the database connection
    service_db = MSSQL('123.57.45.119','ZI_Service')
    cursor = service_db._cur
    deal_columns = "id,sku,name,brand,category,params,price,channel_alias,batch"
    if batch == "all":
        if mode == 'deal':
            query = f"select {deal_columns} from product_all where state is Null"
        else:
            query = "select * from product_all"
    else:
        scope = f"where batch = '{batch}' and channel_alias = '{channel_alias}'"
        if mode == 'deal':
            query = f"select {deal_columns} from product_all {scope} and state is Null"
        elif mode == 'collectDataConfirm':
            confirm_columns = "id,category,brand,sku,channel_alias,name,params,url,url_source,url_price,new_name,params_standard,zi_subcategoryname,zi_subcategorycode,zi_brandname,zi_brandcode,channel_alias,state"
            query = f"select {confirm_columns} from product_all {scope}"
        elif mode == 'dealPicInfo':
            query = f"select id,sku,productcode,name,batch,url,url_pic from product_all {scope}"
        else:
            query = f"select * from product_all {scope}"
    cursor.execute(query)
    fetched_rows = cursor.fetchall()
    loaded_df = pd.DataFrame(fetched_rows, columns=[column[0] for column in cursor.description])
    service_db.Close()
    return loaded_df
#获取其他建库信息(品牌编码,类别编码,产品名称)
def GetOtherProductInfo(df):
    """Fill brand code, category code and product name in ``product_all``.

    For each row of *df* the leaf category id and leaf brand id are looked up
    by name in ``p_category`` / ``p_brand`` (rows that are no other row's
    ``pid``), and written to ``product_all`` together with the row's name,
    brand and category.

    Fixes the update statement, which contained the empty f-string expression
    ``'zi_brandcode{}'`` (a SyntaxError) instead of the brand code value.

    Args:
        df: DataFrame with the columns ``id``, ``name``, ``brand``, ``category``.

    Raises:
        TypeError: when a category or brand name has no matching row
            (``fetchone()`` then returns None, which is indexed).
    """
    mssql_new = MSSQL('123.56.115.207','ZI_NEW')
    mssql = MSSQL('123.57.45.119','ZI_Service')
    try:
        cursor_zi_new = mssql_new._cur
        cursor_zi_service = mssql._cur
        # Progress indicator
        index_ = Index()
        counter = 1
        print("开始获取其他建库信息(品牌编码,类别编码,产品名称)")
        for index,row in df.iterrows():
            try:
                print(index_(counter, len(df)-1), end='%')
            except Exception:
                # Fallback total of 1; presumably needed when df has a single
                # row and the total would be 0 - confirm against Index.
                print(index_(counter, 1), end='%')
            counter += 1
            id_ = row['id']
            new_name = row['name']
            zi_brandname = row['brand']
            zi_subcategoryname = row['category']
            cursor_zi_new.execute(
                "select id from p_category where id not in (select distinct pid from p_category) and name = %s",
                (str(zi_subcategoryname),),
            )
            zi_subcategorycode = cursor_zi_new.fetchone()[0]
            cursor_zi_new.execute(
                "select id from p_brand where id not in (select distinct pid from p_brand) and name = %s",
                (str(zi_brandname),),
            )
            zi_brandcode = cursor_zi_new.fetchone()[0]
            # Values are bound as parameters, so names containing apostrophes
            # no longer break the statement.
            cursor_zi_service.execute(
                "update product_all set new_name = %s,zi_brandname = %s,zi_brandcode = %s, "
                "zi_subcategoryname = %s,zi_subcategorycode = %s where id = %s",
                (str(new_name), str(zi_brandname), str(zi_brandcode),
                 str(zi_subcategoryname), str(zi_subcategorycode), int(id_)),
            )
    finally:
        mssql_new.Close()
        mssql.Close()
#分析获取数据基本信息
def _parse_params_cell(raw):
    """Return the dict encoded in one ``params`` cell, or {} when unusable.

    NOTE(review): eval() executes the stored text. If the cells are plain
    dict literals, ast.literal_eval would be the safe replacement - confirm
    the stored format before switching.
    """
    try:
        parsed = eval(raw)
    except Exception:
        return {}
    return parsed if isinstance(parsed, dict) else {}


def _pick_model_param(params):
    """Return the first model-like entry of *params* as a one-item dict, else {}."""
    for key in ('认证型号', '产品型号', '型号'):
        if key in params:
            return {key: params[key]}
    return {}


def _brand_match_result(zi_brandname, cust_brand, cust_name):
    """Compare the index brand with the customer's brand and product name.

    Returns '2' for a brand marked as newly created, '1' when the Chinese or
    the English part of the index brand occurs in both *cust_brand* and
    *cust_name* (both expected upper-cased), and '0' otherwise.
    """
    cn_name = ''.join(re.findall(r'[0-9\u4E00-\u9FA5]', zi_brandname)) or '无数据'
    en_name = ''.join(re.findall(r'[a-zA-Z0-9-]', zi_brandname)).upper() or '无数据'
    if '新建品牌' in zi_brandname:
        return '2'
    if (cn_name in cust_brand and cn_name in cust_name) or (en_name in cust_brand and en_name in cust_name):
        return '1'
    return '0'


def AnalyseBasicInfo(df):
    """Derive category, brand and model for each product and store them.

    Calls the API endpoints GetPredictCategory, GetZGCCategoryInfo,
    GetZGCBrandInfo and GetModelInfo with the columns of *df*, adds the
    replies to *df* as new columns, and updates the matching ``product_all``
    rows by ``id``.

    Each ``params`` cell is now parsed on its own: an unparsable cell yields
    an empty dict for that row only, instead of discarding the parameters of
    every row.

    Args:
        df: DataFrame with the columns ``id``, ``name``, ``brand``,
            ``category``, ``params`` and ``channel_alias``. It is modified in
            place (result columns are added).
    """
    # Open the database connection
    mssql = MSSQL('123.57.45.119','ZI_Service')
    try:
        cursor_zi_service = mssql._cur
        # Request inputs; the URL-side name and brand reuse the customer values.
        name_list = df['name'].tolist()
        url_name_list = df['name'].tolist()
        brand_list = df['brand'].tolist()
        url_brand_list = df['brand'].tolist()
        category_list = df['category'].tolist()
        params_list = df['params'].tolist()
        channelAlias_list = df['channel_alias'].tolist()
        # Predicted category
        data = {
            "category_info": {
                "cust_name_list": name_list,
                "url_name_list":url_name_list
            }
        }
        res = zgc_api("GetPredictCategory",data)
        df['predict_cat'] = res['category_list']
        df['predict_cat_code'] = res['categoryCode_list']
        # Index category
        data = {
            "category_info": {
                "cust_category_list": category_list,
                "pre_category_list":df['predict_cat'].tolist()
            }
        }
        res = zgc_api("GetZGCCategoryInfo",data)
        df['zi_category'] = res['category_list']
        df['zi_category_code'] = res['categoryCode_list']
        # Index brand
        data = {
            "brand_info": {
                "cust_brand_list":brand_list,
                "url_brand_list":url_brand_list,
                "cust_name_list":name_list,
                "url_name_list": url_name_list
            }
        }
        res = zgc_api("GetZGCBrandInfo",data)
        df['zi_brandname'] = res['brand_list']
        df['zi_brandcode'] = res['brandId_list']
        # Model: only the model-like parameter of each product is sent, as text.
        params_list_ = [str(_pick_model_param(_parse_params_cell(params))) for params in params_list]
        data = {
            "model_info": {
                "channelAlias_list": channelAlias_list,
                "cust_category_list":category_list,
                "cust_name_list": name_list,
                "url_name_list": url_name_list,
                "cust_brand_list": brand_list,
                "cust_params_list": params_list_
            }
        }
        res = zgc_api("GetModelInfo",data)
        df['model'] = res['model_list']
        df['model_flag'] = res['modelFlag_list']
        insert_data = []
        # Progress indicator
        index_ = Index()
        counter = 1
        # Collect the basic-information updates
        for index,row in df.iterrows():
            try:
                print(index_(counter, len(df)-1), end='%')
            except Exception:
                # Fallback total of 1; presumably needed when df has a single
                # row and the total would be 0 - confirm against Index.
                print(index_(counter, 1), end='%')
            counter += 1
            predict_category = row['predict_cat']
            zi_subcategoryname = row['zi_category']
            zi_brandname = row['zi_brandname']
            predict_result = 1 if predict_category == zi_subcategoryname else 0
            # Brand comparison result
            brand_match_result = _brand_match_result(
                zi_brandname, str(row['brand']).upper(), str(row['name']).upper()
            )
            insert_data.append((
                predict_category, row['predict_cat_code'], zi_subcategoryname, row['zi_category_code'],
                predict_result, zi_brandname, row['zi_brandcode'], row['model'], row['model_flag'],
                brand_match_result, row['id'],
            ))
        cursor_zi_service.executemany("update product_all set predict_category = (%s) ,predict_category_code = (%s) ,zi_subcategoryname = (%s) ,zi_subcategorycode = (%s) ,predict_result = (%s) ,zi_brandname = (%s) ,zi_brandcode = (%s) ,model = (%s),model_flag = (%s),brand_match_result = (%s) where id = (%d)",insert_data)
    finally:
        # Release the connection even when an API call or the update fails.
        mssql.Close()
#导出基础信息至excel
def ExportToExcelBasicConfirm(batch,channel_alias):
    """Export the not-yet-confirmed basic information of one batch to Excel.

    Selects the rows of ``product_all`` (ZI_Service database) that belong to
    ``batch`` and ``channel_alias`` and whose ``state`` is NULL, and writes
    them to ``"{channel_alias}基础信息确认{batch}.xlsx"`` (a relative path).

    Args:
        batch: Batch identifier; compared with the ``batch`` column as text.
        channel_alias: Channel alias; compared with the ``channel_alias``
            column as text and used in the output file name.
    """
    mssql = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql._cur
    try:
        # Bind the filter values instead of formatting them into the SQL text,
        # so quotes inside the arguments cannot change the statement.
        cursor_zi_service.execute(
            "select id,sku,name,brand,zi_brandname,zi_brandcode,category,"
            "zi_subcategoryname,zi_subcategorycode,model,model_flag,params,"
            "price,url,channel,productcode,remark from product_all "
            "where batch = %s and channel_alias = %s and state is Null",
            (str(batch), str(channel_alias)),
        )
        columns = [column[0] for column in cursor_zi_service.description]
        df = pd.DataFrame(cursor_zi_service.fetchall(), columns=columns)
    finally:
        # Release the connection even when the query fails.
        mssql.Close()
    df.to_excel(f"{channel_alias}基础信息确认{batch}.xlsx")
#更新基础信息确认后数据
def UpdateBasicData(path):
    """Write manually confirmed basic information back to ``product_all``.

    Reads sheet ``Sheet1`` of the workbook at ``path`` and processes it row by
    row:

    * a row with a non-empty ``remark`` is rejected: its ``state`` is set to
      ``'2'`` and the remark is stored;
    * otherwise brand, category and model are updated.  A missing brand code
      or category code is looked up by name in ``p_brand`` / ``p_category``
      (ZI_NEW database), and ``point_category_flag`` is set to ``'1'`` when
      the category is listed in ``important_category`` and to ``'0'``
      otherwise.

    Args:
        path: Path of the confirmed Excel workbook.

    Raises:
        TypeError: If a brand or category name that needs a code lookup does
            not exist in ``p_brand`` / ``p_category`` (``fetchone()`` returns
            ``None``).
    """
    df = pd.read_excel(path,sheet_name = "Sheet1",converters={'zi_brandcode':str,'zi_subcategorycode':str})
    # Turn the columns into text so that an empty cell becomes the string 'nan'.
    for column in ('zi_brandcode', 'zi_subcategorycode', 'remark'):
        df[column] = df[column].apply(lambda x: str(x))

    mssql_service = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql_service._cur
    try:
        mssql_new = MSSQL('123.56.115.207','ZI_NEW')
        cursor_zi_new = mssql_new._cur
        try:
            # Names of the key ("important") categories.
            cursor_zi_new.execute("select category_name from important_category")
            important_cat_list = pd.DataFrame(
                cursor_zi_new.fetchall(),
                columns=[column[0] for column in cursor_zi_new.description],
            )['category_name'].tolist()

            index_ = Index()  # progress indicator
            for counter, (_, row) in enumerate(df.iterrows(), start=1):
                try:
                    print(index_(counter, len(df)-1), end='%')
                except Exception:
                    # Fall back to a total of 1 when the computed total is unusable.
                    print(index_(counter, 1), end='%')

                row_id = int(row['id'])
                remark = row['remark']
                if remark != 'nan':
                    # Rejected row: only record the state and the reason.
                    cursor_zi_service.execute(
                        "update product_all set state = '2',remark = %s where id = %d",
                        (remark, row_id),
                    )
                    continue

                # Values are bound as parameters below, so no manual quote
                # escaping is applied here.
                zi_brandname = row['zi_brandname'].strip().replace("[","").replace("]","")
                zi_brandcode = row['zi_brandcode'].strip().replace("?","").replace("[","").replace("]","")
                zi_subcategoryname = row['zi_subcategoryname'].strip()
                zi_subcategorycode = row['zi_subcategorycode'].strip().replace("?","")
                model = str(row['model']).strip()

                if zi_brandcode == 'nan':
                    cursor_zi_new.execute("select id from p_brand where name = %s", (zi_brandname,))
                    zi_brandcode = str(int(cursor_zi_new.fetchone()[0]))
                if zi_subcategorycode == 'nan' and zi_subcategoryname != '该类别非中电类别':
                    cursor_zi_new.execute("select id from p_category where name = %s", (zi_subcategoryname,))
                    zi_subcategorycode = str(int(cursor_zi_new.fetchone()[0]))

                point_category_flag = '1' if zi_subcategoryname in important_cat_list else '0'
                cursor_zi_service.execute(
                    "update product_all set zi_brandname = %s,zi_brandcode = %s,"
                    "zi_subcategoryname = %s,zi_subcategorycode = %s,"
                    "point_category_flag = %s,model = %s where id = %d",
                    (zi_brandname, zi_brandcode, zi_subcategoryname,
                     zi_subcategorycode, point_category_flag, model, row_id),
                )
        finally:
            mssql_new.Close()
    finally:
        mssql_service.Close()
#获取重点类产品数据
def GetCollectData(batch,channel_alias):
    """Load the products of one batch that still have to be collected.

    Selects every column of the ``product_all`` rows (ZI_Service database)
    that belong to ``batch`` and ``channel_alias`` and whose ``state`` is NULL
    or ``'8'``.

    Args:
        batch: Batch identifier; compared with the ``batch`` column as text.
        channel_alias: Channel alias; compared with ``channel_alias`` as text.

    Returns:
        pandas.DataFrame: One row per selected product, with the column names
        reported by the cursor.
    """
    mssql = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql._cur
    try:
        # Parameters are bound rather than formatted into the statement.
        cursor_zi_service.execute(
            "select * from product_all where (state is Null or state = '8') "
            "and batch = %s and channel_alias = %s",
            (str(batch), str(channel_alias)),
        )
        rows = cursor_zi_service.fetchall()
        columns = [column[0] for column in cursor_zi_service.description]
    finally:
        # Release the connection even when the query fails.
        mssql.Close()
    return pd.DataFrame(rows, columns=columns)
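#Crawl the parameters and pictures behind each product link (only one link per product is supported for now; when several links exist, the first one is used)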
def GetParamsinfoAndPic(df):
    """Crawl parameters and pictures for every product link and store them.

    For each row the first entry of ``url`` and the first entry of
    ``url_source`` (both stored as the text form of a list) are sent to the
    crawl service.  The returned picture list and parameter dict are added to
    ``df`` as the text columns ``url_pic`` and ``url_params`` and written to
    the same-named columns of ``product_all`` (ZI_Service database).

    Args:
        df: Product rows; must contain the columns ``id``, ``url`` and
            ``url_source``.  The frame is modified in place.

    Returns:
        pandas.DataFrame: ``df`` with the columns ``url_pic`` and
        ``url_params`` added.
    """
    import ast

    mssql_new = MSSQL('123.56.115.207','ZI_NEW')
    cursor_zi_new = mssql_new._cur
    try:
        mssql = MSSQL('123.57.45.119','ZI_Service')
        cursor_zi_service = mssql._cur
        try:
            # Channel code -> channel display name.
            cursor_zi_new.execute("select channel_alias_cn,channel_alias_code from zdindex_channel_rel")
            price_source = pd.DataFrame(
                cursor_zi_new.fetchall(),
                columns=[column[0] for column in cursor_zi_new.description],
            )
            price_source_dict = dict(zip(price_source['channel_alias_code'].tolist(),price_source['channel_alias_cn'].tolist()))

            request_url = "http://59.110.219.171:8092/return_data"
            # SECURITY NOTE: the stored url/url_source text and the crawler
            # response are external data.  They used to be passed to eval();
            # ast.literal_eval accepts the same literal syntax but cannot
            # execute code.
            pairs = [
                [str(ast.literal_eval(url)[0]), str(price_source_dict[ast.literal_eval(source)[0]])]
                for url, source in zip(df['url'].tolist(), df['url_source'].tolist())
            ]
            # The service expects the text form of the dict with double quotes.
            data_list = str({'data': pairs}).replace("'","\"")
            payload={'dataList': data_list}
            response = requests.request("POST", request_url, data=payload)
            res = ast.literal_eval(response.text)

            # Replace entries that were not crawled with an empty placeholder.
            res = [element if element else {'img_list':[],'class_list':{},'url':''} for element in res]

            df['url_pic'] = [str(element['img_list']) for element in res]
            # Drop empty key/value pairs and all spaces from the text form of
            # each parameter dict, then normalise it by parsing it again.
            crawl_params_list = [
                str(element['class_list']).replace("'': ''","").replace(", ,",",").replace("{,","{").replace(" ","")
                for element in res
            ]
            df['url_params'] = [str(ast.literal_eval(url_params)) for url_params in crawl_params_list]

            # Store the crawled information.
            index_ = Index()  # progress indicator
            for counter, (_, row) in enumerate(df.iterrows(), start=1):
                try:
                    print(index_(counter, len(df)-1), end='%')
                except Exception:
                    # Fall back to a total of 1 when the computed total is unusable.
                    print(index_(counter, 1), end='%')
                # Values are bound as parameters, so quotes need no escaping.
                cursor_zi_service.execute(
                    "update product_all set url_params = %s,url_pic = %s where id = %d",
                    (row['url_params'], row['url_pic'], int(row['id'])),
                )
            print('爬去数据存储完成')
        finally:
            mssql.Close()
    finally:
        mssql_new.Close()
    return df
#解析重点类产品数据并导出
def GetCollectDataDetail(df,channel_alias,batch):
    """Export one parameter-confirmation sheet per category to Excel.

    For every category in ``df['zi_subcategoryname']`` the specification
    items of that category are read from ``vw_property`` (ZI_NEW database).
    Required items (first character of ``identy`` is ``'1'``) are listed
    first and marked with a ``(*)`` suffix, ``'CPU属性'`` is left out of the
    required items, and ``'详细参数信息'`` is moved to the end.  Each product
    contributes one output row per item, pre-filled from its crawled
    ``url_params`` where a value is found.

    The workbook is written to ``"{channel_alias}建库产品参数确认{batch}.xlsx"``
    with one sheet ``"{category}参数数据"`` per category (``/`` in the category
    name is replaced by ``_``).

    Args:
        df: Product rows with at least the columns ``id``, ``name``,
            ``zi_brandname``, ``zi_subcategoryname`` and ``url_params``.
        channel_alias: Channel alias, used in the output file name.
        batch: Batch identifier, used in the output file name.
    """
    import ast

    mssql = MSSQL('123.56.115.207','ZI_NEW')
    cursor_zi_new = mssql._cur
    try:
        # Specification items of every category.
        cursor_zi_new.execute("select * from vw_property")
        subtitle_df = pd.DataFrame(
            cursor_zi_new.fetchall(),
            columns=[column[0] for column in cursor_zi_new.description],
        )
        # Mapping from external parameter names to the standard item names.
        cursor_zi_new.execute("select * from p_skusubtitle_out_map")
        subtitle_map_df = pd.DataFrame(
            cursor_zi_new.fetchall(),
            columns=[column[0] for column in cursor_zi_new.description],
        )
    finally:
        # All database work is done; release the connection before exporting.
        mssql.Close()

    subtitle_df['subtitle'] = subtitle_df['subtitle'].apply(lambda x: x.strip())
    # The first character of ``identy`` is the "required" flag.
    subtitle_df['require_param'] = subtitle_df['identy'].apply(lambda x: x[0])

    # The context manager saves and closes the workbook; ExcelWriter.save()
    # is no longer available in current pandas releases.
    with pd.ExcelWriter(f"{channel_alias}建库产品参数确认{batch}.xlsx") as writer:
        for category in df['zi_subcategoryname'].unique().tolist():
            cat_df = df[df['zi_subcategoryname'] == category]
            single_subtitle_df = subtitle_df[subtitle_df['name'] == category]

            requier_param_list = single_subtitle_df[single_subtitle_df['require_param'] == '1']['subtitle'].tolist()
            non_requier_param_list = [
                param
                for param in single_subtitle_df[single_subtitle_df['require_param'] != '1']['subtitle'].tolist()
                if param not in requier_param_list
            ]
            # 'CPU属性' is not asked for; per the original note it is filled
            # in automatically when the product is created.
            if 'CPU属性' in requier_param_list:
                requier_param_list.remove('CPU属性')
            param_list_all = [str(param) + "(*)" for param in requier_param_list] + non_requier_param_list
            # Keep the detailed crawled information as the last item.
            if "详细参数信息" in param_list_all:
                param_list_all.remove("详细参数信息")
                param_list_all.append("详细参数信息")

            # External name -> standard name for this category (first match wins).
            single_subtitle_map_df = subtitle_map_df[subtitle_map_df['categoryname'] == category]
            std_name_by_out_name = {}
            for out_name, std_name in zip(single_subtitle_map_df['outsubtitle'].tolist(),
                                          single_subtitle_map_df['subtitle'].tolist()):
                std_name_by_out_name.setdefault(out_name, std_name)

            export_rows = []
            index_ = Index()  # progress indicator
            for counter, (_, row) in enumerate(cat_df.iterrows(), start=1):
                try:
                    print(index_(counter, len(cat_df)-1), end='%')
                except Exception:
                    # Fall back to a total of 1 when the computed total is unusable.
                    print(index_(counter, 1), end='%')

                # SECURITY NOTE: url_params is crawled text; it is parsed with
                # ast.literal_eval (literals only) instead of eval().
                try:
                    url_params = ast.literal_eval(row['url_params'])
                except Exception:
                    url_params = {}
                if not isinstance(url_params, dict):
                    url_params = {}

                # Crawled values re-keyed by their standard item name.
                std_url_params = {
                    std_name_by_out_name[key]: crawled_value
                    for key, crawled_value in url_params.items()
                    if key in std_name_by_out_name
                }

                for param in param_list_all:
                    # NOTE(review): required items carry the "(*)" suffix here,
                    # so they only match crawled keys spelled with that
                    # suffix - confirm this is intended.
                    if param == '详细参数信息':
                        value = row['url_params']
                    elif param in url_params:
                        value = url_params[param]
                    else:
                        value = std_url_params.get(param, '')
                    export_rows.append((row['id'], row['name'], row['zi_brandname'],
                                        row['zi_subcategoryname'], param, value))

            export_df = pd.DataFrame(
                export_rows,
                columns=['id', 'name', 'zi_brandname', 'zi_subcategoryname', 'param', 'value'],
            )
            # '/' is not allowed in an Excel sheet name.
            category = category.replace('/','_')
            export_df.to_excel(writer, sheet_name=f"{category}参数数据")
            print(f"完成{category}参数整理")
#校验建库参数数据,若有问题,添加数据字典
def CheckParamsData(path):
    """Validate the required parameter values of a confirmation workbook.

    For every category listed in ``important_category`` (ZI_NEW database)
    that has a sheet ``"{category}参数数据"`` in the workbook (``/`` replaced by
    ``_``), the required parameters (those marked ``(*)``) of each product are
    sent to the ``GetZGCParamValuesInfo`` API.  Every value the API flags
    with ``False`` and that has no row in ``ShuJuZiDian_Cfg`` yet is inserted
    there (category, parameter, primitive value).

    Args:
        path: Path of the ``.xlsx`` workbook to check.

    Returns:
        dict: The ``paramsValue_dict`` results of all categories merged into
        one dict when no dictionary row had to be added.  Otherwise ``{}``;
        in that case the newly added dictionary rows whose ``stdvalue`` is
        NULL are exported to ``"<path without .xlsx>(数据字典补充).xlsx"``.
    """
    mssql = MSSQL('123.56.115.207','ZI_NEW')
    cursor_zi_new = mssql._cur
    try:
        cursor_zi_new.execute("select category_name from important_category")
        point_category_list = pd.DataFrame(
            cursor_zi_new.fetchall(),
            columns=[column[0] for column in cursor_zi_new.description],
        )['category_name'].tolist()

        return_dict = dict()
        complicated_data_dict_id_list = list()
        flag = True
        for category in point_category_list:
            try:
                point_category = category.replace('/','_')
                cat_df = pd.read_excel(path,sheet_name = f"{point_category}参数数据",converters = {'productcode':str})
            except Exception:
                # The workbook has no readable sheet for this category.
                continue

            # Required parameters of every product on the sheet.
            id_list = cat_df['id'].unique().tolist()
            params_list = []
            for id_ in id_list:
                single_df = cat_df[cat_df['id'] == id_]
                params_list.append({
                    param.replace("(*)",""): value
                    for param, value in zip(single_df['param'].tolist(), single_df['value'].tolist())
                    if '(*)' in param
                })

            # Ask the API for the standard value of every parameter.
            data ={
                "params_info": {
                    "category": category,
                    "id_list": id_list,
                    "params_dict_list": params_list
                }
            }
            res = zgc_api("GetZGCParamValuesInfo",data)
            res_value_dict = res['paramsValue_dict']
            return_dict.update(res_value_dict)

            for product_values in res_value_dict.values():
                for param, value_info in product_values.items():
                    value,value_flag = tuple(value_info.items())[0]
                    # Explicit comparison kept on purpose: only a flag equal
                    # to False marks the value as unknown.
                    if value_flag == False:  # noqa: E712
                        # Values are bound as parameters so quotes inside a
                        # value cannot break or alter the statement.
                        entry = (str(category), str(param), str(value))
                        cursor_zi_new.execute(
                            "select * from ShuJuZiDian_Cfg where categoryname = %s "
                            "and subtitle = %s and primitive = %s",
                            entry,
                        )
                        if cursor_zi_new.fetchall():
                            # Already present in the data dictionary.
                            continue
                        flag = False
                        cursor_zi_new.execute(
                            "insert into ShuJuZiDian_Cfg (categoryname,subtitle,primitive) "
                            "values (%s,%s,%s)",
                            entry,
                        )
                        cursor_zi_new.execute(
                            "select id from ShuJuZiDian_Cfg where categoryname = %s "
                            "and subtitle = %s and primitive = %s",
                            entry,
                        )
                        complicated_data_dict_id_list.append(cursor_zi_new.fetchall()[0][0])
            print(f"完成{category}参数校验")

        if flag:
            print("本批数据校验通过!,返回标准值字典")
            return return_dict

        # Export the rows that still need a standard value to be filled in.
        data_dict_path = path.split(".xlsx")[0] + '(数据字典补充).xlsx'
        placeholders = ','.join(['%s'] * len(complicated_data_dict_id_list))
        cursor_zi_new.execute(
            "select * from ShuJuZiDian_Cfg where stdvalue is Null and id in (%s)" % placeholders,
            tuple(complicated_data_dict_id_list),
        )
        fill_shujuzidian_df = pd.DataFrame(
            cursor_zi_new.fetchall(),
            columns=[column[0] for column in cursor_zi_new.description],
        )
        fill_shujuzidian_df.to_excel(data_dict_path)
        return {}
    finally:
        # Release the connection on every exit path, including errors.
        mssql.Close()
#上传数据字典补充数据
def ComplicatedDataDict(path):
    """Upload the completed data-dictionary supplement workbook.

    For every row of the workbook the columns ``stdvalue`` and
    ``simplevalue`` are written to the ``ShuJuZiDian_Cfg`` row (ZI_NEW
    database) with the same ``id``.  The index of each processed row is
    printed.

    Args:
        path: Path of the Excel workbook; it must contain the columns ``id``,
            ``stdvalue`` and ``simplevalue``.
    """
    # Read the workbook first so that an unreadable file opens no connection.
    df = pd.read_excel(path)
    mssql = MSSQL('123.56.115.207','ZI_NEW')
    cursor_zi_new = mssql._cur
    try:
        for index,row in df.iterrows():
            # Values are stored as text, exactly as they would be formatted
            # into the statement, but bound as parameters so that quotes in a
            # value cannot break the update.
            cursor_zi_new.execute(
                "update ShuJuZiDian_Cfg set stdvalue = %s,simplevalue = %s where id = %d",
                (str(row['stdvalue']), str(row['simplevalue']), int(row['id'])),
            )
            print(index)
    finally:
        mssql.Close()
#记录标准参数项参数值
def save_collect_data_info(path,std_value_dict):
    """Store the confirmed parameter values of every product.

    Every sheet of the workbook is processed; the category is the sheet name
    without the ``参数数据`` suffix.

    * Key categories (listed in ``important_category``, ZI_NEW database):
      required parameters (marked ``(*)``) take their value from
      ``std_value_dict``; for ``'CPU型号'`` an additional ``'CPU属性'`` entry is
      derived (the part before the first ``-`` unless the model contains
      ``飞腾``, ``龙芯`` or ``兆芯``).  Other parameters keep the sheet value,
      empty cells becoming ``'无该参数信息'``.
    * Other categories: all sheet values are stored together with the
      ``new_name`` column of the sheet.

    The result is written as the text form of a dict to
    ``product_all.params_standard`` (ZI_Service database).

    Args:
        path: Path of the confirmed parameter workbook.
        std_value_dict: ``{str(product id): {parameter: {standard value: flag}}}``
            as read by this function; only the first key of the innermost
            dict is used.
    """
    mssql_service = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql_service._cur
    try:
        mssql = MSSQL('123.56.115.207','ZI_NEW')
        cursor_zi_new = mssql._cur
        try:
            cursor_zi_new.execute("select category_name from important_category")
            point_category_list = pd.DataFrame(
                cursor_zi_new.fetchall(),
                columns=[column[0] for column in cursor_zi_new.description],
            )['category_name'].tolist()
        finally:
            mssql.Close()

        # Sheet names are written with '/' replaced by '_' (not allowed in
        # Excel sheet names), so compare against the same spelling; otherwise
        # a key category such as '单电/微单相机' is not recognised.
        point_sheet_categories = {
            name.replace('/','_') for name in point_category_list if isinstance(name, str)
        }

        # sheet_name=None returns every sheet at once, so each sheet is read
        # only one time.
        for sheet_name, cat_df in pd.read_excel(path,sheet_name = None).items():
            category = sheet_name.replace("参数数据","")
            is_point_category = category in point_sheet_categories
            for id_ in cat_df['id'].unique().tolist():
                if is_point_category:
                    # Work on a copy so the helper columns never touch cat_df.
                    single_df = cat_df[cat_df['id'] == id_].copy()
                    single_df['filter'] = ['1' if '(*)' in param else '0' for param in single_df['param'].tolist()]
                    single_df['param'] = single_df['param'].apply(lambda x:x.replace("(*)",""))
                    necessary_single_df = single_df[single_df['filter'] == '1']
                    unnecessary_single_df = single_df[single_df['filter'] == '0'].fillna("无该参数信息")

                    subtitle_list = []
                    value_list = []
                    for params in necessary_single_df['param'].tolist():
                        std_value = list(std_value_dict[str(id_)][params].keys())[0]
                        subtitle_list.append(params)
                        value_list.append(std_value)
                        if params == 'CPU型号':
                            # Derive the CPU family from the CPU model.
                            subtitle_list.append('CPU属性')
                            if not any(vendor in std_value for vendor in ('飞腾', '龙芯', '兆芯')):
                                std_value = std_value.split('-')[0]
                            value_list.append(std_value)

                    params_dict = str(dict(zip(
                        subtitle_list + unnecessary_single_df['param'].tolist(),
                        value_list + unnecessary_single_df['value'].tolist(),
                    )))
                    # Bound as a parameter, so quotes need no manual escaping.
                    cursor_zi_service.execute(
                        "update product_all set params_standard = %s where id = %d",
                        (params_dict, int(id_)),
                    )
                else:
                    single_df = cat_df[cat_df['id'] == id_].fillna("无该参数信息")
                    single_df['param'] = single_df['param'].apply(lambda x:x.replace("(*)",""))
                    new_name = single_df['new_name'].unique().tolist()[0]
                    params_dict = str(dict(zip(single_df['param'].tolist(),single_df['value'].tolist())))
                    cursor_zi_service.execute(
                        "update product_all set params_standard = %s,new_name = %s where id = %d",
                        (params_dict, str(new_name), int(id_)),
                    )
    finally:
        mssql_service.Close()
    print("完成记录标准参数项参数值")
def _product_named_build_name(named_rule, brandname, category, resolve_value):
    """Expand a space-separated naming rule into the raw product name."""
    skuname = ''
    for element in named_rule.split(" "):
        if not element:
            # Repeated spaces in the rule produce empty tokens; skip them.
            continue
        if element == '品牌名称':
            skuname += brandname + " "
        elif element == '类别名称':
            skuname += category + " "
        elif element[0] == "(":
            # "(a/b/c)" groups one or more parameters inside one bracket pair.
            param_names = element.replace("(","").replace(")","").split("/")
            skuname += "(" + "/".join(resolve_value(name) for name in param_names) + ") "
        else:
            skuname += resolve_value(element) + " "
    return skuname.strip()


def _product_named_tidy_name(skuname, category, brandname):
    """Remove blank bracket entries and surplus spaces, per category layout."""
    if category in ('复印纸','扫描仪'):
        return skuname
    if category in ('笔记本','台式机','一体电脑','碎纸机'):
        return dael_name_content(skuname,brandname)
    if category in ('复印机','多功能一体机','单反相机','单电/微单相机'):
        # Both halves around the category name contain a bracket group.
        pieces = skuname.split(category)
        skuname_part1 = pieces[0]
        skuname_part2 = pieces[1]
        skuname_1 = dael_name_content(skuname_part1,brandname)
        skuname_2 = dael_name_content(skuname_part2,brandname).replace(brandname,"")
        return skuname_1 + " " + category + skuname_2
    if category in ('投影机','激光打印机','空调'):
        if category == '空调':
            skuname_part1 = skuname.split("空调 (")[0].strip()
        else:
            skuname_part1 = skuname.split(category)[0].strip()
        # Drop these two words first so the split below does not hit them.
        skuname_part2 = skuname.replace("中央空调","").replace("空调扇","").split(category)[1]
    else:
        skuname_part1 = skuname.split(category)[0].strip()
        skuname_part2 = skuname.split(category)[1]
    skuname_2 = dael_name_content(skuname_part2,brandname).replace(brandname,"")
    return skuname_part1 + " " + category + skuname_2


def product_named(batch,channel_alias,std_value_dict):
    """Generate the standard product name of every key-category product.

    Loads the products of ``batch`` / ``channel_alias`` with
    ``LoadData(..., 'collectDataConfirm')``.  For every category listed in
    ``important_category`` (ZI_NEW database) the naming rule of that category
    (``skuname_named_rule``) is expanded with the brand, the category and the
    standard parameter values from ``std_value_dict`` (abbreviated through
    ``transform_simplevalue``), tidied, printed and written to
    ``product_all.new_name`` (ZI_Service database).

    Args:
        batch: Batch identifier passed to ``LoadData``.
        channel_alias: Channel alias passed to ``LoadData``.
        std_value_dict: ``{str(product id): {parameter: {standard value: flag}}}``
            as read by this function; only the first item of the innermost
            dict is used.
    """
    collect_data_df = LoadData(batch,channel_alias,'collectDataConfirm')
    mssql_new = MSSQL('123.56.115.207','ZI_NEW')
    cursor_zi_new = mssql_new._cur
    try:
        mssql_service = MSSQL('123.57.45.119','ZI_Service')
        cursor_zi_service = mssql_service._cur
        try:
            def fetch_frame(sql):
                # Run a query on ZI_NEW and return the result as a DataFrame.
                cursor_zi_new.execute(sql)
                return pd.DataFrame(
                    cursor_zi_new.fetchall(),
                    columns=[column[0] for column in cursor_zi_new.description],
                )

            point_category_list = fetch_frame("select category_name from important_category")['category_name'].tolist()
            named_rules_df = fetch_frame("select * from skuname_named_rule")
            data_dict = fetch_frame("select * from ShuJuZiDian_Cfg")

            for category in point_category_list:
                cat_df = collect_data_df[collect_data_df['zi_subcategoryname'] == category]
                if cat_df.empty:
                    continue

                # Data dictionary of this category with upper-cased standard
                # values; rows without a standard value are left untouched.
                single_data_dict = data_dict[data_dict['categoryname'] == category].copy()
                single_data_dict['stdvalue'] = single_data_dict['stdvalue'].apply(
                    lambda x: x.upper() if isinstance(x, str) else x
                )
                named_rule = named_rules_df[named_rules_df['categoryname'] == category]['rule'].str.cat()

                for channel in cat_df['channel_alias'].unique().tolist():
                    df = cat_df[cat_df['channel_alias'] == channel]
                    for _, row in df.iterrows():
                        id_ = str(row['id'])
                        brandname = row['zi_brandname']

                        def resolve_value(param_name):
                            # Standard value of one parameter of this product,
                            # abbreviated where the data dictionary says so.
                            value, _value_flag = tuple(std_value_dict[id_][param_name].items())[0]
                            return transform_simplevalue(
                                cursor_zi_new, single_data_dict, category, param_name, value.upper()
                            )

                        skuname = _product_named_build_name(named_rule, brandname, category, resolve_value)
                        skuname = _product_named_tidy_name(skuname, category, brandname)
                        print(skuname)
                        # The name is bound as a parameter so that quotes in
                        # it cannot break the statement.
                        cursor_zi_service.execute(
                            "update product_all set new_name = %s where id = %d",
                            (skuname, int(id_)),
                        )
        finally:
            mssql_service.Close()
    finally:
        mssql_new.Close()
def UpdateCollectDataConfirm(path):
    """Write the confirmed collection data back to ``product_all``.

    Every row of the workbook sets ``state`` to ``'8.5'`` for its ``id``
    (ZI_Service database).  When the ``state`` cell of the row is ``'1'``
    nothing else is changed; otherwise the link, brand, category, name and
    standard-parameter columns are overwritten with the sheet values.

    Args:
        path: Path of the confirmed Excel workbook.
    """
    df = pd.read_excel(path,converters={'state':str})
    mssql_service = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql_service._cur
    try:
        index_ = Index()  # progress indicator
        for counter, (_, row) in enumerate(df.iterrows(), start=1):
            try:
                print(index_(counter, len(df)-1), end='%')
            except Exception:
                # Fall back to a total of 1 when the computed total is unusable.
                print(index_(counter, 1), end='%')

            id_ = int(row['id'])
            state = row['state']
            # Values are bound as parameters below, so quotes are not escaped
            # by hand.
            url = str(row['url']).strip()
            url_source = str(row['url_source']).strip()
            url_price = str(row['url_price']).strip()
            new_name = row['new_name'].strip()
            zi_brandname = row['zi_brandname'].strip().replace("[","").replace("]","")
            zi_brandcode = str(row['zi_brandcode']).strip().replace("?","").replace("[","").replace("]","")
            zi_subcategoryname = row['zi_subcategoryname'].strip()
            zi_subcategorycode = str(row['zi_subcategorycode']).strip().replace("?","")
            params_standard = row['params_standard'].strip()

            if state == '1':
                cursor_zi_service.execute(
                    "update product_all set state = '8.5' where id = %d",
                    (id_,),
                )
            else:
                cursor_zi_service.execute(
                    "update product_all set url = %s,url_source = %s,url_price = %s,"
                    "zi_brandname = %s,zi_brandcode = %s,zi_subcategoryname = %s,"
                    "zi_subcategorycode = %s,new_name = %s,params_standard = %s,"
                    "state = '8.5' where id = %d",
                    (url, url_source, url_price, zi_brandname, zi_brandcode,
                     zi_subcategoryname, zi_subcategorycode, new_name,
                     params_standard, id_),
                )
    finally:
        mssql_service.Close()
def transform_simplevalue(cursor_zi_new,shujuzidiandf,categoryname,subtitle,stdvalue):
    """Return the short form of a standard parameter value.

    Only the parameters named in the tuple below have short forms; for any
    other parameter the stripped ``stdvalue`` is returned unchanged.

    Args:
        cursor_zi_new: Not used by this function.
        shujuzidiandf: Data dictionary with the columns ``categoryname``,
            ``subtitle``, ``stdvalue`` and ``simplevalue``.
        categoryname: Category to look up.
        subtitle: Parameter name to look up.
        stdvalue: Standard value; surrounding whitespace is ignored.

    Returns:
        The short form; ``" "`` when the dictionary says ``'无简称'``; ``False``
        (after printing a message) when the value is missing from the
        dictionary or has more than one distinct short form.
    """
    cleaned = stdvalue.strip()
    abbreviated_subtitles = (
        'CPU型号', '显存容量', '操作系统', '双面器', '双面输稿器',
        '网络打印', '标配外服务及配件', '标配外耗材', '镜头描述', '碎纸效果',
    )
    if subtitle not in abbreviated_subtitles:
        return cleaned

    same_category = shujuzidiandf['categoryname'] == categoryname
    same_subtitle = shujuzidiandf['subtitle'] == subtitle
    same_value = shujuzidiandf['stdvalue'] == cleaned
    matches = shujuzidiandf[same_category & same_subtitle & same_value]
    candidates = set(matches['simplevalue'].tolist())

    if not candidates:
        print(f"非法值,不存在数据字典中。{subtitle},{cleaned}")
        return False
    if len(candidates) > 1:
        print(f"异常数据,具有多个简称。{subtitle},{cleaned}")
        return False

    simplevalue = next(iter(candidates))
    if simplevalue == '无简称':
        return " "
    return simplevalue
def dael_name_content(skuname,brand):
    """Tidy the first bracket group of a product name.

    The brand is removed from ``skuname`` and put back at the front.  Inside
    the first ``(...)`` group the ``/``-separated entries that are a single
    space (placeholders for values without a short form) are dropped; when
    every entry is such a placeholder the whole group is removed.  Only the
    text up to the second ``(`` or second ``)`` is kept, as before.

    Args:
        skuname: Raw product name.
        brand: Brand name contained in (or to be prefixed to) the name.

    Returns:
        str: The tidied name with surrounding whitespace removed.  A name
        without any ``(`` has nothing to tidy and is returned as
        ``brand + name`` (stripped) instead of raising ``IndexError``.

    Raises:
        IndexError: If the name has a ``(`` that is not followed by ``)``.
    """
    without_brand = skuname.replace(brand,'')
    if '(' not in without_brand:
        return (brand + without_brand).strip()

    segments = without_brand.split('(')
    name_head = segments[0]
    bracket_parts = segments[1].split(')')
    name_tail = bracket_parts[1]
    # Entries of the bracket group, without the blank placeholders.
    kept_content = [element for element in bracket_parts[0].split('/') if element != ' ']

    if not kept_content:
        # Nothing left inside the brackets: drop the group altogether.
        if name_tail == '':
            res = brand + name_head
        else:
            res = brand + name_head + " " + name_tail
    else:
        temp_content = "(" + "/".join(kept_content).strip('/') + ")"
        res = brand + name_head + temp_content + name_tail
    return res.strip()
def create_to_db(path,std_value_dict):
#创建链接
mssql_new = MSSQL('123.56.115.207','ZI_NEW')
cursor_zi_new = mssql_new._cur
mssql_price = MSSQL('123.57.45.119','price_calculate')
cursor_zi_price = mssql_price._cur
mssql_service = MSSQL('123.57.45.119','ZI_Service')
cursor_zi_service = mssql_service._cur
#获取重点类列表
cursor_zi_new.execute(f"select category_name from important_category")
point_category_list = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['category_name'].tolist()
#获取重点类命名规则
cursor_zi_new.execute(f"select * from skuname_named_rule")
named_rules_df = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])
#获取重点类属性规格项数据 并提取参数项自身属性(必填项、匹配项、标准项、配件项)
cursor_zi_new.execute(f"select * from vw_property where name in (select category_name from important_category)")
data = cursor_zi_new.fetchall()
subtitle_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
subtitle_df['subtitle'] = subtitle_df['subtitle'].apply(lambda x: x.strip())
subtitle_df['require_param'] = subtitle_df['identy'].apply(lambda x: x[0])
subtitle_df['match_param'] = subtitle_df['identy'].apply(lambda x: x[1])
subtitle_df['standard_param'] = subtitle_df['identy'].apply(lambda x: x[2])
subtitle_df['part_param'] = subtitle_df['identy'].apply(lambda x: x[3])
#获取重点类SKU数据
cursor_zi_new.execute(f"select * from vw_sku_params where categoryname in (select category_name from important_category)")
data = cursor_zi_new.fetchall()
sku_db_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
#获取数据字典
cursor_zi_new.execute("select * from ShuJuZiDian_Cfg")
data_dict = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])
#获取spu价格黑名单
cursor_zi_price.execute("select spuid from spu_calculate_price where zc_price = 0 and ds_price = 0 and st_price = 0")
hmd_spuid_list = pd.DataFrame(cursor_zi_price.fetchall(), columns=[tuple[0] for tuple in cursor_zi_price.description])['spuid'].to_list()
new_df_combine_list = list()
for category in point_category_list:
try:
point_category = category.replace('/','_')
cat_df = pd.read_excel(path,sheet_name = f"{point_category}参数数据",converters = {'productcode':str})
channel_list = cat_df['channel_alias'].unique().tolist()
except:
continue
for channel_alias in channel_list:
df = cat_df[cat_df['channel_alias'] == channel_alias]
cat_subtitle_df = subtitle_df[subtitle_df['name'] == category]#获取该类别的属性规格项数据
#cat_spu_db_df = spu_db_df[spu_db_df['categoryname'] == point_category]#获取该类别的所有SPU数据
cat_sku_db_df = sku_db_df[sku_db_df['categoryname'] == category]#获取该类别的所有SKU数据
cat_sku_db_df['skuname'] = cat_sku_db_df['skuname'].apply(lambda x :x.upper())
#获取这个类的数据字典
single_data_dict = data_dict[data_dict['categoryname'] == category]
single_data_dict['stdvalue'] = single_data_dict['stdvalue'].apply(lambda x : x.upper())
#该类别命名规则
named_rule = named_rules_df[named_rules_df['categoryname'] == category]['rule'].str.cat()
new_product_flag_list = []#新建产品标识符号 1:需要创建的产品 0:不需要创建的产品
computer_cat = ['笔记本','台式机','一体电脑']#整机类 需要进行匹配
#spu_id = [] #若只匹配到SPU,则记录spuid,方便后续建库使用
df['remark'] = df['remark'].apply(lambda x:str(x))
df['productcode'] = df['productcode'].apply(lambda x:str(x))
#产品命名
new_name_list = []
sku_list = list()
spuid_list = list()
for index,row in df.iterrows():
if (row['remark'] != 'nan' and row['remark'] != 'None') or (row['productcode'] != 'nan' and row['productcode'] != 'None'):
new_name_list.append(" ")
continue
id_ = str(row['id'])
skuname = ''
brandname = row['zi_brandname']
for element in named_rule.split(" "):
if element == '品牌名称':
value = brandname
elif element == '类别名称':
value = category
elif element[0] == "(" and "/" in element:
element = element.replace("(","").replace(")","")
detail_element_list = element.split("/")
for i in range(len(detail_element_list)):
#value = cat_sku_df[(cat_sku_df['sku'] == sku) & (cat_sku_df['name'] == detail_element_list[i])]['value'].tolist()[0]
#获取标准参数值
param_ = detail_element_list[i]
value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
value = transform_simplevalue(cursor_zi_new,single_data_dict,category,detail_element_list[i],value.upper())
#if value:
# pass
#else:
# flag = False
# continue
if i == 0:
skuname += "(" + value + "/"
elif i == len(detail_element_list)-1:
skuname += value + ") "
else:
skuname += value + "/"
continue
elif element[0] == "(" and "/" not in element:
element = element.replace("(","").replace(")","")
param_ = element
value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
value = transform_simplevalue(cursor_zi_new,single_data_dict,category,element,value.upper())
#if value:
# pass
#else:
# flag = False
# continue
skuname += "(" + value + ") "
continue
else:
param_ = element
value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
value = transform_simplevalue(cursor_zi_new,single_data_dict,category,element,value.upper())
#if value:
# pass
#else:
# flag = False
# continue
skuname += value + " "
#if flag == False:
# continue
skuname = skuname.strip()
#处理名称中的空格
if category in ['复印纸','扫描仪']:
skuname = skuname
elif category in ['笔记本','台式机','一体电脑','碎纸机']:
skuname = dael_name_content(skuname,brandname)
elif category in ['复印机','多功能一体机','单反相机']:
skuname_part1 = skuname.split(category)[0]
skuname_part2 = skuname.split(category)[1]
skuname_1 = dael_name_content(skuname_part1,brandname)
skuname_2 = dael_name_content(skuname_part2,brandname).replace(brandname,"")
skuname = skuname_1 + " " + category + skuname_2
elif category in ['投影机','激光打印机','空调']:
if category == '空调':
skuname_part1 = skuname.split("空调 (")[0].strip()
else:
skuname_part1 = skuname.split(category)[0].strip()
skuname_part2 = skuname.replace("中央空调","").replace("空调扇","").split(category)[1]
skuname_2 = dael_name_content(skuname_part2,brandname).replace(brandname,"")
skuname = skuname_part1 + " " + category + skuname_2
elif category in ['单电/微单相机']:
skuname_part1 = skuname.split(category)[0]
skuname_part2 = skuname.split(category)[1]
skuname_1 = dael_name_content(skuname_part1,brandname)
skuname_2 = dael_name_content(skuname_part2,brandname).replace(brandname,"")
skuname = skuname_1 + " " + category + skuname_2
else:
skuname_part1 = skuname.split(category)[0].strip()
skuname_part2 = skuname.split(category)[1]
skuname_2 = dael_name_content(skuname_part2,brandname).replace(brandname,"")
skuname = skuname_part1 + " " +category + skuname_2
print(skuname)
new_name_list.append(skuname)
df['new_name'] = new_name_list
for index,row in df.iterrows():
if row['remark'] != 'nan' and row['remark'] != 'None':#驳回数据
new_product_flag_list.append('0')
cursor_zi_service.execute(f"update product_all set state = '2',new_name = '{row['new_name']}',remark = '{row['remark']}' where id = {row['id']}")
elif row['productcode'] != 'nan' and row['productcode'] != 'None':
new_product_flag_list.append('0')
cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{row['productcode']}' where id = {row['id']}")
else:
newname = row['new_name']
#匹配数据(整机类进行匹配)
if cat_sku_db_df[cat_sku_db_df['skuname'] == row['new_name'].upper()].empty:#未匹配上数据
new_product_flag_list.append('1')
cursor_zi_service.execute(f"update product_all set state = '2',remark = '未匹配上产品',new_name = '{newname}' where id = {row['id']}")
else:#匹配上数据
productcode = cat_sku_db_df[cat_sku_db_df['skuname'] == row['new_name'].upper()]['sku'].tolist()[0]
cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{productcode}',new_name = '{newname}',remark = Null,pic_state = '0' where id = {row['id']}")
new_product_flag_list.append('0')
print('完成名称匹配')
df['new_product_flag'] = new_product_flag_list
new_df = df[df['new_product_flag'] == '1']#需要新建的产品
if category in computer_cat:
cpu_attr_list = []
for cpu in new_df['CPU型号'].tolist():
if '飞腾' in cpu or '龙芯' in cpu or '兆芯' in cpu:
pass
else:
cpu = cpu.split('-')[0]
cpu_attr_list.append(cpu)
new_df['CPU属性'] = cpu_attr_list
else:
continue
#新建产品
print(f"开始新建产品,共计:'{len(new_df)}'件产品")
index_ = 0
delete_index_list = list()
for index,row in new_df.iterrows():
id_ = str(row['id'])
brand = row['zi_brandname'].strip()
brandcode = row['zi_brandcode'] # 建库需要的品牌编码
#创建SPU 获取SPUid
name = str(brand)
for spu_param in cat_subtitle_df[cat_subtitle_df['skuorspu'] == 'spu'].sort_values("Expr1")['subtitle'].tolist():
if spu_param == '产品品牌':
continue
else:
name += " "
name += str(row[spu_param])
name = name + " " +str(row['zi_subcategoryname'])#spu名称
spu = str(uuid.uuid1()).replace('-','')#spu编码
category_code = int(str(row['zi_subcategorycode']).replace("?",""))#类别编码
brand_code = brandcode#品牌编码
cursor_zi_new.execute(f"select id from p_spu where categoryid = {category_code} and brandid = {brand_code} and spuname = '{name}'")
data = cursor_zi_new.fetchall()
spu_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
param_deal_flag = False #是否处理参数
if spu_df.empty:
#cursor_zi_new.execute(f"insert into p_spu (spuname,spu,categoryid,brandid) values ('{name}','{spu}',{category_code},{brand_code})")
#param_deal_flag = True
delete_index_list.append(index)
continue
else:
spuid = spu_df['id'].tolist()[0]
if spuid in hmd_spuid_list:
delete_index_list.append(index)
continue
spuid_list.append(spuid)
#cursor_zi_service.execute(f"update product_all_original_list set spuid = {spuid} where id = {row['id']}")
if param_deal_flag:
#创建SPU属性
for index,spu_param_row in cat_subtitle_df[cat_subtitle_df['skuorspu'] == 'spu'].iterrows():
subtitle = spu_param_row['subtitle'].strip()
subtitleid = spu_param_row['subtitleid']
param_ = subtitle
value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
cursor_zi_new.execute(f"select a.valueid from p_valuemap a \
left join p_value b \
on a.valueid = b.id \
left join p_subtitle c \
on b.subtitleid = c.id \
where a.spuid = {spuid} and c.name = '{subtitle}'")
data = cursor_zi_new.fetchall()
valueid_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
if valueid_df.empty:
cursor_zi_new.execute(f"insert into p_value (subtitleid,value) values ({subtitleid}, '{value}')")
cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"select id from p_valuemap where spuid = {spuid} and valueid = {valueid}")
data = cursor_zi_new.fetchall()
check_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
if check_df.empty:
cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
else:
valueid_list = valueid_df['valueid'].tolist()
if len(valueid_list) == 1:
valueid = valueid_list[0]
cursor_zi_new.execute(f"select value from p_value where id = {valueid}")
find_value = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['value'].tolist()[0]
if value == find_value:
continue
else:
cursor_zi_new.execute(f"delete from p_valuemap where spuid = {spuid} and valueid = {valueid}")
#找一个合适的valueid
cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
try:
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
except:
cursor_zi_new.execute(f"insert into p_value (subtitleid,value) values ({subtitleid}, '{value}')")
cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
else:
for valueid in valueid_list:
cursor_zi_new.execute(f"delete from p_valuemap where spuid = {spuid} and valueid = {valueid}")
#找一个合适的valueid
cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
try:
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
except:
cursor_zi_new.execute(f"insert into p_value (subtitleid,value) values ({subtitleid}, '{value}')")
cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
#创建SKU
skuname = str(row['new_name']).strip()
sku = datetime.datetime.now().strftime("%Y-%m-%d%H:%M:%S.%f").replace('-','').replace(':','').replace('.','')[:-3]
time.sleep(0.001)
#spuid = int(row['spuid'])
state = 1
source = '1'
createddate = datetime.datetime.now().strftime("%Y-%m-%d %X")
cursor_zi_new.execute(f"select sku from p_sku where skuname = '{skuname}' and spuid = {spuid}")
data = cursor_zi_new.fetchall()
sku_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
param_deal_flag = False#是否处理参数
if sku_df.empty:
cursor_zi_new.execute(f"insert into p_sku (skuname,sku,spuid,state,source,createddate) values ('{skuname}','{sku}',{spuid},{state},'{source}','{createddate}')")
cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{sku}',new_name = '{row['new_name']}',remark = Null,pic_state = '1' where id = {row['id']}")
param_deal_flag = True
else:
cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{sku_df['sku'].tolist()[0]}',new_name = '{row['new_name']}',remark = Null,pic_state = '0' where id = {row['id']}")
sku = sku_df['sku'].tolist()[0]
cursor_zi_new.execute(f"select id from p_sku where sku = '{sku}'")
data = cursor_zi_new.fetchall()
skuid = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0] #skuid
sku_list.append(sku)
if param_deal_flag:
#录入产品详细参数
for index,sku_param_row in cat_subtitle_df[cat_subtitle_df['skuorspu'] == 'sku'].iterrows():
subtitle = sku_param_row['subtitle'].strip()
subtitleid = sku_param_row['subtitleid']
if subtitle == '产品品牌':
continue
try:
param_ = subtitle
value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
except:
continue
cursor_zi_new.execute(f"select a.valueid from p_skuvaluemap a \
left join p_skuvalue b \
on a.valueid = b.id \
left join p_skusubtitle c \
on b.subtitleid = c.id \
where a.skuid = {skuid} and c.name = '{subtitle}'")
data = cursor_zi_new.fetchall()
valueid_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
if valueid_df.empty:
cursor_zi_new.execute(f"insert into p_skuvalue (subtitleid,value) values ({subtitleid}, '{value}')")
cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"select id from p_skuvaluemap where skuid = {skuid} and valueid = {valueid}")
data = cursor_zi_new.fetchall()
check_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
if check_df.empty:
cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
else:
valueid_list = valueid_df['valueid'].tolist()
if len(valueid_list) == 1:
valueid = valueid_list[0]
cursor_zi_new.execute(f"select value from p_skuvalue where id = {valueid}")
find_value = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['value'].tolist()[0]
if value == find_value:
continue
else:
cursor_zi_new.execute(f"delete from p_skuvaluemap where skuid = {skuid} and valueid = {valueid}")
#找一个合适的valueid
cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
try:
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
except:
cursor_zi_new.execute(f"insert into p_skuvalue (subtitleid,value) values ({subtitleid}, '{value}')")
cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
else:
for valueid in valueid_list:
cursor_zi_new.execute(f"delete from p_skuvaluemap where skuid = {skuid} and valueid = {valueid}")
#找一个合适的valueid
cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
try:
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
except:
cursor_zi_new.execute(f"insert into p_skuvalue (subtitleid,value) values ({subtitleid}, '{value}')")
cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
index_+=1
print(f"已处理完'{index_}'件产品")
print(f"完成{channel_alias}{category}数据建库")
#计算价格
new_df = new_df.drop(index= delete_index_list)
new_df['sku'] = sku_list
new_df['spuid'] = spuid_list
export_df = new_df[['id','sku','spuid','zi_subcategoryname','zi_brandname','new_name','channel_alias']]
new_df_combine_list.append(export_df)
new_df_combine = pd.concat(new_df_combine_list)
mssql_new.Close()
mssql_price.Close()
mssql_service.Close()
return new_df_combine
def check_configure(new_df_all):
    """Run ``check_configure_price`` once per sub-category found in *new_df_all*.

    :param new_df_all: DataFrame with at least the columns
        ``zi_subcategoryname`` and ``sku``.
    """
    category_column = new_df_all['zi_subcategoryname']
    for category in category_column.unique().tolist():
        category_rows = new_df_all[category_column == category]
        # Record the accessory price differences for the newly added products.
        check_configure_price(category, category_rows['sku'].unique().tolist())
    print("检查完毕")
def get_data_all():
    """Return the ``product_all`` rows that still lack a price or a name.

    Selects every row with ``state = '9'`` whose ``cal_price`` or
    ``new_name`` is NULL from the ``ZI_Service`` database.

    :return: DataFrame whose columns follow the cursor description.
    """
    # Open the database connection.
    mssql = MSSQL('123.57.45.119','ZI_Service')
    try:
        cursor_zi_service = mssql._cur
        cursor_zi_service.execute("select * from product_all where state = '9' and (cal_price is Null or new_name is Null)")
        data = cursor_zi_service.fetchall()
        column_names = [column[0] for column in cursor_zi_service.description]
        df = pd.DataFrame(data, columns=column_names)
    finally:
        # Release the connection even when the query or frame creation fails.
        mssql.Close()
    return df
def get_price(df):
    """Look up a price for every row of *df* and store it in ``product_all``.

    For each row the SKU (``productcode``) is searched in ``zd_week_price``
    for the most recent period listed in ``zd_entry_goods_price``; when
    nothing is found there, ``sku_calculate_price`` is used instead. If
    neither table has the SKU a placeholder text is written as the price.

    :param df: DataFrame with at least the columns ``productcode`` and ``id``.
    """
    # Open the connections.
    mssql_index = MSSQL('123.56.115.207','zdindex')
    mssql_price = MSSQL('123.57.45.119','price_calculate')
    mssql_service = MSSQL('123.57.45.119','ZI_Service')
    try:
        cursor_zdindex = mssql_index._cur
        cursor_zi_price = mssql_price._cur
        cursor_zi_service = mssql_service._cur
        # Progress bar.
        progress = Index()
        # len(df) - 1 is 0 for a one-row frame, which would make the progress
        # bar divide by zero; use 1 as the smallest total.
        total = max(len(df) - 1, 1)
        for counter, (_, row) in enumerate(df.iterrows(), start=1):
            print(progress(counter, total), end='%')
            sku = str(row['productcode']).strip()
            # Values are passed as query parameters so that quotes inside
            # them cannot break the statement.
            cursor_zdindex.execute(
                "select index_price_wave from zd_week_price "
                "where periods in (select top 1 max(periods) from zd_entry_goods_price) "
                "and goods_id = %s",
                (sku,),
            )
            found = cursor_zdindex.fetchall()
            if found:
                cal_price = found[0][0]
            else:
                cursor_zi_price.execute(
                    "select sku_price from sku_calculate_price where sku = %s",
                    (sku,),
                )
                found = cursor_zi_price.fetchall()
                cal_price = found[0][0] if found else "无法获取价格,请核查"
            # cal_price is sent as text, as the interpolated statement did.
            cursor_zi_service.execute(
                "update product_all set cal_price = %s,productcode = %s where id = %s",
                (str(cal_price), sku, int(row['id'])),
            )
    finally:
        mssql_index.Close()
        mssql_price.Close()
        mssql_service.Close()
def get_name(df):
    """Copy the SKU name from ``p_sku`` into ``product_all.new_name``.

    For every row of *df* the ``skuname`` stored for ``productcode`` is
    read from the ``ZI_NEW`` database and written to ``new_name`` (with
    ``remark`` reset to NULL) in ``ZI_Service``. When the SKU is not
    found a placeholder text is written instead.

    :param df: DataFrame with at least the columns ``productcode`` and ``id``.
    """
    mssql_new = MSSQL('123.56.115.207','ZI_NEW')
    mssql_service = MSSQL('123.57.45.119','ZI_Service')
    try:
        cursor_zi_new = mssql_new._cur
        cursor_zi_service = mssql_service._cur
        # Progress bar.
        progress = Index()
        # len(df) - 1 is 0 for a one-row frame, which would make the progress
        # bar divide by zero; use 1 as the smallest total.
        total = max(len(df) - 1, 1)
        for counter, (_, row) in enumerate(df.iterrows(), start=1):
            print(progress(counter, total), end='%')
            sku = row['productcode']
            cursor_zi_new.execute("select skuname from p_sku where sku = %s", (str(sku),))
            found = cursor_zi_new.fetchall()
            # An empty result set is the "not found" case.
            name = found[0][0] if found else "无法获取产品名称"
            # Passed as a parameter: product names may contain quotes.
            cursor_zi_service.execute(
                "update product_all set new_name = %s,remark = Null where id = %s",
                (str(name), int(row['id'])),
            )
    finally:
        mssql_new.Close()
        mssql_service.Close()
def save_sku_relationship(filter,channel_alias):
    """Send the customer-SKU / product-code pairs of one batch to the stock-in API.

    Reads the ``product_all`` rows with ``state = '9'`` for the given
    channel and batch and posts their ``sku``, ``productcode`` and
    ``channel_alias`` columns to ``Stock-InSkuRelationshipInfo``. The API
    response is printed.

    :param filter: value matched against the ``batch`` column (the name
        shadows the builtin but is kept for existing callers).
    :param channel_alias: value matched against the ``channel_alias`` column.
    """
    mssql = MSSQL('123.57.45.119','ZI_Service')
    try:
        cursor_zi_service = mssql._cur
        # Parameterized so quotes in the arguments cannot break the query.
        cursor_zi_service.execute(
            "select * from product_all where state = '9' and channel_alias = %s and batch = %s",
            (str(channel_alias), str(filter)),
        )
        rows = cursor_zi_service.fetchall()
        column_names = [column[0] for column in cursor_zi_service.description]
        df = pd.DataFrame(rows, columns=column_names)
        data = {
            "params_info": {
                "cust_sku_list": df['sku'].tolist(),
                "sku_list": df['productcode'].tolist(),
                "channelAlias_list": df['channel_alias'].tolist()
            }
        }
        res = zgc_api("Stock-InSkuRelationshipInfo",data)
        print(res)
    finally:
        mssql.Close()
def collect_df_to_db(path):
    """Stock in the products of a confirmed workbook and register their prices.

    Steps, in order:

    1. Post brand / category / name / parameters of every row to
       ``Stock-InProductInfo`` and read the returned ``sku_list``.
    2. Write each returned SKU back to ``product_all`` (``state = '9'``).
    3. Post one price record per (url, channel, price) triple of every row
       to ``Stock-InPriceInfo``.
    4. Post the customer-SKU relationship for rows whose ``sku`` is not
       ``'无'`` to ``Stock-InSkuRelationshipInfo``.

    :param path: path of the Excel workbook to read.
    :raises ValueError: if the API returns a different number of SKUs than
        rows were sent; nothing is written to the database in that case.
    """
    df = pd.read_excel(path)
    # Connect before calling the API so that a database problem is detected
    # before any product is created remotely.
    mssql = MSSQL('123.57.45.119','ZI_Service')
    try:
        cursor_zi_service = mssql._cur
        # Stock in.
        id_list = df['id'].tolist()
        # NOTE(review): eval() executes whatever expression the spreadsheet
        # cell contains; only use workbooks from a trusted source. Consider
        # ast.literal_eval if the cells are always plain literals.
        params_list = [eval(cell) for cell in df['params_standard']]
        data = {
            "params_info": {
                "brand_list": df['zi_brandname'].tolist(),
                "brandId_list": df['zi_brandcode'].apply(str).tolist(),
                "category_list": df['zi_subcategoryname'].tolist(),
                "categoryId_list": df['zi_subcategorycode'].apply(str).tolist(),
                "params_list": params_list,
                "name_list": df['new_name'].tolist()
            }
        }
        res = zgc_api("Stock-InProductInfo",data)
        new_sku_list = res['sku_list']
        print("产品入库完成")
        # zip() would silently drop the surplus side; refuse to update on a
        # mismatch so ids are never paired with the wrong SKU.
        if len(new_sku_list) != len(id_list):
            raise ValueError(
                f"Stock-InProductInfo returned {len(new_sku_list)} SKUs for {len(id_list)} products"
            )
        # Update the product_all table.
        for _id, sku in zip(id_list, new_sku_list):
            cursor_zi_service.execute(
                "update product_all set productcode = %s,remark = Null,state = '9' where id = %s",
                (str(sku), int(_id)),
            )
    finally:
        # The original never closed this connection.
        mssql.Close()
    df['productcode'] = new_sku_list
    print("产品编码同步完成")
    # Assemble the price data: one entry per (url, channel, price) triple.
    price_sku_list = []
    source_name_list = []
    price_list = []
    url_list = []
    channelId_list = []
    for _, row in df.iterrows():
        # NOTE(review): eval() on spreadsheet content, see above.
        row_urls = eval(row['url'])
        row_channel_ids = eval(row['url_source'])
        row_prices = eval(row['url_price'])
        for url, channel_id, price in zip(row_urls, row_channel_ids, row_prices):
            price_sku_list.append(row['productcode'])
            source_name_list.append(row['new_name'])
            url_list.append(url)
            channelId_list.append(channel_id)
            price_list.append(price)
    # Store the price relationship.
    data = {
        "params_info": {
            "sku_list": price_sku_list,
            "url_name_list": source_name_list,
            "url_price_list": price_list,
            "url_list": url_list,
            "channelId_list": channelId_list
        }
    }
    res = zgc_api("Stock-InPriceInfo",data)
    print(res)
    print("产品价格录入完成")
    # Assemble the SKU relationship data.
    sku_df = df[df['sku'] != '无']
    if sku_df.empty:
        print("无SKU对应关系录入")
    else:
        data = {
            "params_info": {
                "cust_sku_list": sku_df['sku'].tolist(),
                "sku_list": sku_df['productcode'].tolist(),
                "channelAlias_list": sku_df['channel_alias'].tolist()
            }
        }
        res = zgc_api("Stock-InSkuRelationshipInfo",data)
        print(res)
        print("SKU对应关系录入完成")
# Export the pictures of the products that are waiting to be stocked in
def confirm_product_picture(batch,channel_alias):
    """Write the base64 pictures of a batch to a new ``pic/<batch>-<i>`` folder.

    The rows come from ``LoadData(batch, channel_alias, "dealPicInfo")``.
    A fresh folder is created on every call: ``i`` is the first positive
    integer for which ``pic/<batch>-<i>`` does not exist yet. Every picture
    is saved as ``<productcode>-<n>.jpg`` with ``n`` counting from 1; rows
    without pictures are reported on stdout.

    :param batch: batch identifier, used in the folder name.
    :param channel_alias: channel alias passed through to ``LoadData``.
    """
    df = LoadData(batch,channel_alias,"dealPicInfo")
    current_path = os.getcwd()
    if not os.path.exists('pic/'): # create the base folder on first use
        os.mkdir('pic/')
    i = 1
    while os.path.exists(f"pic/{batch}-{i}"):
        i += 1
    os.mkdir(f"pic/{batch}-{i}")
    save_pic_path = current_path + "/" + f"pic/{batch}-{i}"
    for _, row in df.iterrows():
        sku = str(row['productcode'])
        # NOTE(review): eval() executes the stored text as Python; these
        # columns must only ever contain trusted data.
        url = str(eval(row['url'])[0])
        url_pic_list = eval(row['url_pic'])
        if not url_pic_list:
            print(f"{sku}未能获取产品图片,URL:{url}")
            continue
        for sort_num, url_pic_base64 in enumerate(url_pic_list, start=1):
            img = base64.b64decode(url_pic_base64)
            # 'with' closes the file even if the write fails.
            with open(f"{save_pic_path}/{sku}-{sort_num}.jpg","wb") as fh:
                fh.write(img)
# Upload pictures
def upload_pic(pic_batch):
    """Upload one local picture batch over SFTP and record the paths in ``p_skupic``.

    Every file of ``pic/<pic_batch>/`` is uploaded to
    ``G:\\pic\\n\\<pic_batch>\\`` on the server. Afterwards one
    ``p_skupic`` row is inserted per uploaded file; the SKU is taken from
    the file name (text before the first ``-``) and resolved to its id via
    ``p_sku``. Files whose SKU is not present in ``p_sku`` are reported
    and skipped.

    :param pic_batch: folder name below ``pic/``.
    :return: ``False`` when the local folder does not exist, otherwise ``None``.
    """
    if not os.path.exists(f"pic/{pic_batch}"):
        print("请选传入确的图片批次",os.listdir("pic/"))
        return False
    upload_local_path = os.getcwd() + "/" + f"pic/{pic_batch}/"
    upload_target_path = f"G:\\pic\\n\\{pic_batch}\\"
    saved_path_list = []
    # NOTE(review): host and credentials are hard-coded here; consider
    # moving them to configuration.
    transport = paramiko.Transport(('123.56.115.207',22))
    try:
        transport.connect(username='root',password='123456')
        sftp = paramiko.SFTPClient.from_transport(transport)
        # Progress bar.
        progress = Index()
        img_path_list = os.listdir(upload_local_path)
        # len - 1 is 0 for a single file; use 1 so the bar cannot divide by zero.
        total = max(len(img_path_list) - 1, 1)
        for counter, img_path in enumerate(img_path_list, start=1):
            local_file = upload_local_path + img_path
            remote_file = upload_target_path + img_path
            try:
                sftp.put(local_file, remote_file)
            except IOError:
                # Assume the remote batch folder is missing: create it and retry.
                sftp.mkdir(upload_target_path)
                sftp.put(local_file, remote_file)
            saved_path_list.append(remote_file)
            print(progress(counter, total), end='%')
    finally:
        # Always release the SSH transport, also after a failed upload.
        transport.close()
    print("图片数据上传至服务器")
    # Record the picture locations.
    mssql_new = MSSQL('123.56.115.207','ZI_NEW')
    try:
        cursor_zi_new = mssql_new._cur
        cursor_zi_new.execute("select id,sku from p_sku")
        # One dictionary lookup per picture instead of scanning all rows;
        # setdefault keeps the first id when a SKU appears more than once.
        sku_to_id = {}
        for sku_id, known_sku in cursor_zi_new.fetchall():
            sku_to_id.setdefault(known_sku, sku_id)
        # Progress bar.
        progress = Index()
        total = max(len(saved_path_list) - 1, 1)
        for counter, path in enumerate(saved_path_list, start=1):
            print(progress(counter, total), end='%')
            # Path relative to G:\pic, written with forward slashes.
            saved_path = path.split("G:\\pic")[1].replace("\\","/")
            sku = saved_path.split("/")[-1].split("-")[0]
            skuid = sku_to_id.get(sku)
            if skuid is None:
                # Do not abort the remaining inserts for one unknown SKU.
                print(f"{sku}未在p_sku中找到,跳过图片记录:{saved_path}")
                continue
            cursor_zi_new.execute(
                "insert into p_skupic (skuid,path) values (%s,%s)",
                (skuid, saved_path),
            )
    finally:
        mssql_new.Close()
    print("图片数据位置信息存储至数据库")
# Driver script for one template stock-in ("建库") run. The statements below
# execute at module level, in this order.
# NOTE(review): the indentation of this file was lost in the copy under
# review, so the extent of the `if std_value_dict:` body below cannot be
# determined from here — confirm against the original file.
# Path of the stock-in template workbook
path = '/Users/rico/project/模板建库v2/历史数据/20210311相机/相机建库模板3.11-何丽娟.xlsx'
# Initialize the data
InitializeData(path)
# Initialize the parameters
channel_alias = 'HLJ-CZBJK'
batch = '2021-03-11'
# Load the data
df = LoadData(batch,channel_alias,'deal')
# Fetch the other stock-in information (brand code, category code, product name)
GetOtherProductInfo(df)
'''
#分析获取数据基本信息(品牌,类别,型号)
AnalyseBasicInfo(df)
#导出数据(导出文件至当前文件执行路径)
ExportToExcelBasicConfirm(batch,channel_alias)
#返回数据并更新
path = "/Users/rico/project/模板建库v2/历史数据/20210311相机/HLJ-CZBJK基础信息确认2021-03-11.xlsx"
UpdateBasicData(path)
'''
# Get the data to be stocked in
df = GetCollectData(batch,channel_alias)
# Crawl the parameter information and pictures behind the links
df_crawled = GetParamsinfoAndPic(df)
# Export the stock-in data for parameter completion
GetCollectDataDetail(df_crawled,channel_alias,batch)
# Process the stock-in data
path = "/Users/rico/Downloads/HLJ-CDJK建库产品参数确认2021-01-21(1).xlsx"
## Validate the parameter data
std_value_dict = CheckParamsData(path)
## Import the supplementary data dictionary
data_dict_path = "/Users/rico/project/模板建库v2/CL-MBJK建库产品参数确认2020-11-20(1)(数据字典补充).xlsx"
ComplicatedDataDict(data_dict_path)
if std_value_dict:
## Record the stock-in information (standard parameter items and values)
save_collect_data_info(path,std_value_dict)
## Product naming
product_named(batch,channel_alias,std_value_dict)
# Export the data to be confirmed before stock-in
df = LoadData(batch,channel_alias,'collectDataConfirm')
df.to_excel(f"{channel_alias}建库信息确认{batch}.xlsx")
# Update with the confirmed stock-in information
path = '/Users/rico/Downloads/CL-MBJK建库信息确认2021-01-28(1).xlsx'
UpdateCollectDataConfirm(path)
# Stock in the products and store the prices
collect_df_to_db(path)
# Process the pictures
confirm_product_picture(batch,channel_alias)
print(f"请处理{batch}批次的图片数据,有问题的图片直接删除即可。(请不要修改图片路径)")
# Upload the pictures to the server and the database
pic_batch = batch + "-1"
upload_pic(pic_batch)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 9 23:37:15 2020
@author: rico
"""
import pymssql
import pandas as pd
import os
import requests
import tensorflow as tf
from lxml import etree
import re
import datetime
import json
import time
#from text_moudle.run_cnn import name2subcategory as generl_name2subcategory
#from text_moudle_LXWL.run_cnn import name2subcategory as LXWL_name2subcategory
#print (os.getcwd())
class Index(object):
    """Text progress bar.

    Calling an instance with ``(now, total)`` returns a string that starts
    with a carriage return, followed by a ``[###   ]`` bar and the
    percentage value; the caller prints it (typically with ``end='%'``).
    """

    def __init__(self, number=50, decimal=2):
        """
        :param decimal: number of decimal places kept in the percentage
        :param number: width of the bar, i.e. number of '#' shown at 100%
        """
        self.decimal = decimal
        self.number = number
        self.a = 100/number  # percentage points represented by one '#'

    def __call__(self, now, total):
        """Return the bar for *now* out of *total* as a printable string."""
        # 1. Current percentage.
        percentage = self.percentage_number(now, total)
        # 2. Number of '#' that percentage corresponds to.
        well_num = int(percentage / self.a)
        # 3. Render the bar.
        progress_bar_num = self.progress_bar(well_num)
        # 4. Bar plus percentage value.
        result = "\r%s %s" % (progress_bar_num, percentage)
        return result

    def percentage_number(self, now, total):
        """
        Compute the percentage.

        :param now: current count
        :param total: total count
        :return: ``now / total * 100`` rounded to ``self.decimal`` places;
            100 when *total* is 0 (callers pass ``len(items) - 1``, which
            is 0 for a single item) instead of raising ZeroDivisionError
        """
        if total == 0:
            return round(100.0, self.decimal)
        return round(now / total * 100, self.decimal)

    def progress_bar(self, num):
        """
        Render the bar.

        :param num: number of '#' to draw; clamped to ``0..self.number`` so
            the bar keeps a constant width even when the percentage is
            below 0 or above 100
        :return: the bar, e.g. ``[###   ]``
        """
        num = max(0, min(num, self.number))
        # 1. The '#' part.
        well_num = "#" * num
        # 2. The padding part.
        space_num = " " * (self.number - num)
        return '[%s%s]' % (well_num, space_num)
def match_sku(sku_list,frm):
    '''
    De-duplicate supplier links against the SKUs already stored.

    The known (productcode, sku) pairs for channel *frm* are read from
    ``Productcode_Sku`` (database ZI_NEW) and ``op_product_sku`` (database
    ZI_DataBase; channel ``SN`` is stored there as ``SN-NEW``), ignoring
    product codes that start with ``78``. Each SKU of *sku_list* is then
    looked up; for the SN channel a second lookup with the prefix
    ``0000000000/`` is tried.

    :param sku_list: channel SKUs to check; surrounding whitespace is ignored
    :param frm: channel code: DL/DW/DZ/GM/JD/LXWL/OFS/SN/YHD/ZCSM
    :return: dict mapping every stripped SKU to the matching product code,
        or to '0' when the SKU is unknown
    '''
    # A T-SQL cursor that deleted duplicate (productcode, sku, frm) rows from
    # productcode_sku used to be kept here as a disabled string; it is not run.
    print('正在获取当前库中所有sku,请稍等.....')  # load all stored SKUs for the check
    op_frm = 'SN-NEW' if frm == 'SN' else frm
    known_rows = []
    # Values (including the LIKE pattern) are passed as parameters.
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database='ZI_NEW',autocommit=True)
    try:
        cursor = conn.cursor()
        cursor.execute(
            "select productcode,sku from Productcode_Sku where frm = %s and productcode not like %s",
            (frm, '78%'),
        )
        known_rows.extend(cursor.fetchall())
        cursor.close()
    finally:
        conn.close()
    # Same lookup in op_product_sku.
    conn_zi_database = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database='ZI_DataBase',autocommit=True)
    try:
        cursor_zi_database = conn_zi_database.cursor()
        cursor_zi_database.execute(
            "select productcode,sku from op_product_sku where frm = %s and productcode not like %s",
            (op_frm, '78%'),
        )
        known_rows.extend(cursor_zi_database.fetchall())
        cursor_zi_database.close()
    finally:
        conn_zi_database.close()
    # sku -> productcode; the first row wins, Productcode_Sku rows come first.
    code_by_sku = {}
    for productcode, known_sku in known_rows:
        if productcode is None or known_sku is None:
            continue  # rows with NULLs cannot be matched
        code_by_sku.setdefault(known_sku.strip(), productcode.strip())
    print('sku获取完毕')
    is_suning = op_frm == 'SN-NEW'
    sku_check = {}
    for raw_sku in set(sku_list):
        sku = str(raw_sku).strip()
        product_no = code_by_sku.get(sku)
        if product_no is None and is_suning:
            # SN SKUs may be stored with a '0000000000/' prefix.
            product_no = code_by_sku.get('0000000000/' + sku)
        sku_check[sku] = '0' if product_no is None else product_no
    return sku_check
def supporturlDataDeal(source,batch):
    '''
    Audit and de-duplicate the crawled ("reverse") data of one batch.

    Works on the ``product`` rows of database ``reverse_data`` whose
    ``channel_url_validate`` is NULL for the given source and batch:

    * rows from channels other than JD/GM/SN are marked as passed with
      no product code;
    * JD/GM/SN rows are audited by ``check_reverse_data`` and the audit
      opinion is stored;
    * JD/GM/SN SKUs are matched against the stored SKUs with
      ``match_sku`` and the result is written to ``zgc_productcode``.

    :param source: value compared with the ``来源`` column
    :param batch: value compared with the ``batch`` column
    :return: None
    '''
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database= 'reverse_data',autocommit=True)
    try:
        cursor = conn.cursor()
        # NOTE(review): source and batch are interpolated unquoted, so callers
        # presumably pass numbers or pre-quoted text — confirm before
        # converting this statement to query parameters.
        cursor.execute(f'select * from product where channel_url_validate is Null and 来源={source} and batch={batch}')
        data = cursor.fetchall()
        column_names = [column[0] for column in cursor.description]
        df = pd.DataFrame(data, columns=column_names)[['product_id', 'channel_sku', 'product_name', 'channel_id', 'channel_product_id']]
        # rename() returns a new frame; avoids modifying a slice in place.
        df = df.rename(columns={'product_id': 'id', 'channel_sku': 'sku', 'product_name': 'name', 'channel_id': 'source','channel_product_id': 'url'})
        df['price'] = '0'
        is_major_channel = df['source'].isin(['JD', 'GM', 'SN'])
        df_otherDS = df[~is_major_channel]
        for id_ in df_otherDS['id'].tolist():
            cursor.execute(
                "update product set channel_url_validate='通过',zgc_productcode='无' where product_id=%s",
                (str(id_),),
            )
        df_DS = df[is_major_channel].reset_index(drop=True)
        # Audit the three major e-commerce channels.
        # NOTE(review): check_reverse_data reads 'brand' and 'category' from
        # each row, which this frame does not contain — verify.
        check_data = check_reverse_data(df_DS)
        # For an empty input check_reverse_data returns the frame unchanged,
        # without the result columns; skip the update in that case.
        if not check_data.empty:
            for id_, suggestion in zip(check_data['update_id'], check_data['审核意见']):
                cursor.execute(
                    "update product set channel_url_validate=%s where product_id=%s",
                    (str(suggestion), str(id_)),
                )
        # De-duplicate the SKUs of the three channels against the stored ones.
        for channel in list(df_DS['source'].unique()):
            print(channel + 'sku排重中')
            sku_list = df_DS[df_DS['source'] == channel]['sku'].tolist()
            sku_check = match_sku(sku_list, channel)
            for channel_sku, product_code in sku_check.items():
                # '0' means "not found"; it is stored as '无'.
                stored_code = '无' if product_code == '0' else product_code
                cursor.execute(
                    "update product set zgc_productcode=%s where channel_sku=%s",
                    (str(stored_code), str(channel_sku)),
                )
            print(channel + 'sku排重完毕')
    finally:
        conn.close()
# Verdict texts written to the '审核意见' column by check_reverse_data.
_REV_PASS = "通过"
_REV_SOLD_OUT = "无货,请按要求提供在销渠道证明"
_REV_NOT_SELF_RUN = "非自营,请按要求提供在销渠道证明"
_REV_BLOCKED = "定制/专用/书籍类产品暂不通过"
_REV_BAD_LINK = "链接有误,请按要求提供在销渠道证明"
_REV_UNKNOWN_SHOP = "非三大电商,请按要求提供在销渠道证明"
_REV_NOT_SOLD_HERE = '该地区不销售(北京市丰台区)'


def _rev_name_blocked(name, stop_pairs):
    '''Return True when ``name`` contains a stop word that its white words do not excuse.'''
    text = str(name)
    for stopword, whitewords in stop_pairs:
        if not isinstance(stopword, str) or stopword not in text:
            continue
        if not isinstance(whitewords, str):
            # No white words recorded: the stop word alone rejects the product.
            # (Previously such stop words were silently never applied.)
            return True
        # NOTE(review): as before, the name is excused only when it contains
        # every listed white word - confirm "any" was not the intent.
        if any(white not in text for white in whitewords.split('/')):
            return True
    return False


def _rev_jd_stock_verdict(session, headers, sku):
    '''Ask the JD stock endpoint whether ``sku`` is purchasable and return a verdict.'''
    stock_url = "https://c0.3.cn/stock?skuId=" + str(sku) + "&area=1_2901_2906_0&cat=9987,653,655"
    reply = get_response(session, stock_url, headers)
    if reply == -1:
        # The stock endpoint could not be reached: treated as a pass.
        return _REV_PASS
    reply.encoding = 'gbk'
    is_purchase = json.loads(reply.text)
    try:
        sold_out = ("无货" in is_purchase['stock']['stockDesc']
                    or "无货" in is_purchase['stock']['StockStateName'])
    except (KeyError, TypeError):
        # Fallback payload layout with the state name at the top level.
        sold_out = "无货" in is_purchase['StockStateName']
    return _REV_SOLD_OUT if sold_out else _REV_PASS


def _rev_audit_jd(session, headers, main_url, item, stop_pairs):
    '''Audit a JD link; updates ``item`` (sku/name/price) and returns the verdict.'''
    sku = main_url.split('/')[-1].split('.')[0]
    item['sku'] = sku
    # Price lookup; get_response returns -1 on failure, so .json() raises then.
    quote = get_response(session, "https://p.3.cn/prices/mgets?skuIds=" + str(sku), headers).json()
    jd_price = quote[0]['p']
    if len(jd_price) == 0:
        jd_price = item['price']
    if jd_price == '-1.00':
        # The submitted price is kept for delisted products.
        return _REV_SOLD_OUT
    item['price'] = jd_price

    page = get_response(session, "https://item.jd.com/" + sku + ".html", headers)
    html = etree.HTML(page.text)
    ziying = html.xpath("//div[@class='name goodshop EDropdown']/em/text()")
    if "自营" not in str(ziying):
        return _REV_NOT_SELF_RUN
    item['name'] = html.xpath("//div[@class='sku-name']/text()")
    if _rev_name_blocked(item['name'], stop_pairs):
        return _REV_BLOCKED
    return _rev_jd_stock_verdict(session, headers, sku)


def _rev_audit_gome(session, headers, main_url, item, stop_pairs):
    '''Audit a Gome link; updates ``item`` (sku/name/price) and returns the verdict.'''
    item_path = re.findall(".cn/(.*?).html", main_url)[0]
    page_url = 'https://item.gome.com.cn/' + item_path + '.html'
    page = get_response(session, page_url, headers)
    html = etree.HTML(page.text)
    script = html.xpath("//script[contains(text(),'gomePrice')]/text()")[0]
    item['price'] = script.split('gomePrice:"')[1:][0].split('"')[0]

    ziying = html.xpath("//span[@class='identify']/text()")
    if len(ziying) != 1:
        return _REV_NOT_SELF_RUN
    item['name'] = html.xpath("//*[@id='gm-prd-main']/div[1]/h1/text()")
    if _rev_name_blocked(item['name'], stop_pairs):
        return _REV_BLOCKED

    # Stock lookup; the SKU is only rewritten once this step is reached.
    sku = page_url.split('.html')[0].split('/')[-1].replace('-', '/')
    item['sku'] = sku
    stock_url = "https://ss.gome.com.cn/item/v1/d/m/store/unite/" + str(
        sku) + "/N/11010200/110102002/1/null/flag/item/allStores?callback=allStores"
    reply = get_response(session, stock_url, headers)
    payload = json.loads(reply.text.replace('allStores(', '').replace(')', ''))
    in_stock = payload['result']['stock']['status']
    # Loose comparison kept on purpose: a 0 status also counts as out of stock.
    return _REV_SOLD_OUT if in_stock == False else _REV_PASS


def _rev_audit_suning(session, headers, main_url, item, stop_pairs):
    '''Audit a Suning link; updates ``item`` (sku/name/price) and returns the verdict.'''
    submitted_price = item['price']
    sku = re.findall(".com/(.*?).html", main_url)[0]
    item['sku'] = sku
    page = get_response(session, 'https://product.suning.com/' + sku + '.html', headers)
    html = etree.HTML(page.text)
    part_number = html.xpath("//input[@id='curPartNumber']/@value")[0]
    ziying1 = html.xpath("//div[@class='proinfo-title']/h1/span/i/text()")
    ziying2 = html.xpath("//h1[@id='itemDisplayName']/span/text()")
    on_sale = re.findall("id=\"ie7_onsale\" >(.*?)<i", page.text)
    # List membership: one of the text nodes must be exactly "自营".
    if "自营" not in ziying1 and "自营" not in ziying2:
        return _REV_NOT_SELF_RUN

    detail_url = f'https://product.suning.com/pds-web/ajax/itemUniqueInfo_{str(part_number)}_0000000000.html'
    item_detail = json.loads(get_response(session, detail_url, headers).text)["itemDetail"]
    try:
        is_published = item_detail["isPublished"]
    except (KeyError, TypeError):
        is_published = '0'
    item['name'] = item_detail["cmmdtyTitle"]
    if is_published != '1' or '此款有货' not in str(on_sale):
        return _REV_SOLD_OUT
    if _rev_name_blocked(item['name'], stop_pairs):
        return _REV_BLOCKED

    # Three page fields are needed to build the price URL.
    cart_data = html.xpath("//a[@id='addCart2']/@sa-data")
    product_id = re.findall("'prdid':'(.*?)','", str(cart_data))[0]
    shop_id = re.findall("'shopid':'(.*?)','", str(cart_data))[0]
    category_code = html.xpath("//input[@name='procateCode']/@value")[0]
    real_url = f'https://pas.suning.com/nspcsale_0_{product_id}_{product_id}_{shop_id}_10_010_0100100_157122_1000000_9017_10106_Z001___{category_code}.html?callback=pcData'
    try:
        price_response = requests.get(real_url, timeout=5)
        sn_price = re.findall('"promotionPrice":"(.*?)",', price_response.text)[0]
    except Exception:
        # Best effort: fall back to the submitted price when the lookup fails.
        sn_price = submitted_price
    if len(sn_price) != 0:
        item['price'] = sn_price
        return _REV_PASS
    return _REV_NOT_SOLD_HERE


# URL keyword -> auditor; the first keyword found in the link wins.
_REV_AUDITORS = (
    ("jd", _rev_audit_jd),
    ("gome", _rev_audit_gome),
    ("suning", _rev_audit_suning),
)


def check_reverse_data(check_data):
    '''
    Audit crawled product links and attach the verdict to each row.

    Each row's ``url`` is checked against JD, Gome or Suning (chosen by a
    keyword in the link): live price, self-operated flag, stop words in the
    product name and stock status. Links of any other shop are rejected.

    Compared with the previous revision: every row yields exactly one entry per
    output column (a late failure used to append the price twice and break the
    column assignment), the unused ``brand``/``category`` columns are no longer
    required, the DB connection is always closed, and stop words without a
    white word are enforced.

    :param check_data: DataFrame with columns id, sku, name, url, source, price
    :return: the same DataFrame with '审核意见' and 'update_id' added and
             sku/name/url/source/price overwritten; returned untouched when empty
    '''
    if check_data.empty:
        print('今日无新增数据')
        return check_data

    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='zi_zh',autocommit=True)
    try:
        cursor = conn.cursor()
        cursor.execute('select stop_word,white_word from Stopwords')
        word_df = pd.DataFrame(cursor.fetchall(), columns=['stopword', 'whiteword'])
    finally:
        conn.close()
    stop_pairs = list(zip(word_df['stopword'], word_df['whiteword']))

    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'}
    session = requests.Session()
    print('共' + str(len(check_data)) + '条数据待审核')

    result = []
    id_all = []
    sku_list = []
    name_list = []
    url_list = []
    source_list = []
    price_list = []
    for i in range(len(check_data)):
        row = check_data.iloc[i]
        submitted_price = row['price']
        item = {'sku': row['sku'], 'name': row['name'], 'price': submitted_price}
        main_url = row['url'].strip()
        print(main_url)

        auditor = next((fn for keyword, fn in _REV_AUDITORS if keyword in str(main_url)), None)
        if auditor is None:
            verdict = _REV_UNKNOWN_SHOP
        else:
            try:
                verdict = auditor(session, headers, main_url, item, stop_pairs)
            except Exception:
                # Any scraping/parsing failure means the link is unusable; the
                # submitted price is reported, sku/name keep what was parsed.
                verdict = _REV_BAD_LINK
                item['price'] = submitted_price
        print(verdict)

        result.append(verdict)
        id_all.append(row['id'])
        sku_list.append(item['sku'])
        name_list.append(item['name'])
        url_list.append(main_url)
        source_list.append(row['source'])
        price_list.append(item['price'])
        print(f"已经处理{i + 1}条数据")

    check_data['审核意见'] = result
    check_data['update_id'] = id_all
    check_data['sku'] = sku_list
    check_data['name'] = name_list
    check_data['url'] = url_list
    check_data['source'] = source_list
    check_data['price'] = price_list
    return check_data
def get_response(session, url, headers):
    '''
    GET ``url`` with a 5 second timeout, retrying when the request itself fails.

    The first attempt decides the strategy: a non-200 answer gives up at once,
    while a raised error (timeout, connection failure, ...) triggers up to nine
    further attempts, each of which keeps going until a 200 answer arrives.

    Only ``Exception`` is caught, so Ctrl-C / ``SystemExit`` are no longer
    swallowed by the retry loop.

    :param session: object exposing ``get(url, headers=..., timeout=...)``
    :param url: address to fetch
    :param headers: HTTP headers sent with every attempt
    :return: the response whose ``status_code`` is 200, or ``-1`` on failure
    '''
    try:
        response = session.get(url, headers=headers, timeout=5)
    except Exception:
        pass
    else:
        # The server answered: accept a 200, never retry another status.
        return response if response.status_code == 200 else -1

    # The first request failed before an answer arrived: retry.
    for attempt in range(1, 10):
        print('请求超时,第%s次重复请求' % attempt)
        try:
            response = session.get(url, headers=headers, timeout=5)
        except Exception:
            continue
        if response.status_code == 200:
            return response
    return -1
# Verdict texts written to the '审核意见' column by check_data.
_CD_PASS = "通过"
_CD_SOLD_OUT = "无货,请按要求提供在销渠道证明"
_CD_NOT_SELF_RUN = "非自营,请按要求提供在销渠道证明"
_CD_BLOCKED = "定制/专用/含禁止上架关键词,产品暂不通过"
_CD_BAD_LINK = "链接有误,请按要求提供在销渠道证明"
_CD_OTHER_PROOF = "其他销售渠道证明"
_CD_DUPLICATE = '该链接与其他供应商提供链接重复,暂定不通过,正在处理,请稍等'
_CD_AMBIGUOUS = '该链接无法定位到唯一商品'
_CD_MULTI_PRICE = '该链接无法定位到最终产品,存在多个价格'

# (verdict, remark stored in the DB, product_zh state, also set product_zh.isdo)
# Processed top to bottom; a remark of None means "only the state is written".
_CD_STATE_RULES = (
    (_CD_PASS, None, '1', False),
    (_CD_SOLD_OUT, _CD_SOLD_OUT, '2', True),
    (_CD_NOT_SELF_RUN, _CD_NOT_SELF_RUN, '2', True),
    (_CD_BLOCKED, _CD_BLOCKED, '1', False),
    (_CD_MULTI_PRICE, _CD_MULTI_PRICE, '1', False),
    (_CD_BAD_LINK, _CD_BAD_LINK, '1', False),
    (_CD_OTHER_PROOF, '其他销售渠道证明,需人工审核', '1', False),
    (_CD_DUPLICATE, _CD_DUPLICATE, '2', False),
    (_CD_AMBIGUOUS, _CD_AMBIGUOUS, '1', False),
)

_CD_SHOP_KEYWORDS = ("jd", "gome", "suning")


def _cd_name_blocked(name, stop_pairs, first_hit_decides=False):
    '''
    Return True when ``name`` contains a stop word its white word does not excuse.

    With ``first_hit_decides`` the first stop word found in the name settles the
    outcome either way; otherwise an excused stop word lets the scan continue.
    '''
    text = str(name)
    for stopword, whiteword in stop_pairs:
        if str(stopword) not in text:
            continue
        if whiteword is None or str(whiteword) not in text:
            return True
        if first_hit_decides:
            return False
    return False


def _cd_submitted_or_zero(value):
    '''Return the submitted price, or '0' when it is an empty string.'''
    try:
        return value if len(value) != 0 else '0'
    except TypeError:
        # Non-string prices have no length; use them as they are.
        return value


def _cd_resolve_main_url(row):
    '''Pick the link to audit from the row's url / otherurl / supporturl fields.'''
    fields = row.index.tolist()
    # SECURITY NOTE(review): eval() runs on supplier-provided text to decode
    # list literals. It is kept so accepted inputs do not change, but
    # ast.literal_eval should replace it once the stored format is confirmed.
    if 'otherurl' in fields:
        if row['otherurl']:
            try:
                otherurl = eval(row['otherurl'])[0].strip()
            except Exception:
                otherurl = row['otherurl'].strip()
        else:
            otherurl = ''
    elif 'supporturl' in fields:
        other = eval(row['supporturl'].strip())
        if isinstance(other, list) and len(other) != 0:
            otherurl = other[0]
        else:
            otherurl = other
    else:
        otherurl = ''
        print('该渠道为API流程中的没有提供其他链接的渠道,其product_all表既没有other字段,也没有supporturl字段')

    if row['url']:
        try:
            main_url = eval(row['url'])[0].strip()
        except Exception:
            main_url = row['url'].strip()
    else:
        main_url = otherurl
    # A primary link outside the three big shops gives way to the other link.
    if not any(keyword in str(main_url) for keyword in _CD_SHOP_KEYWORDS):
        main_url = otherurl
    return main_url


def _cd_audit_jd(session, headers, main_url, row, stop_pairs):
    '''Audit a JD link and return ``(verdict, price)``; raises on unusable links.'''
    if "i-item" in str(main_url):
        # These links are accepted as-is with the submitted price.
        return _CD_PASS, row['price']
    session.get(main_url, headers=headers)  # reachability probe of the submitted link
    sku = main_url.split('/')[-1].split('.')[0]
    quote = session.get("https://p.3.cn/prices/mgets?skuIds=" + str(sku), headers=headers).json()
    jd_price = quote[0]['p']
    if len(jd_price) == 0:
        print('未获取到价格,使用供应商提交价格!!!')
        jd_price = row['price']
    if jd_price == '-1.00':
        return _CD_SOLD_OUT, row['price']
    print('价格为:', jd_price)

    page = session.get("https://item.jd.com/" + sku + ".html", headers=headers)
    html = etree.HTML(page.text)
    ziying = html.xpath("//div[@class='name goodshop EDropdown']/em/text()")
    if "自营" not in str(ziying):
        return _CD_NOT_SELF_RUN, jd_price
    name = html.xpath("//div[@class='sku-name']/text()")
    if _cd_name_blocked(name, stop_pairs):
        return _CD_BLOCKED, jd_price

    stock_url = "https://c0.3.cn/stock?skuId=" + str(sku) + "&area=1_2901_2906_0&cat=9987,653,655"
    reply = get_response(session, stock_url, headers)
    if reply == -1:
        # The stock endpoint could not be reached: treated as a pass.
        return _CD_PASS, jd_price
    reply.encoding = 'gbk'
    is_purchase = json.loads(reply.text)
    try:
        sold_out = ("无货" in is_purchase['stock']['stockDesc']
                    or "无货" in is_purchase['stock']['StockStateName'])
    except (KeyError, TypeError):
        # Fallback payload layout with the state name at the top level.
        sold_out = "无货" in is_purchase['StockStateName']
    return (_CD_SOLD_OUT if sold_out else _CD_PASS), jd_price


def _cd_audit_gome(session, headers, main_url, row, stop_pairs):
    '''Audit a Gome link and return ``(verdict, price)``; raises on unusable links.'''
    session.get(main_url, headers=headers)  # reachability probe of the submitted link
    item_path = re.findall(".cn/(.*?).html", main_url)[0]
    page_url = 'https://item.gome.com.cn/' + item_path + '.html'
    page = session.get(page_url, headers=headers)
    html = etree.HTML(page.text)
    script = html.xpath("//script[contains(text(),'gomePrice')]/text()")[0]
    gm_price = script.split('gomePrice:"')[1:][0].split('"')[0]

    ziying = html.xpath("//span[@class='identify']/text()")
    if len(ziying) != 1:
        return _CD_NOT_SELF_RUN, gm_price
    name = html.xpath("//*[@id='gm-prd-main']/div[1]/h1/text()")
    if _cd_name_blocked(name, stop_pairs):
        return _CD_BLOCKED, gm_price

    sku = page_url.split('.html')[0].split('/')[-1].replace('-', '/')
    stock_url = "https://ss.gome.com.cn/item/v1/d/m/store/unite/" + str(
        sku) + "/N/11010200/110102002/1/null/flag/item/allStores?callback=allStores"
    reply = session.get(stock_url, headers=headers)
    payload = json.loads(reply.text.replace('allStores(', '').replace(')', ''))
    in_stock = payload['result']['stock']['status']
    # Loose comparison kept on purpose: a 0 status also counts as out of stock.
    return (_CD_SOLD_OUT if in_stock == False else _CD_PASS), gm_price


def _cd_audit_suning(session, headers, main_url, row, stop_pairs):
    '''Audit a Suning link and return ``(verdict, price)``; raises on unusable links.'''
    print(f'苏宁:{main_url}')
    submitted_price = row['price']
    session.get(main_url, headers=headers)  # reachability probe of the submitted link
    sku = re.findall(".com/(.*?).html", main_url)[0]
    page_url = 'https://product.suning.com/' + sku + '.html'
    page = session.get(page_url, headers=headers)
    html = etree.HTML(page.text)
    part_number = html.xpath("//input[@id='curPartNumber']/@value")[0]
    ziying1 = html.xpath("//div[@class='proinfo-title']/h1/span/i/text()")
    ziying2 = html.xpath("//h1[@id='itemDisplayName']/span/text()")
    on_sale = re.findall("id=\"ie7_onsale\" >(.*?)<i", page.text)
    # List membership: one of the text nodes must be exactly "自营".
    if "自营" not in ziying1 and "自营" not in ziying2:
        return _CD_NOT_SELF_RUN, submitted_price

    detail_url = f'https://product.suning.com/pds-web/ajax/itemUniqueInfo_{str(part_number)}_0000000000.html'
    item_detail = json.loads(session.get(detail_url, headers=headers).text)["itemDetail"]
    try:
        is_published = item_detail["isPublished"]
    except (KeyError, TypeError):
        is_published = '0'
    name = item_detail["cmmdtyTitle"]
    if not (is_published == '1' or is_published == '0'):
        return _CD_SOLD_OUT, submitted_price
    if '此款有货' not in str(on_sale):
        return _CD_SOLD_OUT, submitted_price
    if _cd_name_blocked(name, stop_pairs):
        print("定制/专用/含禁止上架关键词,产品暂不通过")
        return _CD_BLOCKED, submitted_price

    shop_code = html.xpath("//input[@id='shop_code']/@value")[0]
    category_code = html.xpath("//input[@name='procateCode']/@value")[0]
    if len(shop_code) == 0:
        # No shop code on the page: query the default shop and prefer refPrice.
        real_url = f'https://pas.suning.com/nspcsale_0_{part_number}_{part_number}_0000000000_10_010_0100100_501126_1000000_9017_10106_Z001___{category_code}_1.0_0___000321NJB____0__.html?callback=pcData'
        price_text = requests.get(real_url).text
        sn_price = re.findall('"refPrice":"(.*?)",', price_text)[0]
        if len(sn_price) == 0:
            sn_price = re.findall('"promotionPrice":"(.*?)",', price_text)[0]
    else:
        real_url = f'https://pas.suning.com/nspcsale_0_{part_number}_{part_number}_{shop_code}_10_010_0100100_157122_1000000_9017_10106_Z001___{category_code}.html?callback=pcData'
        price_text = requests.get(real_url).text
        sn_price = re.findall('"promotionPrice":"(.*?)",', price_text)[0]

    if len(sn_price) != 0:
        try:
            return _CD_PASS, float(sn_price)
        except ValueError:
            # e.g. a price range such as '58.00-558.00'
            print(f'该链接获取的价格有问题:{sn_price},{page_url}')
    return _CD_AMBIGUOUS, _cd_submitted_or_zero(submitted_price)


# URL keyword -> auditor; the first keyword found in the link wins.
_CD_SHOP_AUDITORS = (
    ("jd", _cd_audit_jd),
    ("gome", _cd_audit_gome),
    ("suning", _cd_audit_suning),
)


def _cd_audit_other(reverse_cursor, main_url, row, stop_pairs):
    '''Audit a link outside the three big shops using the crawled reverse_data rows.'''
    by_id = ("select a.product_name,b.price from product a left join product_price b "
             "on a.product_id=b.product_id where a.id=%s")
    by_id_and_link = by_id + " and CHARINDEX(a.channel_product_id,%s)>0"

    if 'otherurl' in row.index.tolist():
        # Requested through the system: look the product up by id, preferring
        # the row whose crawled link matches, then falling back to the id alone.
        print(main_url)
        try:
            reverse_cursor.execute(by_id_and_link, (str(row['id']), str(main_url)))
            found = reverse_cursor.fetchone()
        except Exception:
            found = None
        if not found:
            reverse_cursor.execute(by_id, (str(row['id']),))
            found = reverse_cursor.fetchone()
        if not found:
            # Previously an uncaught TypeError aborted the whole batch here;
            # unknown products now get the verdict the other branch uses.
            return _CD_NOT_SELF_RUN, row['price']
        name, found_price = found[0], found[1]
    else:
        # Not requested through the system: the product is keyed by sku.
        reverse_cursor.execute(by_id_and_link, (str(row['sku']), str(main_url)))
        found = reverse_cursor.fetchone()
        if not found:
            return _CD_NOT_SELF_RUN, row['price']
        print(f'name_price:{found},价格:{str(found[1])}')
        name, found_price = found[0], str(found[1])

    if _cd_name_blocked(name, stop_pairs, first_hit_decides=True):
        return _CD_BLOCKED, found_price
    return _CD_PASS, found_price


def _cd_flag_duplicate_links(fileName):
    '''Overwrite the verdict of rows whose shop SKU was already seen in an earlier row.'''
    verdict_column = fileName.columns.get_loc('审核意见')
    seen = set()
    for position in range(len(fileName)):
        raw_url = fileName.iloc[position].url
        if not raw_url:
            continue
        link = raw_url.strip()
        if not any(keyword in str(link) for keyword in _CD_SHOP_KEYWORDS):
            continue
        print(link)
        try:
            if "jd" in str(link):
                shop_sku = link.split('/')[-1].split('.')[0]
            elif "suning" in str(link):
                shop_sku = re.findall(".com/(.*?).html", link)[0]
            else:
                shop_sku = re.findall(".cn/(.*?).html", link)[0]
        except Exception:
            fileName.iloc[position, verdict_column] = _CD_BAD_LINK
            continue
        if str(shop_sku) in seen:
            fileName.iloc[position, verdict_column] = _CD_DUPLICATE
        else:
            seen.add(shop_sku)


def _cd_store_verdicts(cursor, conn, fileName, source):
    '''Write every known verdict back to the product tables, one verdict kind at a time.'''
    is_zi_zh = source == 'zi_zh'
    id_column = 'product_zh_id' if is_zi_zh else 'id'
    audited = list(zip(fileName[id_column], fileName['审核意见']))
    print('更新数据库~')
    for verdict, remark, zh_state, zh_isdo in _CD_STATE_RULES:
        for row_id, suggestion in audited:
            if suggestion != verdict:
                continue
            key = str(row_id)
            if is_zi_zh:
                if remark is None:
                    cursor.execute("update product_zh set state=%s where id=%s", (zh_state, key))
                    cursor.execute("update product_all set state='1' where product_zh_id=%s", (key,))
                else:
                    isdo_clause = ",isdo='1'" if zh_isdo else ""
                    cursor.execute(
                        "update product_zh set state=%s,remark=%s" + isdo_clause + " where id=%s",
                        (zh_state, remark, key))
                    cursor.execute(
                        "update product_all set state='2',remark=%s where product_zh_id=%s",
                        (remark, key))
            elif remark is None:
                cursor.execute("update product_all_api set state='1' where id=%s", (key,))
            else:
                cursor.execute("update product_all_api set state='2',remark=%s where id=%s", (remark, key))
        conn.commit()


def check_data(fileName,source):
    '''
    Audit supplier-submitted product links and store the verdicts.

    For every row the link to audit is resolved from ``url`` / ``otherurl`` /
    ``supporturl``. JD, Gome and Suning links are checked online (price,
    self-operated flag, stop words, stock); other links are checked against the
    crawled ``reverse_data`` tables. Verdict and price are written into
    ``fileName`` ('审核意见' and 'price'), rows repeating an already seen shop
    SKU are flagged as duplicates, and the states are written to
    ``product_zh``/``product_all`` (source == 'zi_zh') or ``product_all_api``.

    Compared with the previous revision: both connections are always closed,
    every row yields exactly one verdict/price pair, a product missing from
    ``reverse_data`` no longer aborts the batch, and SQL is parameterised.

    :param fileName: DataFrame of products to audit; reads ``url``, ``price``,
                     optionally ``otherurl``/``supporturl``, ``id``, ``sku`` and,
                     for 'zi_zh', ``product_zh_id``
    :param source: 'zi_zh' selects the product_zh/product_all tables, anything
                   else selects product_all_api
    :return: None
    '''
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='zi_zh',autocommit=True)
    reverse_conn = None
    try:
        cursor = conn.cursor()
        reverse_conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='reverse_data',autocommit=True)
        reverse_cursor = reverse_conn.cursor()
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'}
        session = requests.Session()
        cursor.execute("select stop_word,white_word from stopwords")
        word_list = pd.DataFrame(cursor.fetchall(), columns=['stopword', 'white_word'])
        stop_pairs = list(zip(word_list['stopword'], word_list['white_word']))

        # Audit the links.
        result = []
        price = []
        for position in range(len(fileName)):
            row = fileName.iloc[position]
            main_url = _cd_resolve_main_url(row)
            print(f'开始处理:{main_url}')
            if len(main_url) == 0:
                # No usable link at all: left for manual review.
                verdict, row_price = _CD_OTHER_PROOF, "其他销售渠道价格"
            else:
                auditor = next((fn for keyword, fn in _CD_SHOP_AUDITORS if keyword in str(main_url)), None)
                if auditor is None:
                    verdict, row_price = _cd_audit_other(reverse_cursor, main_url, row, stop_pairs)
                else:
                    try:
                        verdict, row_price = auditor(session, headers, main_url, row, stop_pairs)
                    except Exception:
                        # Any scraping/parsing failure means the link is unusable.
                        verdict, row_price = _CD_BAD_LINK, row['price']
            print(verdict)
            result.append(verdict)
            price.append(row_price)
            print(f"已经处理{position + 1}条数据\n")

        print(f'审核意见:{len(result)}')
        print(f'price:{len(price)}')
        print(f'总条数:{len(fileName)}')
        fileName['审核意见'] = result
        fileName['price'] = price

        _cd_flag_duplicate_links(fileName)

        print('更新状态中。。。')
        _cd_store_verdicts(cursor, conn, fileName, source)
        print('更新完成~')
    finally:
        if reverse_conn is not None:
            reverse_conn.close()
        conn.close()
def get_zgc_brand_info(url_brand_list,brand_list):
    """Match pairs of raw brand strings against the ``p_brand`` table.

    The brand table is loaded once; rows whose id is used as another row's
    non-zero ``pid`` are excluded, and the ``name``/``cname``/``ename``
    columns are stripped and upper-cased. Both strings of every input pair
    are then resolved one after the other: first by English name, then by
    Chinese name, then by the ``name`` column.

    :param url_brand_list: first brand string of each pair
    :param brand_list: second brand string of each pair (zipped with the first)
    :return: ``(zgc_brand_list, zgc_brand_code_list)``; one inner list per
        input pair, holding the matched ``name`` values and the matched ids
        as strings. '无对应' is appended for a string that matched nothing.
    """
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',database='ZI_NEW',autocommit=True)
    cursor = conn.cursor()
    cursor.execute('select id,name,cname,ename from p_brand where id not in (select distinct(pid) from p_brand where pid <> 0)') # do not match main (parent) brands
    data = (cursor.fetchall())
    # NULL cells become the literal 'EMPTY' so the str() normalisation below never sees NaN.
    brand_table = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor.description]).drop_duplicates().fillna('EMPTY')
    brand_table['name'] = brand_table['name'].apply(lambda x : str(x).strip().upper())
    brand_table['cname'] = brand_table['cname'].apply(lambda x : str(x).strip().upper())
    brand_table['ename'] = brand_table['ename'].apply(lambda x : str(x).strip().upper())
    zgc_brand_list = []
    zgc_brand_code_list = []
    for brand,source_brand in zip(url_brand_list,brand_list):
        combine_list = [brand,source_brand]
        # Matches for both strings of the pair accumulate in the same two lists.
        temp_name = []
        temp_code = []
        for brand in combine_list:
            if brand:
                # Any string mentioning Lenovo is collapsed to the bare Chinese brand name.
                if "联想" in str(brand):
                    brand = "联想"
                #if "国产" in str(brand):
                #    brand = "错误信息"
            else:
                # Falsy input (None, '') is replaced by a placeholder string.
                brand = '无信息'
            brand = str(brand)
            try:
                # Remove the text found inside square brackets from the brand string;
                # an IndexError means there was no complete [...] part.
                country = brand.split('[')[1].split(']')[-2]
                brand = brand.replace(country, '')
            except IndexError:
                pass
            # Chinese part: digits and CJK characters only; a digits-only result is discarded.
            cn_res = ''.join(re.findall(r'[0-9\u4E00-\u9FA5]', brand))
            if cn_res.isdigit():
                cn_res = ''
            cnbrand = cn_res
            # English part: ASCII letters, digits and hyphens, upper-cased; digits-only is discarded.
            en_res = (''.join(re.findall(r'[a-zA-Z0-9-]', brand))).upper()
            if en_res.isdigit():
                en_res = ''
            enbrand = en_res
            if cnbrand == '':
                cnbrand = '无对应'
            # A single-character English part is treated as no English name at all.
            if enbrand == '' or len(enbrand) == 1:
                enbrand = '无对应'
            # Step 1: exact match on the English name column.
            findres = [[brandcode, name] for brandcode, enname,name in
                       zip(brand_table['id'].tolist(), brand_table['ename'].tolist(),brand_table['name'].tolist()) if enbrand == str(enname)]
            if findres:
                for find in findres:
                    temp_name.append(find[1])
                    temp_code.append(str(find[0]))
            else:
                # Step 2: exact match on the Chinese name column; Lenovo additionally
                # picks up every row whose English name contains THINKPAD.
                if brand == "联想":
                    findres = [[brandcode, name] for brandcode, cnname,enname,name in
                               zip(brand_table['id'].tolist(), brand_table['cname'].tolist(),brand_table['ename'].tolist(),brand_table['name'].tolist()) if cnbrand == str(cnname) or "THINKPAD" in str(enname)]
                else:
                    findres = [[brandcode, name] for brandcode, cnname,name in
                               zip(brand_table['id'].tolist(), brand_table['cname'].tolist(),brand_table['name'].tolist()) if cnbrand == str(cnname)]
                if findres:
                    for find in findres:
                        temp_name.append(find[1])
                        temp_code.append(str(find[0]))
                    continue
                else:
                    # Step 3: compare the whole cleaned brand string with the name column.
                    findres = [[brandcode, name] for brandcode,name in
                               zip(brand_table['id'].tolist(), brand_table['name'].tolist()) if brand == str(name)]
                    if findres:
                        for find in findres:
                            temp_name.append(find[1])
                            temp_code.append(str(find[0]))
                        continue
                    else:
                        # Nothing matched at any step: record the placeholder.
                        temp_name.append('无对应')
                        temp_code.append('无对应')
                        continue
            # NOTE(review): every path through the else branch above appears to end in
            # `continue`, so this block seems to run only after an English-name match and
            # appends those matches a second time - confirm that this is intended.
            if findres:
                for find in findres:
                    temp_name.append(find[1])
                    temp_code.append(str(find[0]))
            else:
                temp_name.append('无对应')
                temp_code.append('无对应')
        zgc_brand_list.append(temp_name)
        zgc_brand_code_list.append(temp_code)
    conn.close()
    return zgc_brand_list, zgc_brand_code_list
def get_zgc_classify_info(classify_list,source):
    """Look up the ZGC sub-category names and codes for source category names.

    For ``source == 'zi_zh'`` the mapping is read from ``op_zh_zd_type``
    joined with ``VW_Relation_Property``; for any other source it is read
    from ``Product_Relation_Attribute_SubTitle`` joined with
    ``ZI_SubCategory`` after removing ``'_'`` and ``'ZH'`` from ``source``.

    :param classify_list: iterable of source category names
    :param source: data source identifier
    :return: ``(zd_category, zd_category_code)``; one inner list per input
        name. ``['无总后类别对应']`` is used when the query returns no rows
        (and, for 'zi_zh', when every row was filtered out).
    """
    no_match = '无总后类别对应'
    is_zi_zh = source == 'zi_zh'
    # Values are bound through pymssql's %s placeholders instead of being
    # pasted into the SQL text, so quotes in a category name cannot break or
    # alter the statement.
    if is_zi_zh:
        sql = ("select a.subcategoryname,b.SubCategoryCode from op_zh_zd_type a "
               "left join VW_Relation_Property b "
               "on a.subcategoryname = b.SubCategoryName "
               "where a.second = %s")
        fixed_params = ()
    else:
        source = source.replace('_', '').replace('ZH', '')
        sql = ("select b.name 'subcategoryname',a.ZI_SubCategoryCode 'SubCategoryCode' "
               "from Product_Relation_Attribute_SubTitle a LEFT JOIN ZI_SubCategory b "
               "on a.zi_subcategorycode=b.subcategorycode "
               "where a.source=%s and a.SourceSubCategory=%s")
        fixed_params = (source,)

    zd_category = []
    zd_category_code = []
    classify_list = list(classify_list)
    if not classify_list:
        # Nothing to look up: skip the database round trip entirely.
        return zd_category, zd_category_code

    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',
                           database='ZI_DataBase')
    try:
        cursor = conn.cursor()
        for subcategory_name in classify_list:
            # str() keeps the comparison a string comparison, as the quoted
            # f-string version did.
            cursor.execute(sql, fixed_params + (str(subcategory_name),))
            data = pd.DataFrame(cursor.fetchall(),
                                columns=[column[0] for column in cursor.description]).drop_duplicates()
            names = []
            codes = []
            for name, code in zip(data['subcategoryname'].tolist(), data['SubCategoryCode']):
                # Rows without a code are unusable; the zi_zh mapping also
                # skips categories whose name is flagged with "错误".
                if code is None or (is_zi_zh and "错误" in name):
                    continue
                names.append(name)
                codes.append(code)
            # Only the zi_zh branch falls back to the placeholder when rows
            # existed but all were filtered out; the other branch keeps the
            # (possibly empty) lists, as it always has.
            if data.empty or (is_zi_zh and not names):
                zd_category.append([no_match])
                zd_category_code.append([no_match])
            else:
                zd_category.append(names)
                zd_category_code.append(codes)
    finally:
        # The connection used to be left open on every call.
        conn.close()
    return zd_category, zd_category_code
def remove_error_productcode(productcodes):
    """Find product codes whose brand or sub-category is flagged as erroneous.

    A product code is reported when its brand name or its sub-category name
    contains the marker "错误".

    :param productcodes: de-duplicated product codes to check
    :return: list of the product codes that are flagged (the codes to drop,
        not the cleaned list)
    """
    productcodes = list(productcodes)
    if not productcodes:
        # "in ()" is not valid SQL, and there is nothing to check anyway.
        return []

    sql = '''
    select distinct c.productcode,c.zi_brandname,d.name 'zi_subcategoryname' from
    (select a.productcode,a.subcategorycode,b.name 'zi_brandname' from info_product a left join zi_brand b on a.brandcode=b.brandcode) c
    left join zi_subcategory d on c.subcategorycode=d.subcategorycode
    where c.productcode in (%s)
    ''' % ','.join(['%s'] * len(productcodes))

    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',database='ZI_DataBase', autocommit=True)
    try:
        cursor = conn.cursor()
        cursor.execute(sql, tuple(productcodes))
        rows = cursor.fetchall()
    finally:
        conn.close()

    # Both names come from left joins and may be NULL; treat NULL as "no marker".
    return [
        productcode
        for productcode, brand_name, subcategory_name in rows
        if '错误' in (brand_name or '') or '错误' in (subcategory_name or '')
    ]
def remove_old_productcode(productcodes):
    """Return the product codes that have no row in ``p_sku``.

    :param productcodes: iterable of product codes (SKUs) to check
    :return: list of the codes for which ``p_sku`` contains no matching
        ``sku``, in input order
    """
    productcodes = list(productcodes)
    if not productcodes:
        return []

    error_code = []
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',database='ZI_NEW', autocommit=True)
    try:
        cursor = conn.cursor()
        for code in productcodes:
            # Bound parameter instead of an f-string; str() keeps the
            # comparison a string comparison as the quoted literal did.
            cursor.execute("select sku from p_sku where sku = %s", (str(code),))
            if not cursor.fetchall():
                error_code.append(code)
    finally:
        # Close the connection even when a query fails.
        conn.close()
    return error_code
def get_cn_number(char):
    """Count the Chinese characters in a string.

    A character counts when its code point lies in U+4E00..U+9FA5 inclusive.

    :param char: string to inspect
    :return: number of characters inside that range
    """
    return sum(1 for symbol in char if 0x4E00 <= ord(symbol) <= 0x9FA5)
def judge_unit(string):
    """Decide whether a token may be a model number rather than a quantity.

    :param string: candidate token
    :return: False when the token is empty, contains a blacklisted word,
        holds three or more Chinese characters, or starts with a number
        whose following letters form a known unit; True otherwise
    """
    unit_list = {'MM','CM', 'DM', 'ML', 'W', 'KW','KG','G','M','L','KBPS','MMM','P','V','KM'}
    error_unit_list = ['公斤','NULL','PVC','MONITOR','QIANG','I3','I5','I7','I9','LED','NO','寸','USB','OPTIX','OSN','ZXMP','BASE','ZFSD']
    if not string:
        # An empty token is never a model; this also avoids indexing
        # string[0] on an empty string below.
        return False
    if any(error_unit in string for error_unit in error_unit_list):
        return False
    if get_cn_number(string) >= 3:
        return False
    if not string[0].isdigit():
        return True

    # m counts the numeric-looking characters (digits, '.', '-', 'X') and
    # n the letters; scanning stops at the first numeric-looking character
    # that appears after a letter, which is then not counted.
    m = 0
    n = 0
    for char in string:
        if char.isdigit() or char in ('.', '-', 'X'):
            m += 1
            if n != 0:
                m -= 1
                break
        elif char.isalpha():
            n += 1
    # string[m:m+n] is the text right after the numeric prefix, e.g. "G" in
    # "80G100"; a known unit there means the token is a measurement.
    return string[m:m + n].upper() not in unit_list
def _clean_model_candidate(candidate, brand_remove, digits_allowed):
    """Upper-case a model candidate, drop the brand, and return '' if rejected."""
    candidate = candidate.upper().replace(brand_remove, '')
    if not candidate:
        return ''
    if not judge_unit(candidate):
        return ''
    # '*' and '×' indicate dimensions such as 210*297, not a model.
    if '*' in candidate or '×' in candidate:
        return ''
    # Purely numeric models are only trusted for the special brands.
    if candidate.isdigit() and not digits_allowed:
        return ''
    return candidate


def get_model(name, params, brand):
    """Extract a product model from its name and parameter text.

    The model listed in ``params`` (keys '认证型号', '产品型号', '型号', in
    that order) wins when it survives cleaning; otherwise the longest
    surviving alphanumeric token of ``name`` is used.

    Example inputs::

        name = '得力(deli)A4浅红色复印纸 彩色打印纸手工折纸卡纸 ZFSD 80g100张/包 7757'
        params = "{'产品型号': '得力7757', '产品详细参数': '...'}"
        brand = '[得力]'

    :param name: product name
    :param params: string holding a Python dict literal of product parameters
    :param brand: brand text; its first ASCII alphanumeric run is removed
        from every candidate
    :return: the model in upper case, or '无型号' when none was found
    """
    import ast  # local import: only this function needs it

    # The parameter text comes from scraped data, so it is parsed as a
    # literal instead of being executed with eval().
    try:
        params = ast.literal_eval(params)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        params = {}
    if not isinstance(params, dict):
        params = {}

    # Drop a parenthesised part such as "(deli)". ASCII parentheses are tried
    # first, then the fullwidth ones common in Chinese product names.
    for open_paren, close_paren in (('(', ')'), ('\uff08', '\uff09')):
        try:
            name = name.split(open_paren)[0] + name.split(close_paren)[1]
        except IndexError:
            continue
        break

    brand = str(brand)
    digits_allowed = any(special_brand in brand for special_brand in ('得力', '世达', '华为'))
    english_brand = re.findall(r"[A-Za-z0-9-]+", brand)
    # The fallback is a sentinel that is not expected to occur in a candidate.
    brand_remove = english_brand[0].upper() if english_brand else '没有英文品牌!'

    param_model = ''
    for key in ('认证型号', '产品型号', '型号'):
        if key in params:
            param_model = params[key]
            break
    # Parameter values are not guaranteed to be strings.
    param_model = '' if param_model is None else str(param_model)
    param_model = _clean_model_candidate(param_model, brand_remove, digits_allowed)

    if param_model:
        res_model = param_model
    else:
        # Rejected tokens become '?' so they keep a length of one when the
        # longest token is picked, exactly as before.
        name_tokens = [token for token in re.findall(r"[A-Za-z0-9-]+", name) if len(token) >= 2]
        name_candidates = [
            _clean_model_candidate(token, brand_remove, digits_allowed) or '?'
            for token in name_tokens
        ]
        res_model = max(name_candidates, key=len) if name_candidates else ''

    # Strip the "/T" and "\T" markers first so a model consisting only of a
    # marker falls back to the default instead of being returned empty.
    res_model = res_model.replace('/T', '').replace('\\T', '')
    if res_model in ('', '?'):
        res_model = '无型号'
    return res_model
'''
if len(name_xinghao_lyst) == 0:
#type_lyst.append(param_xinghao.upper())
if param_xinghao.isdigit():
param_xinghao = '无型号'
return param_xinghao
else:
return param_xinghao.upper()
else:
if param_xinghao in name_xinghao_lyst:
#type_lyst.append(param_xinghao.upper())
return param_xinghao.upper()
else:
xinghao_data = max(name_xinghao_lyst, key=len)
for xinghao in name_xinghao_lyst:
if len(xinghao) > 2 and '*' not in xinghao and judge_unit(xinghao):
xinghao_data = xinghao
break
if not judge_unit(xinghao_data):
xinghao_data == '无型号'
#type_lyst.append(xinghao_data.upper())
return xinghao_data
'''
def predict_category(name_list):
    """Predict a category for each product name.

    The model is run from inside ``./text_moudle_LXWL``; the working
    directory is switched for the duration of the call.

    :param name_list: original product names
    :return: list of predicted categories as returned by
        ``LXWL_name2subcategory.namelyst_predict``
    """
    original_dir = os.getcwd()
    os.chdir("./text_moudle_LXWL")
    try:
        tf.reset_default_graph()
        model = LXWL_name2subcategory()
        return model.namelyst_predict(name_list)
    finally:
        # Restore the caller's directory even when prediction fails;
        # otherwise every later relative path in the process would break.
        os.chdir(original_dir)
def fuzzy_match(zgc_brand_code_list,model_list,price_list):
    """Find up to three catalogue SKUs per product by brand, model and price.

    For each ``(brand_code, model, price)`` triple the SKUs of that brand
    whose name contains the model (case-insensitive) are collected and the
    three whose index price is closest to ``price`` are kept.

    :param zgc_brand_code_list: brand ids, one per product
    :param model_list: model strings, one per product
    :param price_list: prices, one per product; unparsable prices count as 0
    :return: ``(res_name_list, res_code_list)``; one list of SKU names and
        one list of SKU codes per product, closest price first
    """
    queries = list(zip(zgc_brand_code_list, model_list, price_list))
    if not queries:
        # Nothing to match: skip loading the whole catalogue.
        return [], []

    # Load all active SKUs with their brand and latest index price.
    sql = ("select a.sku,a.skuname,b.brandid,c.index_price_wave from p_sku a "
           "left join p_spu b "
           "on a.spuid = b.id "
           "left join (select goods_id,index_price_wave from zdindex.dbo.zd_week_price "
           "where periods in (select top 1 periods from zdindex.dbo.zd_entry_goods_price)) c "
           "on a.sku = c.goods_id "
           "where a.state in ('1','4')")
    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou202006','ZI_NEW')
    try:
        cursor = conn.cursor()
        cursor.execute(sql)
        data = cursor.fetchall()
        df_db = pd.DataFrame(data, columns=[column[0] for column in cursor.description])
    finally:
        # The connection used to be left open on every call.
        conn.close()

    res_name_list = []
    res_code_list = []
    for brand_code, model, price in queries:
        try:
            price = int(float(price))
        except (TypeError, ValueError, OverflowError):
            price = 0
        # Brand ids are compared as integers when possible, else as strings.
        try:
            brand_key = int(brand_code)
        except (TypeError, ValueError):
            brand_key = str(brand_code)
        brand_df = df_db[df_db['brandid'] == brand_key]

        model_key = str(model).strip().upper()
        seen_codes = set()
        matched_names = []
        matched_codes = []
        price_diffs = []
        for name, product_code, index_price in zip(brand_df['skuname'].tolist(),brand_df['sku'].tolist(),brand_df['index_price_wave'].tolist()):
            if not isinstance(name, str):
                # A NULL skuname cannot contain the model.
                continue
            # A missing price may arrive as None or NaN depending on the
            # column dtype; both count as 0.
            if pd.isna(index_price):
                index_price = 0
            if model_key in name.upper() and product_code not in seen_codes:
                seen_codes.add(product_code)
                matched_names.append(name)
                matched_codes.append(product_code)
                price_diffs.append(abs(price - index_price))

        temp_df = pd.DataFrame({'name': matched_names, 'code': matched_codes, 'diff': price_diffs})
        temp_df = temp_df.sort_values('diff',ascending=True).head(3)
        res_name_list.append(temp_df['name'].tolist())
        res_code_list.append(temp_df['code'].tolist())
    return res_name_list,res_code_list
def standard_point_sku_name(category_name,name_rules,cat_subtitle_df):
conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database= 'ZI_NEW',autocommit=True)
cursor_zi_new = conn_zi_new.cursor()
cursor_zi_new.execute(f"select * from vw_property where name in (select category_name from important_category)")
data = cursor_zi_new.fetchall()
subtitle_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
subtitle_df['require_param'] = subtitle_df['identy'].apply(lambda x: x[0])
subtitle_df['match_param'] = subtitle_df['identy'].apply(lambda x: x[1])
subtitle_df['standard_param'] = subtitle_df['identy'].apply(lambda x: x[2])
subtitle_df['part_param'] = subtitle_df['identy'].apply(lambda x: x[3])
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment