Commit 986dd4f7 authored by LAPTOP-1TK31LNP\liang's avatar LAPTOP-1TK31LNP\liang

处理类别错误数据

parent d47d0e89
'''
@Author: SoreLemon
@Time: 2021/5/19 16:41:01
@Target: 适用于非重点类或者重点类类别错误时,需要修改为重点类,方便技术人员和补参人员完成sku_name的重新命名.
@Input: `filepath` - path of the Excel workbook (.xlsx) to read.
@Input: `category_list` - list of strings holding the corrected category names.
@Output: `重点错误类别参数提取.xlsx` - a new Excel workbook with one sheet of product and parameter columns per category.
'''
# -*- coding: utf-8 -*-
import pandas as pd
import pymssql
from public import Index
# Cell text written when a parameter has no value at SKU level or SPU level.
_MISSING_PARAM_VALUE = '无参数,需补充'

# Leaf categories (ids that never appear as a parent id) with the given name.
_CATEGORY_SQL = (
    "select id,name from p_category "
    "where id not in (select distinct pid from p_category) and name in (%s)"
)

# One row per (SKU, SKU-level attribute); the SKU filter is appended at call time.
_SKU_LEVEL_SQL = (
    "select a.*,h.name as father_brand_name,d.name as brand_name,"
    "g.name as attr_name,f.value from p_sku a "
    "left join p_spu b on a.spuid = b.id "
    "left join p_category c on b.categoryid = c.id "
    "left join p_brand d on b.brandid = d.id "
    "left join p_skuvaluemap e on a.id = e.skuid "
    "left join p_skuvalue f on e.valueid = f.id "
    "left join p_skusubtitle g on f.subtitleid = g.id "
    "left join p_brand h on d.pid = h.id "
    "where b.categoryid = %s and "
)

# One row per (SKU, SPU-level attribute); the SKU filter is appended at call time.
_SPU_LEVEL_SQL = (
    "select a.*,d.name as brand_name,g.name as attr_name,f.value from p_sku a "
    "left join p_spu b on a.spuid = b.id "
    "left join p_category c on b.categoryid = c.id "
    "left join p_brand d on b.brandid = d.id "
    "left join p_valuemap e on b.id = e.spuid "
    "left join p_value f on e.valueid = f.id "
    "left join p_subtitle g on f.subtitleid = g.id "
    "where b.categoryid = %s and "
)

_CATEGORY_PARAMS_SQL = "select * from vw_property where categoryid = %s"


def _build_sku_filter(sku_tuple):
    """Return ``(sql_fragment, params)`` restricting ``a.sku`` to *sku_tuple*.

    Interpolating a Python tuple straight into the SQL text renders a single
    SKU as ``('x',)`` and no SKUs as ``()``, both of which are invalid SQL.
    Placeholders avoid that and leave value quoting to the driver.
    """
    if isinstance(sku_tuple, str):
        # A bare string is treated as one SKU rather than split into characters.
        skus = (sku_tuple,)
    else:
        skus = tuple(sku_tuple)
    if not skus:
        # Nothing selected: use a predicate that matches no rows.
        return "1 = 0", ()
    placeholders = ", ".join(["%s"] * len(skus))
    return f"a.sku in ({placeholders})", skus


def _query_frame(cursor, sql, params):
    """Run *sql* with *params* and return all rows as a DataFrame.

    Column names are taken from ``cursor.description``.
    """
    cursor.execute(sql, params)
    rows = cursor.fetchall()
    return pd.DataFrame(rows, columns=[column[0] for column in cursor.description])


def _load_category_params(cursor, category_code):
    """Load the ``vw_property`` rows of one category and derive the flag columns.

    The first character of ``identy`` is stored as ``needed_param`` and the
    third as ``standard_param``; rows whose ``needed_param`` is ``'0'`` are
    dropped and ``subtitle`` is stripped of surrounding whitespace.
    """
    params_df = _query_frame(cursor, _CATEGORY_PARAMS_SQL, (category_code,))
    params_df['needed_param'] = params_df['identy'].apply(lambda identy: identy[0])
    params_df['standard_param'] = params_df['identy'].apply(lambda identy: identy[2])
    # .copy() so the column assignment below does not write into a filtered view.
    params_df = params_df[params_df['needed_param'] != '0'].copy()
    params_df['subtitle'] = params_df['subtitle'].apply(lambda subtitle: subtitle.strip())
    return params_df


def _lookup_param_value(sku_rows, spu_rows, param):
    """Return the first value of *param*, preferring SKU-level over SPU-level rows.

    Falls back to the "missing parameter" placeholder when neither frame has a
    row for the attribute.
    """
    for rows in (sku_rows, spu_rows):
        values = rows[rows['attr_name'] == param]['value'].tolist()
        if values:
            return values[0]
    return _MISSING_PARAM_VALUE


def _build_result_frame(df_sku, df_spu, params_df, category_name):
    """Pivot the attribute rows into one output row per SKU.

    Fixed product columns come first, followed by one column per parameter in
    ``params_df`` order. The ``产品名称`` parameter is skipped because the
    product name is already a fixed column.
    """
    # Values are collected in a dict keyed by the real parameter name. The
    # previous exec()-generated "<name>_list" variables do not persist between
    # exec() calls on Python 3.13+ (PEP 667), broke on names containing quotes,
    # and merged parameters whose sanitized identifiers collided.
    param_names = [
        name for name in dict.fromkeys(params_df['subtitle'].tolist())
        if name != '产品名称'
    ]
    param_values = {name: [] for name in param_names}

    id_list = []
    state_list = []
    product_name_list = []
    father_brand_list = []
    brand_list = []
    category_list = []

    sku_codes = list(df_sku['sku'].unique())
    last_position = len(sku_codes) - 1
    progress = Index()
    for position, sku in enumerate(sku_codes):
        try:
            print(progress(position, last_position), end='%')
        except Exception:
            # NOTE(review): presumably guards a zero total when there is only
            # one SKU - confirm against public.Index.
            print(progress(position, 1), end='%')

        # Filter once per SKU instead of once per column and per parameter.
        sku_rows = df_sku[df_sku['sku'] == sku]
        spu_rows = df_spu[df_spu['sku'] == sku]

        id_list.append(sku)
        state_list.append(sku_rows['state'].tolist()[0])
        product_name_list.append(sku_rows['skuname'].tolist()[0])
        father_brand_list.append(sku_rows['father_brand_name'].tolist()[0])
        brand_list.append(sku_rows['brand_name'].tolist()[0])
        category_list.append(category_name)
        for name in param_names:
            param_values[name].append(_lookup_param_value(sku_rows, spu_rows, name))

    columns = {
        '产品编码': id_list,
        '产品状态': state_list,
        '产品名称': product_name_list,
        '产品父品牌': father_brand_list,
        '产品品牌': brand_list,
        '产品类别': category_list,
    }
    for _, row in params_df.iterrows():
        param = row['subtitle']
        if param == '产品名称':
            continue
        header = param
        # Parameters flagged as both needed and standard get a leading '*'.
        if row['needed_param'] == '1' and row['standard_param'] == '1':
            header = '*' + param
        columns[header] = param_values[param]
    return pd.DataFrame(columns)


def get_point_category_params_data(category,sku_tuple):
    """Export the parameter data of the given SKUs in one category to Excel.

    For every leaf category named *category*, the SKU-level and SPU-level
    attribute values of the SKUs in *sku_tuple* are read from the ``ZI_NEW``
    database and written as one row per SKU to a sheet named
    ``"<category>参数数据"`` (with ``/`` in the category replaced by ``_``).

    The sheet is written to the module-level ``writer`` (a ``pd.ExcelWriter``),
    which must exist before this function is called; saving the workbook is
    left to the caller.

    Args:
        category: Category name to look up in ``p_category``.
        sku_tuple: SKU codes to export. An empty collection yields a sheet
            with headers only.
    """
    category_sheet_name = category.replace("/", "_")
    sku_filter, sku_params = _build_sku_filter(sku_tuple)

    # NOTE(review): the credentials are hard-coded in source; they should be
    # moved to configuration or environment variables.
    conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database= 'ZI_NEW',autocommit=True)
    try:
        cursor = conn_zi_new.cursor()
        export_category = _query_frame(cursor, _CATEGORY_SQL, (category,))
        category_rows = zip(export_category['id'].tolist(), export_category['name'].tolist())
        for category_code, category_name in category_rows:
            print(f"开始提取{category_name}参数数据")
            query_params = (category_code,) + sku_params
            df_sku = _query_frame(cursor, _SKU_LEVEL_SQL + sku_filter, query_params)
            df_spu = _query_frame(cursor, _SPU_LEVEL_SQL + sku_filter, query_params)
            params_df = _load_category_params(cursor, category_code)

            res = _build_result_frame(df_sku, df_spu, params_df, category_name)
            res.to_excel(writer, sheet_name=f"{category_sheet_name}参数数据")
            # NOTE: the original also carried a disabled export of weekly
            # price/link rows from the `zdindex` database (zd_week_price) to a
            # "<category>价格链接数据" sheet. It was already inert, so the
            # extra connection it would need is no longer opened here.
            print(f"{category_name}数据导出完毕!")
    finally:
        # Release the connection even when a query or the Excel write fails.
        conn_zi_new.close()
# Excel workbook to read.
filepath = '平板电视类别分析0512_反馈.xlsx'
# Corrected category names; list every category that should be exported.
category_list = ['投影机','显示器']
# Every listed column is read as text so that codes keep their exact spelling.
df_excel = pd.read_excel(
    filepath,
    sheet_name=0,
    converters={
        '产品编码': str,
        '类别名称': str,
        '产品名称': str,
        '产品品牌': str,
        '产品类别': str,
        '类别编码': str,
        '是否重点类': str,
        '备注': str,
    },
)
# Keep only rows whose 备注 and 是否重点类 cells both read exactly "true".
df_clean = df_excel[df_excel['备注'] == "true"]
df_clean_final = df_clean[df_clean['是否重点类'] == "true"]
sku_list = df_clean_final['产品编码'].tolist()
sku_tuple = tuple(sku_list)
# Output workbook. `writer` stays a module-level name because
# get_point_category_params_data writes its sheets to it. The context manager
# saves and closes the file exactly once; ExcelWriter.save() was removed in
# pandas 2.0, so the previous save()+close() pair fails there.
with pd.ExcelWriter("重点错误类别参数提取.xlsx") as writer:
    for category in category_list:
        get_point_category_params_data(category, sku_tuple)
# Disabled draft of get_all_parameter_sku, kept as a bare triple-quoted string.
# The string is never bound to a name or called, so it has no runtime effect.
# The draft reads the workbook, then for each row flagged "true" in both
# 是否重点类 and 备注 it queries the SKU-level and SPU-level attribute rows of
# that single SKU and prints the raw result.
# NOTE(review): the converters key '是否为重点类' differs from the column
# '是否重点类' read below, and the draft embeds database credentials.
'''def get_all_parameter_sku(filepath,category_list):
df_excel = pd.read_excel(filepath, sheet_name = 0, converters={'产品编码':str,'类别名称':str,'产品名称':str,'产品品牌':str,'产品类别':str,'类别编码':str,'是否为重点类': str,'备注':str,'类别名称':str})
sku_list = df_excel['产品编码'].tolist()
category_name_list = df_excel['类别名称'].tolist()
import_category_list = df_excel['是否重点类'].tolist()
have_to_deal_list = df_excel['备注'].tolist()
conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database= 'ZI_NEW',autocommit=True)
cursor = conn_zi_new.cursor()
index_row = 0
for i in sku_list:
if import_category_list[index_row] == "true" and have_to_deal_list[index_row] == "true":
#获取产品信息
cursor.execute(f"select a.*,h.name as father_brand_name,d.name as brand_name,g.name as attr_name,f.value from p_sku a \
left join p_spu b \
on a.spuid = b.id \
left join p_category c \
on b.categoryid = c.id \
left join p_brand d \
on b.brandid = d.id \
left join p_skuvaluemap e \
on a.id = e.skuid \
left join p_skuvalue f \
on e.valueid = f.id \
left join p_skusubtitle g \
on f.subtitleid = g.id \
left join p_brand h \
on d.pid = h.id \
where a.sku = {i}")
data_sku = (cursor.fetchall())
print(data_sku)
cursor.execute(f"select a.*,d.name as brand_name,g.name as attr_name,f.value from p_sku a \
left join p_spu b \
on a.spuid = b.id \
left join p_category c \
on b.categoryid = c.id \
left join p_brand d \
on b.brandid = d.id \
left join p_valuemap e \
on b.id = e.spuid \
left join p_value f \
on e.valueid = f.id \
left join p_subtitle g \
on f.subtitleid = g.id \
where a.sku = {i}")
data_spu = (cursor.fetchall())
print(data_spu)'''
\ No newline at end of file
......@@ -40,10 +40,14 @@ def all_modify_SKU_name(product_name,category_name,product_category,DATABASE):
category_name_list = df_clean[category_name].tolist()
product_category_list = df_clean[product_category].tolist()
import_category_list = df_clean["是否重点类"].tolist()
print(import_category_list)
#替换产品名称中错误的类别
for i in product_name_list:
#重点类,需要生成新sku名字
if import_category_list[index_row] == 1:
print(import_category_list[index_row])
print(type(import_category_list[index_row]))
if import_category_list[index_row] == 1.0:
print("重点类")
temp = i.replace(category_name_list[index_row], "")
temp_1 = temp.replace(product_category_list[index_row], "")
temp_final = temp_1+" "+category_name_list[index_row]
......@@ -51,9 +55,9 @@ def all_modify_SKU_name(product_name,category_name,product_category,DATABASE):
#非重点类,sku名字已经修改正确
else:
list_temp.append(i)
index_row += 1
print("Through the list")
#检查替换后的列表
SKU_name = list_temp
print(SKU_name)
return SKU_name
......@@ -234,13 +238,13 @@ product_name = '产品名称'
product_category = '产品类别'
category_name = '类别名称'
category_id = '类别编码'
database = 'ZI_NEW_TEST'
database = 'ZI_NEW'
test_database = 'ZI_NEW_TEST'
#在跑之前只需改第243行得excel文件路径和第237行得库名
#df_clean = clean_excel(database)
df_excel = pd.read_excel('显示器产品类别分析0510.xlsx', sheet_name = 0, converters={'产品编码':str})
df_excel = pd.read_excel('平板电视类别分析0512_反馈.xlsx', sheet_name = 0, converters={'产品编码':str,'类别名称':str,'产品名称':str,'产品品牌':str,'产品类别':str,'类别编码':str,'是否为重点类': int,'备注':int})
df_clean = df_excel[df_excel['备注'] == 1]
SKU_name = all_modify_SKU_name(product_name,category_name,product_category,database)
SPU_name = all_modify_SPU_name(product_brand,category_name)
......@@ -250,7 +254,7 @@ SPU = all_match_SPU_name(database,database,SPU_name,CATEGORY_id,BRAND_id)
SKU = get_sku(sku)
SPU_ID = get_spu_id(SPU, database)
#print(SKU)
update_sku_data(database,SKU,SKU_name,SPU_ID)
#update_sku_data(database,SKU,SKU_name,SPU_ID)
#delete_data(SPU_name, test_database)删库内数据用的,少用
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment