Commit 4521e64e authored by Jialin

真·代码最终修改

parent cac7d04a
...@@ -6,13 +6,20 @@ import pandas as pd ...@@ -6,13 +6,20 @@ import pandas as pd
import re import re
import xlsxwriter import xlsxwriter
import numpy as np import numpy as np
import pymssql
def brand_washing(filepath,thre=0.5,inner_thre=0.5,a=1,sheet_name=0): def brand_washing(filepath,thre=0.5,inner_thre=0.5,a=1,sheet_name=0):
# filepath:文件路径,thre为两个品牌下型号重合率阈值,inner_thre为两个品牌下某条型号内关键词重合率阈值,a为权重调整,sheet_name为表单名 # filepath:文件路径,thre为两个品牌下型号重合率阈值,inner_thre为两个品牌下某条型号内关键词重合率阈值,a为权重调整,sheet_name为表单名
df = pd.read_excel(filepath, sheet_name=sheet_name, converters = {'产品编码':str}) df = pd.read_excel(filepath, sheet_name=sheet_name, converters = {'产品编码':str})
df_null = pd.read_excel(".\\异常数据表格.xlsx") conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',
invalid_list = df_null['异常数据名称'].values database='ZI_NEW', autocommit=True)
cursor_zi_new = conn_zi_new.cursor()
cursor_zi_new.execute(f"select col_value from data_washing_external where category_name='通用' and col_name='异常数据名称'")
invalid_list_fetch = cursor_zi_new.fetchall()
invalid_list = []
for invalid_tuple in invalid_list_fetch:
invalid_list.append(invalid_tuple[0])
# 处理缺失值 # 处理缺失值
valid_index=[] valid_index=[]
for i in df.index: for i in df.index:
......
This diff is collapsed.
...@@ -10,16 +10,29 @@ import pandas as pd ...@@ -10,16 +10,29 @@ import pandas as pd
import re import re
import numpy as np import numpy as np
import xlsxwriter import xlsxwriter
import pymssql
def product_washing(filepath, category,thre=1, a=0):
df_null = pd.read_excel(".\\异常数据表格.xlsx") def product_washing(filepath, category, thre=1, a=0):
invalid_list = df_null['异常数据名称'].values conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',
database='ZI_NEW', autocommit=True)
cursor_zi_new = conn_zi_new.cursor()
cursor_zi_new.execute(f"select col_value from data_washing_external where category_name='通用' and col_name='异常数据名称'")
invalid_list_fetch = cursor_zi_new.fetchall()
invalid_list = []
for invalid_tuple in invalid_list_fetch:
invalid_list.append(invalid_tuple[0])
df=pd.read_excel(filepath, converters={'产品编码':str}) df=pd.read_excel(filepath, converters={'产品编码':str})
df.drop(columns='Unnamed: 0', axis=1, inplace=True) df.drop(columns='Unnamed: 0', axis=1, inplace=True)
for col in df.columns: for col in df.columns:
df[col]=df[col].astype(str) df[col]=df[col].astype(str)
cursor_zi_new.execute(f"select col_value from data_washing_external where category_name='{category}' and col_name='重复参数项'")
other_parameters_fetch=cursor_zi_new.fetchall()
other_parameters=[]
for param in other_parameters_fetch:
other_parameters.append(param[0])
related_product = [] related_product = []
brand_grouped = df.groupby(by='产品品牌') brand_grouped = df.groupby(by='产品品牌')
for brand in brand_grouped: for brand in brand_grouped:
...@@ -86,7 +99,7 @@ def product_washing(filepath, category,thre=1, a=0): ...@@ -86,7 +99,7 @@ def product_washing(filepath, category,thre=1, a=0):
# brand_combined = temp_list1+temp_list2 # brand_combined = temp_list1+temp_list2
tempo_dict[i] = [set(combined)] tempo_dict[i] = [set(combined)]
other_parameters=df_null['重复参数项'][df_null['类别']==category][df_null['重复参数项'].notnull()].values
other_parameters_values=[] other_parameters_values=[]
for parameter in other_parameters: for parameter in other_parameters:
other_parameters_values.append(brand[1].loc[i,parameter]) other_parameters_values.append(brand[1].loc[i,parameter])
......
...@@ -162,5 +162,5 @@ def get_point_category_params_data(category): ...@@ -162,5 +162,5 @@ def get_point_category_params_data(category):
conn_zi_new.close() conn_zi_new.close()
category = '扫描仪' category = '激光打印机'
get_point_category_params_data(category) get_point_category_params_data(category)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment