Commit 4521e64e authored by Jialin

真·代码最终修改

parent cac7d04a
......@@ -6,13 +6,20 @@ import pandas as pd
import re
import xlsxwriter
import numpy as np
import pymssql
def brand_washing(filepath,thre=0.5,inner_thre=0.5,a=1,sheet_name=0):
# filepath:文件路径,thre为两个品牌下型号重合率阈值,inner_thre为两个品牌下某条型号内关键词重合率阈值,a为权重调整,sheet_name为表单名
df = pd.read_excel(filepath, sheet_name=sheet_name, converters = {'产品编码':str})
df_null = pd.read_excel(".\\异常数据表格.xlsx")
invalid_list = df_null['异常数据名称'].values
conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',
database='ZI_NEW', autocommit=True)
cursor_zi_new = conn_zi_new.cursor()
cursor_zi_new.execute(f"select col_value from data_washing_external where category_name='通用' and col_name='异常数据名称'")
invalid_list_fetch = cursor_zi_new.fetchall()
invalid_list = []
for invalid_tuple in invalid_list_fetch:
invalid_list.append(invalid_tuple[0])
# 处理缺失值
valid_index=[]
for i in df.index:
......
This diff is collapsed.
......@@ -10,16 +10,29 @@ import pandas as pd
import re
import numpy as np
import xlsxwriter
def product_washing(filepath, category,thre=1, a=0):
df_null = pd.read_excel(".\\异常数据表格.xlsx")
invalid_list = df_null['异常数据名称'].values
import pymssql
def product_washing(filepath, category, thre=1, a=0):
conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',
database='ZI_NEW', autocommit=True)
cursor_zi_new = conn_zi_new.cursor()
cursor_zi_new.execute(f"select col_value from data_washing_external where category_name='通用' and col_name='异常数据名称'")
invalid_list_fetch = cursor_zi_new.fetchall()
invalid_list = []
for invalid_tuple in invalid_list_fetch:
invalid_list.append(invalid_tuple[0])
df=pd.read_excel(filepath, converters={'产品编码':str})
df.drop(columns='Unnamed: 0', axis=1, inplace=True)
for col in df.columns:
df[col]=df[col].astype(str)
cursor_zi_new.execute(f"select col_value from data_washing_external where category_name='{category}' and col_name='重复参数项'")
other_parameters_fetch=cursor_zi_new.fetchall()
other_parameters=[]
for param in other_parameters_fetch:
other_parameters.append(param[0])
related_product = []
brand_grouped = df.groupby(by='产品品牌')
for brand in brand_grouped:
......@@ -86,7 +99,7 @@ def product_washing(filepath, category,thre=1, a=0):
# brand_combined = temp_list1+temp_list2
tempo_dict[i] = [set(combined)]
other_parameters=df_null['重复参数项'][df_null['类别']==category][df_null['重复参数项'].notnull()].values
other_parameters_values=[]
for parameter in other_parameters:
other_parameters_values.append(brand[1].loc[i,parameter])
......
......@@ -162,5 +162,5 @@ def get_point_category_params_data(category):
conn_zi_new.close()
category = '扫描仪'
category = '激光打印机'
get_point_category_params_data(category)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment