真·代码最终修改

4521e64e · Jialin · cac7d04a · 4521e64e · 4521e64e · 4521e64e
Commit 4521e64e authored Apr 14, 2021 by Jialin
8 changed files
--- a/公共代码/brand_filter.xlsx
+++ b/公共代码/brand_filter.xlsx
--- a/公共代码/class_filter.xlsx
+++ b/公共代码/class_filter.xlsx
--- a/公共代码/product_filter.xlsx
+++ b/公共代码/product_filter.xlsx
--- a/公共代码/产品品牌分析.py
+++ b/公共代码/产品品牌分析.py
@@ -6,13 +6,20 @@ import pandas as pd
 import re
 import xlsxwriter
 import numpy as np
+import pymssql


 def brand_washing(filepath,thre=0.5,inner_thre=0.5,a=1,sheet_name=0):
    # filepath:文件路径，thre为两个品牌下型号重合率阈值，inner_thre为两个品牌下某条型号内关键词重合率阈值，a为权重调整，sheet_name为表单名
    df = pd.read_excel(filepath, sheet_name=sheet_name, converters = {'产品编码':str})
-    df_null = pd.read_excel(".\\异常数据表格.xlsx")
-    invalid_list = df_null['异常数据名称'].values
+    conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',
+                                  database='ZI_NEW', autocommit=True)
+    cursor_zi_new = conn_zi_new.cursor()
+    cursor_zi_new.execute(f"select col_value from data_washing_external where category_name='通用' and col_name='异常数据名称'")
+    invalid_list_fetch = cursor_zi_new.fetchall()
+    invalid_list = []
+    for invalid_tuple in invalid_list_fetch:
+        invalid_list.append(invalid_tuple[0])
    # 处理缺失值
    valid_index=[]
    for i in df.index:

--- a/公共代码/产品类别分析.py
+++ b/公共代码/产品类别分析.py
--- a/公共代码/产品重复型号分析.py
+++ b/公共代码/产品重复型号分析.py
@@ -10,16 +10,29 @@ import pandas as pd
 import re
 import numpy as np
 import xlsxwriter
-
-
-def product_washing(filepath, category,thre=1, a=0):
-    df_null = pd.read_excel(".\\异常数据表格.xlsx")
-    invalid_list = df_null['异常数据名称'].values
+import pymssql
+
+
+def product_washing(filepath, category, thre=1, a=0):
+    conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',
+                                  database='ZI_NEW', autocommit=True)
+    cursor_zi_new = conn_zi_new.cursor()
+    cursor_zi_new.execute(f"select col_value from data_washing_external where category_name='通用' and col_name='异常数据名称'")
+    invalid_list_fetch = cursor_zi_new.fetchall()
+    invalid_list = []
+    for invalid_tuple in invalid_list_fetch:
+        invalid_list.append(invalid_tuple[0])
    df=pd.read_excel(filepath, converters={'产品编码':str})
    df.drop(columns='Unnamed: 0', axis=1, inplace=True)
    for col in df.columns:
        df[col]=df[col].astype(str)

+    cursor_zi_new.execute(f"select col_value from data_washing_external where category_name='{category}' and col_name='重复参数项'")
+    other_parameters_fetch=cursor_zi_new.fetchall()
+    other_parameters=[]
+    for param in other_parameters_fetch:
+        other_parameters.append(param[0])
+
    related_product = []
    brand_grouped = df.groupby(by='产品品牌')
    for brand in brand_grouped:
@@ -86,7 +99,7 @@ def product_washing(filepath, category,thre=1, a=0):
            # brand_combined = temp_list1+temp_list2

            tempo_dict[i] = [set(combined)]
-            other_parameters=df_null['重复参数项'][df_null['类别']==category][df_null['重复参数项'].notnull()].values
+
            other_parameters_values=[]
            for parameter in other_parameters:
                other_parameters_values.append(brand[1].loc[i,parameter])

--- a/公共代码/导出库内参数数据.py
+++ b/公共代码/导出库内参数数据.py
@@ -162,5 +162,5 @@ def get_point_category_params_data(category):
    conn_zi_new.close()


-category = '扫描仪'
+category = '激光打印机'
 get_point_category_params_data(category)
\ No newline at end of file
--- a/公共代码/爬取数据分析.xlsx
+++ b/公共代码/爬取数据分析.xlsx