ZGC_INDEX / 重点类信息提取 · Commits

Commit 62dacfbd, authored May 08, 2021 by Jialin
Commit message: 代码更新 (code update)
Parent: e9bce7fd

Showing 1 changed file with 132 additions and 1 deletion:
公共代码/产品品牌分析.py (+132, -1)
@@ -8,10 +8,141 @@ import xlsxwriter
import numpy as np
import pymssql


def brand_washing_special(filepath, sheet_name, category):
    # Load the sheet, keeping product codes ('产品编码') as strings.
    df = pd.read_excel(filepath, sheet_name=sheet_name, converters={'产品编码': str})
    conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311',
                                  password='zgcprice20200628', database='ZI_NEW',
                                  autocommit=True)
    cursor_zi_new = conn_zi_new.cursor()
    # Fetch the generic ('通用') blacklist of invalid data names ('异常数据名称').
    cursor_zi_new.execute(f"select col_value from data_washing_external where category_name='通用' and col_name='异常数据名称'")
    invalid_list_fetch = cursor_zi_new.fetchall()
    invalid_list = []
    for invalid_tuple in invalid_list_fetch:
        invalid_list.append(invalid_tuple[0])
    # Handle missing values: drop rows whose brand or series is blacklisted.
    valid_index = []
    if category == '复印纸':
        for i in df.index:
            if df.loc[i, '产品品牌'] not in invalid_list and df.loc[i, '产品系列(SPU)'] not in invalid_list:
                valid_index.append(i)
        result = df.loc[valid_index, ['产品品牌', '产品系列(SPU)']]
        # Convert the dataframe values to strings.
        result = result.apply(lambda x: x.astype(str))
        # def xilie_clean(x):
        #     x = x.strip().replace('系列', '').upper()
        #     ch = re.findall(r'([\u4e00-\u9fa5]+)', x)
        #     en = re.findall(r'[0-9a-zA-Z]+', x)
        #     if ch and en:
        #         return
        # Normalize series names: strip, drop the literal '系列', uppercase.
        result['产品系列(SPU)'] = result['产品系列(SPU)'].apply(lambda x: x.strip().replace('系列', '').upper())
        # Collect each brand's unique series list.
        result_groupby = dict(list(result.groupby('产品品牌')))
        brand_dict = {}
        for brand in result_groupby.keys():
            xilie_list = result_groupby[brand]['产品系列(SPU)'].unique().tolist()
            # Drop the generic series name '彩色复印纸' if present.
            try:
                xilie_list.remove('彩色复印纸')
            except ValueError:
                pass
            brand_dict[brand] = xilie_list
    else:
        for i in df.index:
            if df.loc[i, '产品品牌'] not in invalid_list and df.loc[i, '*产品系列'] not in invalid_list:
                valid_index.append(i)
        result = df.loc[valid_index, ['产品品牌', '*产品系列']]
        # Convert the dataframe values to strings.
        result = result.apply(lambda x: x.astype(str))
        # def xilie_clean(x):
        #     x = x.strip().replace('系列', '').upper()
        #     ch = re.findall(r'([\u4e00-\u9fa5]+)', x)
        #     en = re.findall(r'[0-9a-zA-Z]+', x)
        #     if ch and en:
        #         return
        result['*产品系列'] = result['*产品系列'].apply(lambda x: x.strip().replace('系列', '').upper())
        result_groupby = dict(list(result.groupby('产品品牌')))
        brand_dict = {}
        for brand in result_groupby.keys():
            xilie_list = result_groupby[brand]['*产品系列'].unique().tolist()
            brand_dict[brand] = xilie_list
    # Compare every unordered pair of brands once and record shared series.
    related_brand = []
    tested_brand = []
    repeated_xilie = []
    for brand_i in brand_dict.keys():
        for brand_j in brand_dict.keys():
            if brand_i == brand_j or {brand_i, brand_j} in tested_brand:
                continue
            temp_repeated_xilie = []
            for xilie in brand_dict[brand_i]:
                if xilie in brand_dict[brand_j]:
                    temp_repeated_xilie.append(xilie)
            if temp_repeated_xilie:
                repeated_xilie.append(temp_repeated_xilie)
                related_brand.append([brand_i, brand_j])
            tested_brand.append({brand_i, brand_j})
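    # Annotation (added note, not in the original diff): after this scan,
    #   related_brand  holds [brand_i, brand_j] pairs sharing >= 1 series,
    #   repeated_xilie holds the shared series per pair, index-aligned
    #                  with related_brand,
    #   tested_brand   holds {brand_i, brand_j} sets so each unordered
    #                  pair is compared only once.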
    # Write the results to Excel.
    workbook = xlsxwriter.Workbook('./brand_filter.xlsx')
    bold_format = workbook.add_format({'bold': True})
    # Sheet1: suspicious brand pairs, with a blank column for the correct brand.
    worksheet = workbook.add_worksheet(name='Sheet1')
    worksheet.write('A1', '品牌A', bold_format)
    worksheet.write('B1', '品牌B', bold_format)
    worksheet.write('C1', '正确品牌', bold_format)
    col = 0
    row = 1
    for brand_list in related_brand:
        for brand in brand_list:
            worksheet.write_string(row, col, brand)
            col += 1
        row += 1
        col = 0
    # Sheet2: the same pairs plus the series they share.
    worksheet2 = workbook.add_worksheet(name='Sheet2')
    worksheet2.write('A1', '品牌A', bold_format)
    worksheet2.write('B1', '品牌B', bold_format)
    worksheet2.write('C1', '重复系列', bold_format)
    row = 1
    col = 0
    for i in range(len(related_brand)):
        worksheet2.write_string(row, col, related_brand[i][0])
        col += 1
        worksheet2.write_string(row, col, related_brand[i][1])
        col += 1
        worksheet2.write_string(row, col, f"{repeated_xilie[i]}")
        row += 1
        col = 0
    # Sheet3: every brand and its full series list.
    worksheet3 = workbook.add_worksheet(name='Sheet3')
    row = 0
    col = 0
    for key in brand_dict.keys():
        worksheet3.write_string(row, col, key)
        col += 1
        worksheet3.write_string(row, col, f"{brand_dict[key]}")
        col = 0
        row += 1
    workbook.close()
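As an aside, the overlap scan above can be exercised in isolation. A minimal sketch with made-up brand and series names (illustrative only, not from this repository):

# Toy demonstration of the brand-overlap detection in brand_washing_special.
# Brand and series names below are invented for illustration.
brand_dict = {'BrandA': ['X100', 'X200'], 'BrandB': ['X200', 'Y300'], 'BrandC': ['Z1']}
related_brand, tested_brand, repeated_xilie = [], [], []
for brand_i in brand_dict:
    for brand_j in brand_dict:
        if brand_i == brand_j or {brand_i, brand_j} in tested_brand:
            continue
        shared = [x for x in brand_dict[brand_i] if x in brand_dict[brand_j]]
        if shared:
            repeated_xilie.append(shared)
            related_brand.append([brand_i, brand_j])
        tested_brand.append({brand_i, brand_j})
print(related_brand)   # [['BrandA', 'BrandB']]
print(repeated_xilie)  # [['X200']]

Storing each tested pair as a set {brand_i, brand_j} makes the comparison order-insensitive, at the cost of a linear membership test per pair.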
def brand_washing(filepath, thre=0.5, inner_thre=0.5, a=1, sheet_name=0):
    # filepath: path to the Excel file; thre: threshold on the overlap rate of
    # model names between two brands; inner_thre: threshold on the keyword
    # overlap rate within a single model name across two brands; a: weight
    # adjustment; sheet_name: sheet name or index.
    df = pd.read_excel(filepath, sheet_name=sheet_name, converters={'产品编码': str})
    category = df.loc[1, '产品类别']
    # Categories that need the special handling above.
    wrong_category_list = ['台式机', '笔记本', '一体电脑', '复印纸']
    if category in wrong_category_list:
        brand_washing_special(filepath, sheet_name, category)
        return
    conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311',
                                  password='zgcprice20200628', database='ZI_NEW',
                                  autocommit=True)
    cursor_zi_new = conn_zi_new.cursor()
@@ -277,6 +408,6 @@ def brand_washing(filepath,thre=0.5,inner_thre=0.5,a=1,sheet_name=0):
    workbook.close()


if __name__ == '__main__':
    filepath = 'E:\\ZDZC\\激光打印机 参数确认.xlsx'
    filepath = 'E:\\ZDZC\\Sourcetree_local\\公共代码\\复印纸 参数确认.xlsx'
    brand_washing(filepath)
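For reference, the series normalization both branches apply can be checked standalone; the sample strings below are invented for illustration:

# Standalone check of the normalization used above: strip whitespace,
# drop the literal '系列' ("series") suffix, then uppercase.
samples = [' 彩虹系列 ', 'laserjet pro系列', 'a4']
print([s.strip().replace('系列', '').upper() for s in samples])
# -> ['彩虹', 'LASERJET PRO', 'A4']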