update

c3b68a4d · rico.liu · a87c2a18 · c3b68a4d · c3b68a4d
Commit c3b68a4d authored Feb 26, 2020 by rico.liu
Hide whitespace changes
Inline Side-by-side

Showing with 201 additions and 184 deletions

match_program.py match_program.py +189 -183

test_run.py test_run.py +12 -1

No files found.
--- a/match_program.py
+++ b/match_program.py
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Dec 25 13:58:07 2019
-
-@author: rico
-"""
-
-import pandas as pd
-import pymssql
-import os
-from collections import Counter
-import datetime
-'''
-test data load
-'''
-
-def get_test_data():
-    
-    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
-    cursor = conn.cursor()
-    #cursor.execute("select * from vw_electricity_source_price where ProductCode in "+ code_list +" and periods = (select top 1 periods from vw_entry_source_price order by periods desc)")
-    cursor.execute("select * from Product_Api_Data where batch = '20191224' and source = 'JD' and state = 4")
-    test_df = pd.DataFrame([v for v in cursor.fetchall()],columns=[tuple[0] for tuple in cursor.description])
-        
-    cursor.close()
-    conn.close()
-    return test_df
-
-#df = get_test_data()
-
-
-def get_params_df(df):
-    
-    #get productid and batch and source
-    productid_list = df['productId'].unique().tolist()
-    productid_list_str = str(productid_list).replace('[','(').replace(']',')')
-    batch = df['batch'][0]
-    #source = df['source'][0]
-    
-    #search
-    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
-    cursor = conn.cursor()
-    cursor.execute(f"select * from Product_Parameter_Process where productId in {productid_list_str} and batch = '{batch}' and state = 2 and paramAttributeMatch = 1")
-    params_df = pd.DataFrame([v for v in cursor.fetchall()],columns=[tuple[0] for tuple in cursor.description])
-    
-    #comoleted brand info
-    params_df = pd.merge(params_df,df[['productId','brandName','brandCode']],on = 'productId')
-    
-    cursor.close()
-    conn.close()
-    
-    return params_df
-
-def get_db_data(zgc_category_code):
-    
-    try:
-        path = os.getcwd() #获取当前工作目录路径
-        file_list = [c for a,b,c in os.walk(path)][0]
-        
-        date = datetime.datetime.now().strftime('%Y-%m-%d')
-        db_file_name = zgc_category_code +'_'+date+'_db_data.csv'
-        file_path = path+'/'+db_file_name
-        
-        #删除过期文件
-        for file in [file for file in file_list if '_db_data.csv' in file]:
-            if date != file.split('_')[1]:
-                print('删除过期文件')
-                os.remove(path+'/'+file)
-        
-        if db_file_name in file_list:
-            print('获取本地已存数据库数据')
-            db_attr_data = pd.read_csv(file_path,converters={'productcode':str,'SubCategoryCode':str,'BrandCode':str})
-            db_attr_data = db_attr_data.rename(columns={"productcode": "ProductCode","productname":"ProductName","subtitle":"SubTitle","state":"State","value":"Value"})
-            return db_attr_data
-        
-        
-        conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
-        cursor = conn.cursor()
-
-        print('获取最新线上数据库数据')
-        
-        cursor.execute(f"select ProductCode,CfgID,Value from Info_ProductDetail where LEFT(ProductCode,4) = {zgc_category_code}")
-        params = cursor.fetchall()
-        params= pd.DataFrame(params, columns=[tuple[0] for tuple in cursor.description])
-        
-        CfgID_list = params['CfgID'].apply(lambda x:str(x)).unique().tolist()
-        CfgID_list_string =  "','".join(CfgID_list)
-        cursor.execute(f"select SubTitleID,SubTitle from VW_Relation_Property where SubTitleID in ('{CfgID_list_string}')")
-        CfgID_name = cursor.fetchall()
-        CfgID_name = pd.DataFrame(CfgID_name, columns=[tuple[0] for tuple in cursor.description])
-        
-        cursor.execute(f"select ProductCode,ProductName,State,RIGHT(BrandCode,5) as BrandCode from Info_Product where SubCategoryCode = {zgc_category_code}")
-        pro_info = cursor.fetchall()
-        pro_info = pd.DataFrame(pro_info, columns=[tuple[0] for tuple in cursor.description])
-        
-        res = pd.merge(params,CfgID_name,left_on = 'CfgID',right_on = 'SubTitleID')
-        res = pd.merge(res,pro_info,on = 'ProductCode')
-        
-        res.to_csv(file_path)
-        
-        cursor.close()
-        conn.close()
-        
-        return res
-    except OSError:
-        print('链接失败,重新链接')
-        return get_db_data(zgc_category_code)
-    
-    
-def counter(arr):
-    #count list element frequency
-    return Counter(arr).most_common(len(list(set(arr))))
-
-
-def match_with_db(params_df):
-    
-    #return data
-    match_res = pd.DataFrame()
-    match_res['productId'] = params_df['productId'].unique().tolist()
-    match_res['productName'] = [list(params_df[params_df['productId'] == _id]['productName'])[0] for _id in list(match_res['productId'])]
-    match_res['source'] = [list(params_df[params_df['productId'] == _id]['source'])[0] for _id in list(match_res['productId'])]
-    match_res['batch'] = [list(params_df[params_df['productId'] == _id]['batch'])[0] for _id in list(match_res['productId'])]
-    
-    match_code_list = []
-    match_name_list = []
-    #classify data
-    category_list = params_df['subcategorycode'].unique().tolist()
-    
-    for category_code in category_list:
-        print('开始匹配'+category_code+'数据')
-        
-        #category_code = '0101'
-        single_df = params_df[params_df['subcategorycode'] == category_code]
-        
-        #get brand list
-        brand_list = single_df['brandCode'].unique().tolist()
-        #get productid
-        productid_list = single_df['productId'].unique().tolist()
-        
-        #get db data
-        db_attr_data = get_db_data(category_code)
-        db_attr_data = db_attr_data[db_attr_data['BrandCode'].isin(brand_list)]
-        
-        #match
-        for productid in productid_list:
-            #productid = '46262229631'
-            matched_productcode_list = []
-            matched_productname_list = []
-            match_flag = 1
-            df = single_df[single_df['productId'] == productid]
-            
-            df_db = db_attr_data[db_attr_data['BrandCode'] == list(df['brandCode'])[0]]
-            
-            combine_code_list = []
-            for param,value in zip(list(df['param']),list(df['value'])):
-
-                productcode_list = df_db[(df_db['SubTitle'] == param) & (df_db['Value'] == value)]['ProductCode'].unique().tolist()
-                if len(productcode_list) == 0:
-                    match_flag = 0
-                    break
-                combine_code_list += productcode_list
-            
-            if match_flag == 1:
-                match_flag = 0
-                for tuple_ in counter(combine_code_list):
-                    if tuple_[1] == len(df['param']):
-                        match_flag = 1
-                        matched_productcode_list.append(tuple_[0])
-                        matched_productname_list.append(list(df_db[df_db['ProductCode'] == tuple_[0]]['ProductName'])[0])
-            
-            match_code_list.append(matched_productcode_list)
-            match_name_list.append(matched_productname_list)
-    match_res['match_code'] = match_code_list
-    match_res['match_name'] = match_name_list
-    
-    return match_res
-
-def match(df):
-    #enter
-    params_df = get_params_df(df)
-    res = match_with_db(params_df)
-    return res
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Dec 25 13:58:07 2019
+
+@author: rico
+"""
+
+import pandas as pd
+import pymssql
+import os
+from collections import Counter
+import datetime
+'''
+test data load
+'''
+
+def get_test_data():
+    """Load the sample rows used for testing from ``Product_Api_Data``.
+
+    Returns:
+        pandas.DataFrame: every row with ``batch = '2020-02-153'`` and
+        ``state = 4``; columns are named after the cursor description.
+    """
+    # NOTE(review): host and credentials are hard-coded here; consider
+    # loading them from configuration instead of source control.
+    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
+    try:
+        cursor = conn.cursor()
+        try:
+            cursor.execute("select * from Product_Api_Data where batch = '2020-02-153' and state = 4")
+            rows = cursor.fetchall()
+            column_names = [column[0] for column in cursor.description]
+        finally:
+            cursor.close()
+    finally:
+        # Release the connection even when the query fails.
+        conn.close()
+
+    return pd.DataFrame(list(rows),columns=column_names)
+
+#df = get_test_data()
+
+
+def get_params_df(df):
+    """Fetch the processed parameter rows for the products listed in ``df``.
+
+    Args:
+        df: DataFrame with at least the columns ``productId``, ``batch``,
+            ``brandName`` and ``brandCode``. Only the batch of the first
+            row is used for the query.
+
+    Returns:
+        pandas.DataFrame: rows of ``Product_Parameter_Process`` with
+        ``state = 2`` and ``paramAttributeMatch = 1`` for those products
+        and that batch, inner-merged on ``productId`` with the brand
+        columns of ``df``.
+    """
+    productid_list = df['productId'].unique().tolist()
+    # Render the Python list as a SQL tuple literal: ['a', 'b'] -> ('a', 'b').
+    # NOTE(review): the ids are interpolated into the SQL text and an empty
+    # ``df`` produces "in ()"; confirm callers never pass an empty frame.
+    productid_list_str = str(productid_list).replace('[','(').replace(']',')')
+    # Positional access: ``df['batch'][0]`` looked up the index *label* 0 and
+    # raised KeyError for frames whose index does not contain 0.
+    batch = df['batch'].iloc[0]
+
+    # NOTE(review): host and credentials are hard-coded here; consider
+    # loading them from configuration instead of source control.
+    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
+    try:
+        cursor = conn.cursor()
+        try:
+            cursor.execute(f"select * from Product_Parameter_Process where productId in {productid_list_str} and batch = '{batch}' and state = 2 and paramAttributeMatch = 1")
+            rows = cursor.fetchall()
+            column_names = [column[0] for column in cursor.description]
+        finally:
+            cursor.close()
+    finally:
+        # Release the connection even when the query fails.
+        conn.close()
+
+    params_df = pd.DataFrame(list(rows),columns=column_names)
+
+    # Complete each parameter row with the brand information from ``df``.
+    return pd.merge(params_df,df[['productId','brandName','brandCode']],on = 'productId')
+
+def _load_db_data(zgc_category_code):
+    """Make one attempt to load the attribute data of a category.
+
+    Today's cache file in the working directory is used when present;
+    otherwise the database is queried, the merged result is written to the
+    cache file and returned.
+    """
+    path = os.getcwd()
+    # Only the top-level file names are needed, so stop after the first
+    # os.walk entry instead of traversing the whole directory tree.
+    file_list = next(os.walk(path), (path, [], []))[2]
+
+    date = datetime.datetime.now().strftime('%Y-%m-%d')
+    db_file_name = zgc_category_code +'_'+date+'_db_data.csv'
+    file_path = path+'/'+db_file_name
+
+    # Delete cache files whose date field is not today's date.
+    for file in file_list:
+        if '_db_data.csv' in file and date != file.split('_')[1]:
+            print('删除过期文件')
+            os.remove(path+'/'+file)
+
+    if db_file_name in file_list:
+        print('获取本地已存数据库数据')
+        # NOTE(review): columns not listed in ``converters`` are type-inferred
+        # by pandas, so e.g. ``Value`` may differ in dtype from a fresh
+        # database fetch; confirm the matching step is unaffected.
+        db_attr_data = pd.read_csv(file_path,converters={'ProductCode':str,'SubCategoryCode':str,'BrandCode':str})
+        db_attr_data = db_attr_data.rename(columns={"productcode": "ProductCode","productname":"ProductName","subtitle":"SubTitle","state":"State","value":"Value"})
+        return db_attr_data
+
+    # NOTE(review): host and credentials are hard-coded here; consider
+    # loading them from configuration instead of source control.
+    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
+    try:
+        cursor = conn.cursor()
+        try:
+            print('获取最新线上数据库数据')
+
+            # NOTE(review): the category code is interpolated unquoted, so the
+            # comparison relies on the server's implicit conversion; confirm.
+            cursor.execute(f"select ProductCode,CfgID,Value from Info_ProductDetail where LEFT(ProductCode,4) = {zgc_category_code}")
+            params = pd.DataFrame(cursor.fetchall(), columns=[column[0] for column in cursor.description])
+
+            CfgID_list = params['CfgID'].apply(lambda x:str(x)).unique().tolist()
+            CfgID_list_string =  "','".join(CfgID_list)
+            cursor.execute(f"select SubTitleID,SubTitle from VW_Relation_Property where SubTitleID in ('{CfgID_list_string}')")
+            CfgID_name = pd.DataFrame(cursor.fetchall(), columns=[column[0] for column in cursor.description])
+
+            cursor.execute(f"select ProductCode,ProductName,State,RIGHT(BrandCode,5) as BrandCode from Info_Product where SubCategoryCode = {zgc_category_code}")
+            pro_info = pd.DataFrame(cursor.fetchall(), columns=[column[0] for column in cursor.description])
+        finally:
+            cursor.close()
+    finally:
+        # Release the connection even when a query fails.
+        conn.close()
+
+    res = pd.merge(params,CfgID_name,left_on = 'CfgID',right_on = 'SubTitleID')
+    res = pd.merge(res,pro_info,on = 'ProductCode')
+
+    # index=False keeps the cached file's columns identical to ``res``;
+    # writing the index added an "Unnamed: 0" column on the next read.
+    res.to_csv(file_path,index=False)
+
+    return res
+
+
+def get_db_data(zgc_category_code, max_retries=3):
+    """Return the product attribute data of one category.
+
+    The result is cached per category and per day as
+    ``<code>_<YYYY-MM-DD>_db_data.csv`` in the current working directory.
+
+    Args:
+        zgc_category_code: category code as a string; it is concatenated
+            into the cache file name and interpolated into the queries.
+        max_retries: how many times to retry after an ``OSError``. The
+            previous implementation recursed without any limit.
+
+    Returns:
+        pandas.DataFrame: ``Info_ProductDetail`` rows joined with
+        ``VW_Relation_Property`` (on ``CfgID`` = ``SubTitleID``) and
+        ``Info_Product`` (on ``ProductCode``).
+
+    Raises:
+        OSError: when the load still fails after ``max_retries`` retries.
+    """
+    retries_left = max_retries
+    while True:
+        try:
+            return _load_db_data(zgc_category_code)
+        except OSError:
+            if retries_left <= 0:
+                raise
+            retries_left -= 1
+            print('链接失败,重新链接')
+    
+    
+def counter(arr):
+    """Return ``(element, count)`` pairs for every distinct element of ``arr``.
+
+    Pairs are ordered from the most to the least frequent element.
+    """
+    frequencies = Counter(arr)
+    return frequencies.most_common()
+
+
+def _match_product(df, df_db):
+    """Return ``(codes, names)`` of the db products matching every row of ``df``.
+
+    ``df`` holds the ``param``/``value`` rows of one product and ``df_db``
+    the database attribute rows of the same brand.
+    """
+    combine_code_list = []
+    for param,value in zip(df['param'],df['value']):
+        productcode_list = df_db[(df_db['SubTitle'] == param) & (df_db['Value'] == value)]['ProductCode'].unique().tolist()
+        if not productcode_list:
+            # One parameter without any candidate rules out every product.
+            return [], []
+        combine_code_list += productcode_list
+
+    # A product matches when it was a candidate for every parameter row.
+    required_hits = len(df['param'])
+    matched_codes = [code for code,hits in counter(combine_code_list) if hits == required_hits]
+    matched_names = [list(df_db[df_db['ProductCode'] == code]['ProductName'])[0] for code in matched_codes]
+    return matched_codes, matched_names
+
+
+def match_with_db(params_df):
+    """Match every product in ``params_df`` against the database products.
+
+    Args:
+        params_df: DataFrame with the columns ``productId``, ``productName``,
+            ``source``, ``batch``, ``subcategorycode``, ``brandCode``,
+            ``param`` and ``value`` (one row per product parameter).
+
+    Returns:
+        pandas.DataFrame: one row per product, ordered by sub-category, with
+        ``productId``, ``productName``, ``source``, ``batch`` and the list
+        columns ``match_code`` / ``match_name`` holding the database products
+        whose attributes equal all of the product's parameters.
+    """
+    # One row per product, taken from its first parameter row. A single
+    # drop_duplicates replaces the per-product rescans of ``params_df``.
+    result_columns = ['productId','productName','source','batch','subcategorycode']
+    match_res = params_df.drop_duplicates(subset='productId')[result_columns].reset_index(drop=True)
+    # The loops below visit products category by category; sorting keeps the
+    # rows of ``match_res`` in that same order so the result lists line up.
+    match_res.sort_values("subcategorycode",inplace=True)
+
+    match_code_list = []
+    match_name_list = []
+    category_list = match_res['subcategorycode'].unique().tolist()
+
+    for category_code in category_list:
+        print('开始匹配'+category_code+'数据')
+
+        single_df = params_df[params_df['subcategorycode'] == category_code]
+        brand_list = single_df['brandCode'].unique().tolist()
+        productid_list = match_res[match_res['subcategorycode']==category_code]['productId'].unique().tolist()
+
+        # Database attributes of this category, limited to the brands in use.
+        db_attr_data = get_db_data(category_code)
+        db_attr_data = db_attr_data[db_attr_data['BrandCode'].isin(brand_list)]
+
+        for productid in productid_list:
+            df = single_df[single_df['productId'] == productid]
+            df_db = db_attr_data[db_attr_data['BrandCode'] == list(df['brandCode'])[0]]
+
+            matched_productcode_list, matched_productname_list = _match_product(df, df_db)
+            match_code_list.append(matched_productcode_list)
+            match_name_list.append(matched_productname_list)
+
+    match_res['match_code'] = match_code_list
+    match_res['match_name'] = match_name_list
+    # The category column was only needed for ordering.
+    del match_res['subcategorycode']
+
+    return match_res
+
+def match(df):
+    """Entry point: load the parameter rows for ``df`` and match them.
+
+    Passes ``df`` to ``get_params_df`` and returns what ``match_with_db``
+    produces for the resulting parameter rows.
+    """
+    return match_with_db(get_params_df(df))
+
--- a/test_run.py
+++ b/test_run.py
@@ -10,13 +10,14 @@ import match_program

 import pandas as pd
 import pymssql
+import pymysql

 '''
 test data load
 '''

 def get_test_data():
-    
+    '''
    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
    cursor = conn.cursor()
    #cursor.execute("select * from vw_electricity_source_price where ProductCode in "+ code_list +" and periods = (select top 1 periods from vw_entry_source_price order by periods desc)")
@@ -25,6 +26,16 @@ def get_test_data():
        
    cursor.close()
    conn.close()
+    '''
+    conn = pymysql.connect('59.110.219.171','root','qwertyuiop1','product_chongqin')
+    cursor = conn.cursor()
+    #cursor.execute("select * from vw_electricity_source_price where ProductCode in "+ code_list +" and periods = (select top 1 periods from vw_entry_source_price order by periods desc)")
+    cursor.execute("select * from product")
+    test_df = pd.DataFrame([v for v in cursor.fetchall()],columns=[tuple[0] for tuple in cursor.description])
+        
+    cursor.close()
+    conn.close()
+    
    return test_df

 test_df = get_test_data()