init

bc17ee8d · rico.liu · fa65f5c4 · bc17ee8d · bc17ee8d · bc17ee8d
Commit bc17ee8d authored Dec 25, 2019 by rico.liu
5 changed files
--- a/0101_2019-12-25_db_data.csv
+++ b/0101_2019-12-25_db_data.csv
--- a/0526_2019-12-25_db_data.csv
+++ b/0526_2019-12-25_db_data.csv
--- a/__pycache__/match_program.cpython-36.pyc
+++ b/__pycache__/match_program.cpython-36.pyc
--- a/match_program.py
+++ b/match_program.py
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Dec 25 13:58:07 2019
+
+@author: rico
+"""
+
+import pandas as pd
+import pymssql
+import os
+from collections import Counter
+import datetime
+'''
+test data load
+'''
+
+def get_test_data():
+    """Load the sample JD product rows used to exercise the matcher.
+
+    Runs a fixed query against ``Product_Api_Data`` (batch ``20191224``,
+    source ``JD``, state ``4``) and returns the rows as a DataFrame whose
+    columns are named after the cursor's result columns.
+
+    Returns:
+        pandas.DataFrame: one row per record returned by the query.
+    """
+    # NOTE(review): the server address and credentials are hard-coded in the
+    # source; they should be moved to configuration or the environment.
+    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
+    try:
+        cursor = conn.cursor()
+        try:
+            cursor.execute("select * from Product_Api_Data where batch = '20191224' and source = 'JD' and state = 4")
+            rows = cursor.fetchall()
+            # ``col`` instead of ``tuple`` so the builtin is not shadowed.
+            column_names = [col[0] for col in cursor.description]
+        finally:
+            cursor.close()
+    finally:
+        # Release the connection even when the query or the fetch raises.
+        conn.close()
+    return pd.DataFrame(rows, columns=column_names)
+
+#df = get_test_data()
+
+
+def get_params_df(df):
+    """Fetch the processed parameters for the products listed in ``df``.
+
+    Queries ``Product_Parameter_Process`` for every distinct ``productId`` in
+    ``df``, restricted to the batch and source taken from the first row of
+    ``df`` and to rows with ``state = 2`` and ``paramAttributeMatch = 1``.
+    The brand columns of ``df`` are then joined onto the result.
+
+    Args:
+        df: DataFrame with at least the columns ``productId``, ``batch``,
+            ``source``, ``brandName`` and ``brandCode``.
+
+    Returns:
+        pandas.DataFrame: the parameter rows merged on ``productId`` with
+        ``brandName`` and ``brandCode`` from ``df``.
+
+    Raises:
+        ValueError: if ``df`` has no rows (there is nothing to look up and
+            ``in ()`` would be invalid SQL).
+    """
+    if df.empty:
+        raise ValueError('df must contain at least one product row')
+
+    # get productid and batch and source
+    productid_list = df['productId'].unique().tolist()
+    # Positional access: label 0 is absent when df was filtered or re-indexed.
+    batch = str(df['batch'].iloc[0])
+    source = str(df['source'].iloc[0])
+
+    # One placeholder per id so the driver quotes every value itself instead
+    # of the statement being assembled from repr() output.
+    placeholders = ','.join(['%s'] * len(productid_list))
+    query = (
+        "select * from Product_Parameter_Process where productId in ("
+        + placeholders
+        + ") and batch = %s and source = %s and state = 2 and paramAttributeMatch = 1"
+    )
+    query_args = tuple(productid_list) + (batch, source)
+
+    # search
+    # NOTE(review): the server address and credentials are hard-coded in the
+    # source; they should be moved to configuration or the environment.
+    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
+    try:
+        cursor = conn.cursor()
+        try:
+            cursor.execute(query, query_args)
+            rows = cursor.fetchall()
+            column_names = [col[0] for col in cursor.description]
+        finally:
+            cursor.close()
+    finally:
+        # Release the connection even when the query or the fetch raises.
+        conn.close()
+
+    params_df = pd.DataFrame(rows, columns=column_names)
+
+    # complete the brand info
+    return pd.merge(params_df,df[['productId','brandName','brandCode']],on = 'productId')
+
+def _query_db_data(zgc_category_code):
+    """Query the online database for one category and return the joined frame."""
+    # NOTE(review): the server address and credentials are hard-coded in the
+    # source; they should be moved to configuration or the environment.
+    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
+    try:
+        cursor = conn.cursor()
+
+        print('获取最新线上数据库数据')
+
+        # The category code is bound as a parameter (compared as a string)
+        # rather than being pasted unquoted into the statement.
+        cursor.execute("select ProductCode,CfgID,Value from Info_ProductDetail where LEFT(ProductCode,4) = %s", (zgc_category_code,))
+        params = pd.DataFrame(cursor.fetchall(), columns=[col[0] for col in cursor.description])
+
+        # Fall back to a single empty id so the IN list is never empty.
+        cfg_ids = params['CfgID'].apply(str).unique().tolist() or ['']
+        placeholders = ','.join(['%s'] * len(cfg_ids))
+        cursor.execute("select SubTitleID,SubTitle from VW_Relation_Property where SubTitleID in (" + placeholders + ")", tuple(cfg_ids))
+        cfg_names = pd.DataFrame(cursor.fetchall(), columns=[col[0] for col in cursor.description])
+
+        cursor.execute("select ProductCode,ProductName,State,RIGHT(BrandCode,5) as BrandCode from Info_Product where SubCategoryCode = %s", (zgc_category_code,))
+        pro_info = pd.DataFrame(cursor.fetchall(), columns=[col[0] for col in cursor.description])
+    finally:
+        # Closing the connection also releases its cursor, even on errors.
+        conn.close()
+
+    res = pd.merge(params,cfg_names,left_on = 'CfgID',right_on = 'SubTitleID')
+    return pd.merge(res,pro_info,on = 'ProductCode')
+
+
+def _load_or_fetch_db_data(zgc_category_code):
+    """Return today's cached data for the category, fetching and caching it if absent."""
+    path = os.getcwd()  # current working directory
+    # Only the top-level files are needed; do not traverse the whole tree.
+    file_list = next(os.walk(path))[2]
+
+    date = datetime.datetime.now().strftime('%Y-%m-%d')
+    db_file_name = zgc_category_code +'_'+date+'_db_data.csv'
+    file_path = os.path.join(path, db_file_name)
+
+    # delete cache files whose date part is not today's
+    for file_name in file_list:
+        if '_db_data.csv' in file_name and date != file_name.split('_')[1]:
+            print('删除过期文件')
+            os.remove(os.path.join(path, file_name))
+
+    if db_file_name in file_list:
+        print('获取本地已存数据库数据')
+        # Code columns must stay strings or leading zeros are lost. The
+        # canonical ``ProductCode`` spelling is what the fetch path writes;
+        # the lower-case key is kept for files with lower-case headers.
+        # NOTE(review): other columns (e.g. Value) are still type-inferred by
+        # read_csv and may differ from the freshly fetched data - confirm.
+        db_attr_data = pd.read_csv(file_path,converters={'ProductCode':str,'productcode':str,'SubCategoryCode':str,'BrandCode':str})
+        db_attr_data = db_attr_data.rename(columns={"productcode": "ProductCode","productname":"ProductName","subtitle":"SubTitle","state":"State","value":"Value"})
+        return db_attr_data
+
+    res = _query_db_data(zgc_category_code)
+    # index=False keeps the row index out of the cache file so it does not
+    # come back as an extra unnamed column.
+    res.to_csv(file_path, index=False)
+    return res
+
+
+def get_db_data(zgc_category_code, max_retries=3):
+    """Return product attribute data for one category, cached per day on disk.
+
+    The data is cached in the current working directory in a file named
+    ``<category>_<YYYY-MM-DD>_db_data.csv``. Cache files in that directory
+    whose date part differs from today's are deleted. When today's file
+    exists it is read back; otherwise the online database is queried, the
+    result is written to the cache file and returned.
+
+    Args:
+        zgc_category_code: category code as a string; it is used both in the
+            cache file name and as the query filter.
+        max_retries: how many times to retry after an ``OSError`` before
+            giving up. The retry used to be unbounded recursion.
+
+    Returns:
+        pandas.DataFrame: product parameters joined with their sub-title
+        names and product info.
+
+    Raises:
+        OSError: if the operation still fails after ``max_retries`` retries.
+    """
+    attempt = 0
+    while True:
+        try:
+            return _load_or_fetch_db_data(zgc_category_code)
+        except OSError:
+            attempt += 1
+            if attempt > max_retries:
+                # A persistent failure would otherwise retry forever.
+                raise
+            print('链接失败,重新链接')
+    
+    
+def counter(arr):
+    """Count element frequencies in ``arr``.
+
+    Args:
+        arr: iterable of hashable elements.
+
+    Returns:
+        list[tuple]: ``(element, count)`` pairs for every distinct element,
+        most frequent first.
+    """
+    # most_common() without an argument already returns every distinct
+    # element, so ``arr`` is traversed once and one-shot iterators work too.
+    return Counter(arr).most_common()
+
+
+def _match_product(product_df, brand_db):
+    """Find the DB products of one brand that match every parameter of a product.
+
+    Args:
+        product_df: parameter rows (``param``/``value``) of a single product.
+        brand_db: DB attribute rows (``SubTitle``/``Value``/``ProductCode``/
+            ``ProductName``) already restricted to the product's brand.
+
+    Returns:
+        tuple[list, list]: matched product codes and their product names, in
+        the order the codes appear for the first parameter.
+    """
+    candidates = None
+    for param, value in zip(product_df['param'], product_df['value']):
+        hits = brand_db[(brand_db['SubTitle'] == param) & (brand_db['Value'] == value)]['ProductCode'].unique().tolist()
+        if candidates is None:
+            candidates = hits
+        else:
+            # Keep only the codes that also match this parameter.
+            hit_set = set(hits)
+            candidates = [code for code in candidates if code in hit_set]
+        if not candidates:
+            # One parameter without a common match rules the product out.
+            return [], []
+
+    if not candidates:
+        return [], []
+    names = [brand_db.loc[brand_db['ProductCode'] == code, 'ProductName'].iloc[0] for code in candidates]
+    return candidates, names
+
+
+def match_with_db(params_df):
+    """Match every product in ``params_df`` against the database products.
+
+    A database product matches when, within the same brand, it has a
+    ``SubTitle``/``Value`` pair equal to every ``param``/``value`` pair of the
+    product. Products are processed category by category, using the data
+    returned by ``get_db_data`` for each ``subcategorycode``.
+
+    Args:
+        params_df: DataFrame with the columns ``productId``, ``productName``,
+            ``source``, ``batch``, ``subcategorycode``, ``brandCode``,
+            ``param`` and ``value``.
+
+    Returns:
+        pandas.DataFrame: one row per distinct ``productId`` with the columns
+        ``productId``, ``productName``, ``source``, ``batch``, ``match_code``
+        (list of matched product codes) and ``match_name`` (list of the
+        matched product names).
+    """
+    # One row per product, taking the values of its first occurrence.
+    match_res = params_df.drop_duplicates('productId')[['productId','productName','source','batch']].reset_index(drop=True)
+
+    # Results are keyed by productId so they stay aligned with match_res even
+    # when the categories are interleaved in params_df.
+    matches = {}
+
+    # classify data
+    for category_code in params_df['subcategorycode'].unique().tolist():
+        print('开始匹配'+category_code+'数据')
+
+        single_df = params_df[params_df['subcategorycode'] == category_code]
+
+        # get brand list
+        brand_list = single_df['brandCode'].unique().tolist()
+
+        # get db data, restricted to the brands present in this category
+        db_attr_data = get_db_data(category_code)
+        db_attr_data = db_attr_data[db_attr_data['BrandCode'].isin(brand_list)]
+
+        # match
+        for productid, product_df in single_df.groupby('productId', sort=False):
+            brand_db = db_attr_data[db_attr_data['BrandCode'] == product_df['brandCode'].iloc[0]]
+            codes, names = _match_product(product_df, brand_db)
+            code_acc, name_acc = matches.setdefault(productid, ([], []))
+            code_acc.extend(codes)
+            name_acc.extend(names)
+
+    product_ids = list(match_res['productId'])
+    match_res['match_code'] = [matches.get(pid, ([], []))[0] for pid in product_ids]
+    match_res['match_name'] = [matches.get(pid, ([], []))[1] for pid in product_ids]
+
+    return match_res
+
+def match(df):
+    """Entry point: load the parameters for ``df`` and match them against the DB.
+
+    Args:
+        df: product rows, passed unchanged to ``get_params_df``.
+
+    Returns:
+        The result of ``match_with_db`` for those parameters.
+    """
+    return match_with_db(get_params_df(df))
\ No newline at end of file
--- a/test_run.py
+++ b/test_run.py
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Dec 25 17:00:43 2019
+
+@author: rico
+"""
+
+import match_program
+
+import pandas as pd
+import pymssql
+
+'''
+test data load
+'''
+
+def get_test_data():
+    """Load the sample JD product rows used to exercise the matcher.
+
+    Runs a fixed query against ``Product_Api_Data`` (batch ``20191224``,
+    source ``JD``, state ``4``) and returns the rows as a DataFrame whose
+    columns are named after the cursor's result columns.
+
+    Returns:
+        pandas.DataFrame: one row per record returned by the query.
+    """
+    # NOTE(review): the server address and credentials are hard-coded in the
+    # source; they should be moved to configuration or the environment.
+    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
+    try:
+        cursor = conn.cursor()
+        try:
+            cursor.execute("select * from Product_Api_Data where batch = '20191224' and source = 'JD' and state = 4")
+            rows = cursor.fetchall()
+            # ``col`` instead of ``tuple`` so the builtin is not shadowed.
+            column_names = [col[0] for col in cursor.description]
+        finally:
+            cursor.close()
+    finally:
+        # Release the connection even when the query or the fetch raises.
+        conn.close()
+    return pd.DataFrame(rows, columns=column_names)
+
+# Load the sample rows; this runs as soon as the script is executed or imported.
+test_df = get_test_data()
+
+# Match the sample rows against the database; the result is kept in test_res.
+test_res = match_program.match(test_df)
\ No newline at end of file