Commit c3b68a4d authored by rico.liu

update

parent a87c2a18
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 25 13:58:07 2019
@author: rico
"""
import pandas as pd
import pymssql
import os
from collections import Counter
import datetime
'''
test data load
'''
def get_test_data():
    """Load a fixed sample of rows from ``Product_Api_Data`` for manual testing.

    Returns:
        pandas.DataFrame: every column of the rows whose batch is
        '20191224', source is 'JD' and state is 4. Column names are taken
        from the cursor description.
    """
    # NOTE(review): connection credentials are hard-coded here; consider
    # moving them to configuration.
    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("select * from Product_Api_Data where batch = '20191224' and source = 'JD' and state = 4")
            rows = cursor.fetchall()
            columns = [column[0] for column in cursor.description]
        finally:
            # Release the cursor even when the query fails.
            cursor.close()
    finally:
        conn.close()
    return pd.DataFrame(rows, columns=columns)
#df = get_test_data()
def get_params_df(df):
    """Fetch the processed parameters for the products listed in ``df``.

    Rows are read from ``Product_Parameter_Process`` for the product ids in
    ``df`` and the batch of the first row of ``df`` (state = 2 and
    paramAttributeMatch = 1), then joined with the brand columns of ``df``.

    Args:
        df: DataFrame with at least the columns productId, batch, brandName
            and brandCode.

    Returns:
        pandas.DataFrame: the parameter rows merged with
        productId/brandName/brandCode from ``df`` on productId.

    Raises:
        ValueError: if ``df`` has no rows (there is nothing to query for).
    """
    if df.empty:
        raise ValueError('get_params_df requires at least one product row')

    productid_list = df['productId'].unique().tolist()
    # Positional access: the first row's batch, whatever the index labels are.
    batch = str(df['batch'].iloc[0])

    # One placeholder per product id; the driver quotes the values itself.
    placeholders = ','.join(['%s'] * len(productid_list))
    query = (
        "select * from Product_Parameter_Process "
        f"where productId in ({placeholders}) and batch = %s "
        "and state = 2 and paramAttributeMatch = 1"
    )

    # NOTE(review): connection credentials are hard-coded here; consider
    # moving them to configuration.
    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(query, tuple(productid_list) + (batch,))
            rows = cursor.fetchall()
            columns = [column[0] for column in cursor.description]
        finally:
            cursor.close()
    finally:
        conn.close()

    params_df = pd.DataFrame(rows, columns=columns)
    # Complete each parameter row with the product's brand information.
    return pd.merge(params_df, df[['productId','brandName','brandCode']], on='productId')
def get_db_data(zgc_category_code, max_retries=3):
    """Return the reference attribute data for one sub-category.

    The data is cached per day in the current working directory as
    ``<code>_<YYYY-MM-DD>_db_data.csv``. Cache files from other days are
    deleted; today's file is read if present, otherwise the data is queried
    from the database and written to the cache.

    Args:
        zgc_category_code: sub-category code as a string (it is used in the
            cache file name and as a query parameter).
        max_retries: how many attempts to make when an ``OSError`` occurs.
            Values below 1 are treated as 1.

    Returns:
        pandas.DataFrame: product detail rows joined with their property
        names and product information.

    Raises:
        OSError: if every attempt failed with an ``OSError``. Other
            exceptions are not retried and propagate immediately.
    """
    attempts = max(1, max_retries)
    for attempt in range(1, attempts + 1):
        try:
            return _read_or_fetch_db_data(zgc_category_code)
        except OSError:
            print('链接失败,重新链接')
            if attempt == attempts:
                # Bounded retry: give up instead of recursing forever.
                raise


def _read_or_fetch_db_data(zgc_category_code):
    """Serve today's cache file if it exists, else query and cache the data."""
    path = os.getcwd()
    # Only the top-level, non-directory entries are needed.
    with os.scandir(path) as entries:
        file_list = [entry.name for entry in entries if not entry.is_dir()]
    date = datetime.datetime.now().strftime('%Y-%m-%d')
    db_file_name = zgc_category_code +'_'+date+'_db_data.csv'
    file_path = os.path.join(path, db_file_name)

    # Delete cache files written on a different day.
    for name in file_list:
        if '_db_data.csv' in name and name.split('_')[1] != date:
            print('删除过期文件')
            os.remove(os.path.join(path, name))

    if db_file_name in file_list:
        print('获取本地已存数据库数据')
        # Codes must stay strings so leading zeros survive the round trip;
        # both header spellings are listed because the rename below accepts
        # lower-case headers as well.
        db_attr_data = pd.read_csv(
            file_path,
            converters={
                'productcode': str,
                'ProductCode': str,
                'SubCategoryCode': str,
                'BrandCode': str,
            },
        )
        # NOTE(review): Value/SubTitle are type-inferred by read_csv and may
        # differ in dtype from freshly queried data - confirm against callers.
        return db_attr_data.rename(columns={"productcode": "ProductCode","productname":"ProductName","subtitle":"SubTitle","state":"State","value":"Value"})

    res = _fetch_db_data_online(zgc_category_code)
    # The index carries no information, so it is not written to the cache.
    res.to_csv(file_path, index=False)
    return res


def _cursor_to_frame(cursor):
    """Turn the current result set of ``cursor`` into a DataFrame."""
    return pd.DataFrame(cursor.fetchall(), columns=[column[0] for column in cursor.description])


def _fetch_db_data_online(zgc_category_code):
    """Query detail rows, property names and product info and join them."""
    # NOTE(review): connection credentials are hard-coded here; consider
    # moving them to configuration.
    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
    try:
        cursor = conn.cursor()
        try:
            print('获取最新线上数据库数据')
            # The code is passed as a parameter so it is compared as text
            # rather than being interpolated as a bare number.
            cursor.execute(
                "select ProductCode,CfgID,Value from Info_ProductDetail where LEFT(ProductCode,4) = %s",
                (zgc_category_code,),
            )
            params = _cursor_to_frame(cursor)

            cfg_ids = params['CfgID'].map(str).unique().tolist()
            if cfg_ids:
                placeholders = ','.join(['%s'] * len(cfg_ids))
                cursor.execute(
                    f"select SubTitleID,SubTitle from VW_Relation_Property where SubTitleID in ({placeholders})",
                    tuple(cfg_ids),
                )
                cfg_names = _cursor_to_frame(cursor)
            else:
                # No detail rows: nothing to look up, keep the join columns.
                cfg_names = pd.DataFrame(columns=['SubTitleID', 'SubTitle'])

            cursor.execute(
                "select ProductCode,ProductName,State,RIGHT(BrandCode,5) as BrandCode from Info_Product where SubCategoryCode = %s",
                (zgc_category_code,),
            )
            pro_info = _cursor_to_frame(cursor)
        finally:
            cursor.close()
    finally:
        conn.close()

    res = pd.merge(params, cfg_names, left_on='CfgID', right_on='SubTitleID')
    return pd.merge(res, pro_info, on='ProductCode')
def counter(arr):
    """Count how often each element of ``arr`` occurs.

    Args:
        arr: iterable of hashable elements.

    Returns:
        list of ``(element, count)`` tuples, most frequent first; elements
        with equal counts keep the order in which they were first seen.
    """
    # most_common() without an argument already returns every element, so
    # there is no need to build a set just to compute the length.
    return Counter(arr).most_common()
def match_with_db(params_df, db_loader=None):
    """Match scraped product parameters against reference-database products.

    For every product in ``params_df`` the reference rows of the same
    sub-category and brand are searched. A reference product is a match when
    every (param, value) row of the scraped product has an equal
    (SubTitle, Value) row for that reference product.

    Args:
        params_df: DataFrame with the columns productId, productName, source,
            batch, subcategorycode, brandCode, param and value.
        db_loader: optional callable that takes a sub-category code and
            returns the reference DataFrame (columns ProductCode, ProductName,
            BrandCode, SubTitle, Value). Defaults to ``get_db_data``.

    Returns:
        pandas.DataFrame: one row per productId, in order of first
        appearance, with the columns productId, productName, source, batch,
        match_code and match_name. The last two hold lists of the matched
        reference product codes and names (empty when nothing matched).
    """
    if db_loader is None:
        db_loader = get_db_data

    # One summary row per product, taking the first row seen for each id.
    match_res = (
        params_df.drop_duplicates('productId')
        [['productId', 'productName', 'source', 'batch']]
        .reset_index(drop=True)
    )

    # Results are keyed by product id so they line up with match_res rows
    # even when products of different categories are interleaved.
    matches = {}
    for category_code in params_df['subcategorycode'].unique().tolist():
        print('开始匹配'+category_code+'数据')
        single_df = params_df[params_df['subcategorycode'] == category_code]
        brand_list = single_df['brandCode'].unique().tolist()

        db_attr_data = db_loader(category_code)
        db_attr_data = db_attr_data[db_attr_data['BrandCode'].isin(brand_list)]

        brand_frames = {}
        for productid in single_df['productId'].unique().tolist():
            product_params = single_df[single_df['productId'] == productid]
            brand = product_params['brandCode'].iloc[0]
            if brand not in brand_frames:
                # The brand slice is the same for every product of a brand.
                brand_frames[brand] = db_attr_data[db_attr_data['BrandCode'] == brand]
            codes, names = _match_product(product_params, brand_frames[brand])
            # NOTE(review): a product listed under several categories
            # accumulates the matches of each - confirm this is intended.
            entry = matches.setdefault(productid, ([], []))
            entry[0].extend(codes)
            entry[1].extend(names)

    match_code_list = []
    match_name_list = []
    for productid in match_res['productId']:
        codes, names = matches.get(productid, ([], []))
        match_code_list.append(codes)
        match_name_list.append(names)
    match_res['match_code'] = match_code_list
    match_res['match_name'] = match_name_list
    return match_res


def _match_product(product_params, brand_db):
    """Return (codes, names) of reference products matching every parameter row."""
    hits = Counter()
    for param, value in zip(product_params['param'], product_params['value']):
        selector = (brand_db['SubTitle'] == param) & (brand_db['Value'] == value)
        codes = brand_db.loc[selector, 'ProductCode'].unique().tolist()
        if not codes:
            # A parameter nobody has: no reference product can match them all.
            return [], []
        hits.update(codes)

    required = len(product_params)
    matched_codes = [code for code, count in hits.items() if count == required]
    matched_names = [
        brand_db.loc[brand_db['ProductCode'] == code, 'ProductName'].iloc[0]
        for code in matched_codes
    ]
    return matched_codes, matched_names
def match(df):
    """Entry point: load the parameters for ``df`` and match them.

    Args:
        df: product DataFrame accepted by ``get_params_df``.

    Returns:
        The DataFrame produced by ``match_with_db`` for those parameters.
    """
    return match_with_db(get_params_df(df))
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 25 13:58:07 2019
@author: rico
"""
import pandas as pd
import pymssql
import os
from collections import Counter
import datetime
'''
test data load
'''
def get_test_data():
    """Load a fixed sample of rows from ``Product_Api_Data`` for manual testing.

    Returns:
        pandas.DataFrame: every column of the rows whose batch is
        '2020-02-153' and whose state is 4. Column names are taken from the
        cursor description.
    """
    # NOTE(review): connection credentials are hard-coded here; consider
    # moving them to configuration.
    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("select * from Product_Api_Data where batch = '2020-02-153' and state = 4")
            rows = cursor.fetchall()
            columns = [column[0] for column in cursor.description]
        finally:
            # Release the cursor even when the query fails.
            cursor.close()
    finally:
        conn.close()
    return pd.DataFrame(rows, columns=columns)
#df = get_test_data()
def get_params_df(df):
    """Fetch the processed parameters for the products listed in ``df``.

    Rows are read from ``Product_Parameter_Process`` for the product ids in
    ``df`` and the batch of the first row of ``df`` (state = 2 and
    paramAttributeMatch = 1), then joined with the brand columns of ``df``.

    Args:
        df: DataFrame with at least the columns productId, batch, brandName
            and brandCode.

    Returns:
        pandas.DataFrame: the parameter rows merged with
        productId/brandName/brandCode from ``df`` on productId.

    Raises:
        ValueError: if ``df`` has no rows (there is nothing to query for).
    """
    if df.empty:
        raise ValueError('get_params_df requires at least one product row')

    productid_list = df['productId'].unique().tolist()
    # Positional access: the first row's batch, whatever the index labels are.
    batch = str(df['batch'].iloc[0])

    # One placeholder per product id; the driver quotes the values itself.
    placeholders = ','.join(['%s'] * len(productid_list))
    query = (
        "select * from Product_Parameter_Process "
        f"where productId in ({placeholders}) and batch = %s "
        "and state = 2 and paramAttributeMatch = 1"
    )

    # NOTE(review): connection credentials are hard-coded here; consider
    # moving them to configuration.
    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(query, tuple(productid_list) + (batch,))
            rows = cursor.fetchall()
            columns = [column[0] for column in cursor.description]
        finally:
            cursor.close()
    finally:
        conn.close()

    params_df = pd.DataFrame(rows, columns=columns)
    # Complete each parameter row with the product's brand information.
    return pd.merge(params_df, df[['productId','brandName','brandCode']], on='productId')
def get_db_data(zgc_category_code, max_retries=3):
    """Return the reference attribute data for one sub-category.

    The data is cached per day in the current working directory as
    ``<code>_<YYYY-MM-DD>_db_data.csv``. Cache files from other days are
    deleted; today's file is read if present, otherwise the data is queried
    from the database and written to the cache.

    Args:
        zgc_category_code: sub-category code as a string (it is used in the
            cache file name and as a query parameter).
        max_retries: how many attempts to make when an ``OSError`` occurs.
            Values below 1 are treated as 1.

    Returns:
        pandas.DataFrame: product detail rows joined with their property
        names and product information.

    Raises:
        OSError: if every attempt failed with an ``OSError``. Other
            exceptions are not retried and propagate immediately.
    """
    attempts = max(1, max_retries)
    for attempt in range(1, attempts + 1):
        try:
            return _read_or_fetch_db_data(zgc_category_code)
        except OSError:
            print('链接失败,重新链接')
            if attempt == attempts:
                # Bounded retry: give up instead of recursing forever.
                raise


def _read_or_fetch_db_data(zgc_category_code):
    """Serve today's cache file if it exists, else query and cache the data."""
    path = os.getcwd()
    # Only the top-level, non-directory entries are needed.
    with os.scandir(path) as entries:
        file_list = [entry.name for entry in entries if not entry.is_dir()]
    date = datetime.datetime.now().strftime('%Y-%m-%d')
    db_file_name = zgc_category_code +'_'+date+'_db_data.csv'
    file_path = os.path.join(path, db_file_name)

    # Delete cache files written on a different day.
    for name in file_list:
        if '_db_data.csv' in name and name.split('_')[1] != date:
            print('删除过期文件')
            os.remove(os.path.join(path, name))

    if db_file_name in file_list:
        print('获取本地已存数据库数据')
        # Codes must stay strings so leading zeros survive the round trip.
        db_attr_data = pd.read_csv(file_path,converters={'ProductCode':str,'SubCategoryCode':str,'BrandCode':str})
        # NOTE(review): Value/SubTitle are type-inferred by read_csv and may
        # differ in dtype from freshly queried data - confirm against callers.
        return db_attr_data.rename(columns={"productcode": "ProductCode","productname":"ProductName","subtitle":"SubTitle","state":"State","value":"Value"})

    res = _fetch_db_data_online(zgc_category_code)
    # The index carries no information, so it is not written to the cache.
    res.to_csv(file_path, index=False)
    return res


def _cursor_to_frame(cursor):
    """Turn the current result set of ``cursor`` into a DataFrame."""
    return pd.DataFrame(cursor.fetchall(), columns=[column[0] for column in cursor.description])


def _fetch_db_data_online(zgc_category_code):
    """Query detail rows, property names and product info and join them."""
    # NOTE(review): connection credentials are hard-coded here; consider
    # moving them to configuration.
    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
    try:
        cursor = conn.cursor()
        try:
            print('获取最新线上数据库数据')
            # The code is passed as a parameter so it is compared as text
            # rather than being interpolated as a bare number.
            cursor.execute(
                "select ProductCode,CfgID,Value from Info_ProductDetail where LEFT(ProductCode,4) = %s",
                (zgc_category_code,),
            )
            params = _cursor_to_frame(cursor)

            cfg_ids = params['CfgID'].map(str).unique().tolist()
            if cfg_ids:
                placeholders = ','.join(['%s'] * len(cfg_ids))
                cursor.execute(
                    f"select SubTitleID,SubTitle from VW_Relation_Property where SubTitleID in ({placeholders})",
                    tuple(cfg_ids),
                )
                cfg_names = _cursor_to_frame(cursor)
            else:
                # No detail rows: nothing to look up, keep the join columns.
                cfg_names = pd.DataFrame(columns=['SubTitleID', 'SubTitle'])

            cursor.execute(
                "select ProductCode,ProductName,State,RIGHT(BrandCode,5) as BrandCode from Info_Product where SubCategoryCode = %s",
                (zgc_category_code,),
            )
            pro_info = _cursor_to_frame(cursor)
        finally:
            cursor.close()
    finally:
        conn.close()

    res = pd.merge(params, cfg_names, left_on='CfgID', right_on='SubTitleID')
    return pd.merge(res, pro_info, on='ProductCode')
def counter(arr):
    """Count how often each element of ``arr`` occurs.

    Args:
        arr: iterable of hashable elements.

    Returns:
        list of ``(element, count)`` tuples, most frequent first; elements
        with equal counts keep the order in which they were first seen.
    """
    # most_common() without an argument already returns every element, so
    # there is no need to build a set just to compute the length.
    return Counter(arr).most_common()
def match_with_db(params_df, db_loader=None):
    """Match scraped product parameters against reference-database products.

    For every product in ``params_df`` the reference rows of the product's
    sub-category and brand are searched. A reference product is a match when
    every (param, value) row of the scraped product has an equal
    (SubTitle, Value) row for that reference product.

    Args:
        params_df: DataFrame with the columns productId, productName, source,
            batch, subcategorycode, brandCode, param and value.
        db_loader: optional callable that takes a sub-category code and
            returns the reference DataFrame (columns ProductCode, ProductName,
            BrandCode, SubTitle, Value). Defaults to ``get_db_data``.

    Returns:
        pandas.DataFrame: one row per productId, ordered by the sub-category
        code of the product's first row, with the columns productId,
        productName, source, batch, match_code and match_name. The last two
        hold lists of the matched reference product codes and names (empty
        when nothing matched).
    """
    if db_loader is None:
        db_loader = get_db_data

    # One summary row per product, taking the first row seen for each id.
    match_res = (
        params_df.drop_duplicates('productId')
        [['productId', 'productName', 'source', 'batch', 'subcategorycode']]
        .reset_index(drop=True)
    )
    # A stable sort keeps products of one category in first-appearance order.
    match_res = match_res.sort_values('subcategorycode', kind='mergesort')

    # Results are keyed by product id so the final columns line up with the
    # rows of match_res regardless of processing order.
    matches = {}
    for category_code in match_res['subcategorycode'].unique().tolist():
        print('开始匹配'+category_code+'数据')
        single_df = params_df[params_df['subcategorycode'] == category_code]
        brand_list = single_df['brandCode'].unique().tolist()

        db_attr_data = db_loader(category_code)
        db_attr_data = db_attr_data[db_attr_data['BrandCode'].isin(brand_list)]

        in_category = match_res['subcategorycode'] == category_code
        brand_frames = {}
        for productid in match_res.loc[in_category, 'productId'].tolist():
            product_params = single_df[single_df['productId'] == productid]
            brand = product_params['brandCode'].iloc[0]
            if brand not in brand_frames:
                # The brand slice is the same for every product of a brand.
                brand_frames[brand] = db_attr_data[db_attr_data['BrandCode'] == brand]
            matches[productid] = _match_product(product_params, brand_frames[brand])

    match_code_list = []
    match_name_list = []
    for productid in match_res['productId']:
        codes, names = matches.get(productid, ([], []))
        match_code_list.append(codes)
        match_name_list.append(names)
    match_res['match_code'] = match_code_list
    match_res['match_name'] = match_name_list
    # The category column was only needed for grouping.
    return match_res.drop(columns=['subcategorycode'])


def _match_product(product_params, brand_db):
    """Return (codes, names) of reference products matching every parameter row."""
    hits = Counter()
    for param, value in zip(product_params['param'], product_params['value']):
        selector = (brand_db['SubTitle'] == param) & (brand_db['Value'] == value)
        codes = brand_db.loc[selector, 'ProductCode'].unique().tolist()
        if not codes:
            # A parameter nobody has: no reference product can match them all.
            return [], []
        hits.update(codes)

    required = len(product_params)
    matched_codes = [code for code, count in hits.items() if count == required]
    matched_names = [
        brand_db.loc[brand_db['ProductCode'] == code, 'ProductName'].iloc[0]
        for code in matched_codes
    ]
    return matched_codes, matched_names
def match(df):
    """Entry point: load the parameters for ``df`` and match them.

    Args:
        df: product DataFrame accepted by ``get_params_df``.

    Returns:
        The DataFrame produced by ``match_with_db`` for those parameters.
    """
    return match_with_db(get_params_df(df))
......@@ -10,13 +10,14 @@ import match_program
import pandas as pd
import pymssql
import pymysql
'''
test data load
'''
def get_test_data():
'''
conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
cursor = conn.cursor()
#cursor.execute("select * from vw_electricity_source_price where ProductCode in "+ code_list +" and periods = (select top 1 periods from vw_entry_source_price order by periods desc)")
......@@ -25,6 +26,16 @@ def get_test_data():
cursor.close()
conn.close()
'''
conn = pymysql.connect('59.110.219.171','root','qwertyuiop1','product_chongqin')
cursor = conn.cursor()
#cursor.execute("select * from vw_electricity_source_price where ProductCode in "+ code_list +" and periods = (select top 1 periods from vw_entry_source_price order by periods desc)")
cursor.execute("select * from product")
test_df = pd.DataFrame([v for v in cursor.fetchall()],columns=[tuple[0] for tuple in cursor.description])
cursor.close()
conn.close()
return test_df
test_df = get_test_data()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment