Commit c3b68a4d authored by rico.liu

update

parent a87c2a18
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on Wed Dec 25 13:58:07 2019 Created on Wed Dec 25 13:58:07 2019
@author: rico @author: rico
""" """
import pandas as pd import pandas as pd
import pymssql import pymssql
import os import os
from collections import Counter from collections import Counter
import datetime import datetime
''' '''
test data load test data load
''' '''
def get_test_data():
    """Load the sample rows used to exercise the matcher.

    Runs a fixed query against ``Product_Api_Data`` (batch ``'2020-02-153'``,
    ``state = 4``) and returns every row as a DataFrame whose column names
    come from the cursor description.

    Returns:
        pandas.DataFrame: one row per record returned by the query.
    """
    # NOTE(review): host and credentials are hard-coded here; consider moving
    # them to configuration outside the repository.
    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("select * from Product_Api_Data where batch = '2020-02-153' and state = 4")
            rows = cursor.fetchall()
            columns = [col[0] for col in cursor.description]
        finally:
            cursor.close()
    finally:
        # Close the connection even when the query fails.
        conn.close()
    return pd.DataFrame(list(rows), columns=columns)
#df = get_test_data() #df = get_test_data()
def get_params_df(df):
    """Fetch the processed parameters for the products in *df*.

    Selects rows of ``Product_Parameter_Process`` whose ``productId`` is one
    of the ids in ``df['productId']``, whose ``batch`` equals the batch of the
    first row of *df*, and which have ``state = 2`` and
    ``paramAttributeMatch = 1``. The result is then inner-joined with the
    ``brandName`` / ``brandCode`` columns of *df* on ``productId``.

    Args:
        df: DataFrame with at least the columns ``productId``, ``batch``,
            ``brandName`` and ``brandCode``.

    Returns:
        pandas.DataFrame: parameter rows completed with brand information.

    Raises:
        ValueError: if *df* has no rows (there is nothing to query for).
    """
    if len(df) == 0:
        raise ValueError('df must contain at least one row')

    productid_list = df['productId'].unique().tolist()
    # Positional access: works even when the index of df does not start at 0.
    batch = str(df['batch'].iloc[0])

    # One placeholder per id so the driver does the quoting instead of str(list).
    placeholders = ','.join(['%s'] * len(productid_list))
    query = (
        "select * from Product_Parameter_Process "
        f"where productId in ({placeholders}) and batch = %s "
        "and state = 2 and paramAttributeMatch = 1"
    )

    # NOTE(review): host and credentials are hard-coded here; consider moving
    # them to configuration outside the repository.
    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(query, tuple(productid_list) + (batch,))
            rows = cursor.fetchall()
            columns = [col[0] for col in cursor.description]
        finally:
            cursor.close()
    finally:
        conn.close()

    params_df = pd.DataFrame(list(rows), columns=columns)
    # Complete the parameter rows with the brand info carried by df.
    return pd.merge(params_df, df[['productId','brandName','brandCode']], on='productId')
def _load_db_data(zgc_category_code):
    """Return the attribute data for one category, from today's CSV cache or the database."""
    path = os.getcwd()
    # Only the file names directly inside the working directory are needed,
    # so take the first os.walk entry instead of traversing the whole tree.
    file_list = next(os.walk(path), (path, [], []))[2]
    date = datetime.datetime.now().strftime('%Y-%m-%d')
    db_file_name = zgc_category_code + '_' + date + '_db_data.csv'
    file_path = os.path.join(path, db_file_name)

    # Remove cache files whose date part is not today's date.
    for file_name in file_list:
        if '_db_data.csv' in file_name and file_name.split('_')[1] != date:
            print('删除过期文件')
            os.remove(os.path.join(path, file_name))

    if db_file_name in file_list:
        print('获取本地已存数据库数据')
        # Read the code columns as text so leading zeros survive the round trip.
        db_attr_data = pd.read_csv(file_path, converters={'ProductCode':str,'SubCategoryCode':str,'BrandCode':str})
        return db_attr_data.rename(columns={"productcode": "ProductCode","productname":"ProductName","subtitle":"SubTitle","state":"State","value":"Value"})

    # NOTE(review): host and credentials are hard-coded here; consider moving
    # them to configuration outside the repository.
    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
    try:
        cursor = conn.cursor()
        try:
            print('获取最新线上数据库数据')
            # The category code is passed as a query parameter (compared as
            # text) rather than interpolated into the SQL string.
            cursor.execute(
                "select ProductCode,CfgID,Value from Info_ProductDetail where LEFT(ProductCode,4) = %s",
                (zgc_category_code,),
            )
            params = pd.DataFrame(list(cursor.fetchall()), columns=[col[0] for col in cursor.description])

            cfg_ids = params['CfgID'].apply(str).unique().tolist()
            # Keep the IN list non-empty so the statement stays valid SQL.
            cfg_ids = cfg_ids or ['']
            placeholders = ','.join(['%s'] * len(cfg_ids))
            cursor.execute(
                f"select SubTitleID,SubTitle from VW_Relation_Property where SubTitleID in ({placeholders})",
                tuple(cfg_ids),
            )
            cfg_names = pd.DataFrame(list(cursor.fetchall()), columns=[col[0] for col in cursor.description])

            cursor.execute(
                "select ProductCode,ProductName,State,RIGHT(BrandCode,5) as BrandCode from Info_Product where SubCategoryCode = %s",
                (zgc_category_code,),
            )
            pro_info = pd.DataFrame(list(cursor.fetchall()), columns=[col[0] for col in cursor.description])
        finally:
            cursor.close()
    finally:
        conn.close()

    res = pd.merge(params, cfg_names, left_on='CfgID', right_on='SubTitleID')
    res = pd.merge(res, pro_info, on='ProductCode')
    # index=False keeps the cached file free of an extra unnamed index column,
    # so a cached read has the same columns as a fresh query.
    res.to_csv(file_path, index=False)
    return res


def get_db_data(zgc_category_code, max_retries=3):
    """Return the catalogue attribute data for one category.

    If a file named ``<code>_<YYYY-MM-DD>_db_data.csv`` for today's date
    exists in the current working directory it is read and returned.
    Otherwise the data is queried from the database, merged, written to that
    file and returned. ``*_db_data.csv`` files in the working directory whose
    date part differs from today are deleted first.

    Args:
        zgc_category_code: category code string; used both in the cache file
            name and in the database queries.
        max_retries: how many times to retry after an ``OSError`` before
            giving up (the original retried without limit).

    Returns:
        pandas.DataFrame: merged parameter / property-name / product rows.

    Raises:
        OSError: if every attempt failed with ``OSError``.
    """
    try:
        return _load_db_data(zgc_category_code)
    except OSError:
        if max_retries <= 0:
            raise
        print('链接失败,重新链接')
        return get_db_data(zgc_category_code, max_retries - 1)
def counter(arr):
    """Count how often each element of *arr* occurs.

    Args:
        arr: iterable of hashable elements.

    Returns:
        list[tuple]: ``(element, count)`` pairs for every distinct element,
        ordered from most to least frequent; elements with equal counts keep
        the order in which they first appear in *arr*.
    """
    # most_common() with no argument already returns every distinct element.
    return Counter(arr).most_common()
def _match_product(product_rows, brand_db):
    """Return (codes, names) of the catalogue products that match every parameter row of one product."""
    candidate_lists = []
    for param, value in zip(product_rows['param'], product_rows['value']):
        hit = (brand_db['SubTitle'] == param) & (brand_db['Value'] == value)
        codes = brand_db[hit]['ProductCode'].unique().tolist()
        if not codes:
            # One parameter without any candidate means no product can match.
            return [], []
        candidate_lists.append(codes)

    if not candidate_lists:
        return [], []

    # A catalogue product matches only when it is a candidate for every
    # parameter. Walking the first list keeps the first-appearance order.
    first, rest = candidate_lists[0], [set(codes) for codes in candidate_lists[1:]]
    matched_codes = [code for code in first if all(code in other for other in rest)]
    matched_names = [
        brand_db.loc[brand_db['ProductCode'] == code, 'ProductName'].iloc[0]
        for code in matched_codes
    ]
    return matched_codes, matched_names


def match_with_db(params_df):
    """Match each product in *params_df* against the catalogue data.

    For every ``productId`` the function collects the product's
    ``(param, value)`` rows and looks for catalogue products of the same
    brand (``BrandCode`` equal to the product's ``brandCode``) that carry
    every one of those pairs as ``(SubTitle, Value)``. Catalogue data is
    loaded per ``subcategorycode`` through ``get_db_data``.

    Args:
        params_df: DataFrame with the columns ``productId``, ``productName``,
            ``source``, ``batch``, ``subcategorycode``, ``brandCode``,
            ``param`` and ``value``.

    Returns:
        pandas.DataFrame: one row per ``productId`` (ordered by
        ``subcategorycode``) with the columns ``productId``, ``productName``,
        ``source``, ``batch``, ``match_code`` (list of matching product
        codes) and ``match_name`` (list of the corresponding product names).
    """
    # One row per product, taking the descriptive columns from its first row.
    info_columns = ['productId', 'productName', 'source', 'batch', 'subcategorycode']
    match_res = params_df.drop_duplicates('productId')[info_columns].reset_index(drop=True)
    # Sorting groups the rows by category, so results appended category by
    # category below line up with the row order of match_res.
    match_res = match_res.sort_values('subcategorycode', kind='mergesort')

    match_code_list = []
    match_name_list = []

    for category_code in match_res['subcategorycode'].unique().tolist():
        print('开始匹配'+category_code+'数据')
        single_df = params_df[params_df['subcategorycode'] == category_code]
        brand_list = single_df['brandCode'].unique().tolist()
        # Take the ids from match_res itself to keep the append order aligned.
        productid_list = match_res[match_res['subcategorycode'] == category_code]['productId'].unique().tolist()

        db_attr_data = get_db_data(category_code)
        db_attr_data = db_attr_data[db_attr_data['BrandCode'].isin(brand_list)]

        for productid in productid_list:
            product_rows = single_df[single_df['productId'] == productid]
            brand_db = db_attr_data[db_attr_data['BrandCode'] == product_rows['brandCode'].iloc[0]]
            codes, names = _match_product(product_rows, brand_db)
            match_code_list.append(codes)
            match_name_list.append(names)

    match_res['match_code'] = match_code_list
    match_res['match_name'] = match_name_list
    del match_res['subcategorycode']

    return match_res
def match(df):
    """Entry point: match the products in *df* against the catalogue.

    Loads the processed parameters for *df* with ``get_params_df`` and passes
    them to ``match_with_db``.

    Args:
        df: product DataFrame as expected by ``get_params_df``.

    Returns:
        The DataFrame produced by ``match_with_db``.
    """
    params_df = get_params_df(df)
    res = match_with_db(params_df)
    return res
...@@ -10,13 +10,14 @@ import match_program ...@@ -10,13 +10,14 @@ import match_program
import pandas as pd import pandas as pd
import pymssql import pymssql
import pymysql
''' '''
test data load test data load
''' '''
def get_test_data(): def get_test_data():
'''
conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase') conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
cursor = conn.cursor() cursor = conn.cursor()
#cursor.execute("select * from vw_electricity_source_price where ProductCode in "+ code_list +" and periods = (select top 1 periods from vw_entry_source_price order by periods desc)") #cursor.execute("select * from vw_electricity_source_price where ProductCode in "+ code_list +" and periods = (select top 1 periods from vw_entry_source_price order by periods desc)")
...@@ -25,6 +26,16 @@ def get_test_data(): ...@@ -25,6 +26,16 @@ def get_test_data():
cursor.close() cursor.close()
conn.close() conn.close()
'''
conn = pymysql.connect('59.110.219.171','root','qwertyuiop1','product_chongqin')
cursor = conn.cursor()
#cursor.execute("select * from vw_electricity_source_price where ProductCode in "+ code_list +" and periods = (select top 1 periods from vw_entry_source_price order by periods desc)")
cursor.execute("select * from product")
test_df = pd.DataFrame([v for v in cursor.fetchall()],columns=[tuple[0] for tuple in cursor.description])
cursor.close()
conn.close()
return test_df return test_df
test_df = get_test_data() test_df = get_test_data()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment