Commit bc17ee8d authored by rico.liu

init

parent fa65f5c4
Pipeline #122 failed with stages
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 25 13:58:07 2019
@author: rico
"""
import pandas as pd
import pymssql
import os
from collections import Counter
import datetime
'''
test data load
'''
def get_test_data():
    """Load the JD test batch from the Product_Api_Data table.

    Runs a fixed query (batch '20191224', source 'JD', state 4) and wraps
    the fetched rows in a DataFrame.

    Returns:
        pandas.DataFrame: the fetched rows, with columns named after the
        cursor's result-set description.
    """
    # NOTE(review): host and credentials are hard-coded in source; consider
    # moving them to configuration outside version control.
    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("select * from Product_Api_Data where batch = '20191224' and source = 'JD' and state = 4")
            rows = cursor.fetchall()
            column_names = [column[0] for column in cursor.description]
        finally:
            # Release the cursor even when the query fails.
            cursor.close()
    finally:
        # Release the connection even when the query fails.
        conn.close()
    return pd.DataFrame(rows, columns=column_names)
#df = get_test_data()
def get_params_df(df):
    """Fetch processed parameter rows for the products in ``df``.

    Queries Product_Parameter_Process for every distinct ``productId`` in
    ``df``, restricted to the batch and source of the first row of ``df``
    and to ``state = 2 and paramAttributeMatch = 1``, then attaches the
    brand columns from ``df``.

    Args:
        df: DataFrame with at least the columns ``productId``, ``batch``,
            ``source``, ``brandName`` and ``brandCode``.

    Returns:
        pandas.DataFrame: the fetched parameter rows merged with
        ``brandName`` / ``brandCode`` on ``productId``.

    Raises:
        IndexError: if ``df`` has no rows.
    """
    product_ids = df['productId'].unique().tolist()
    # Positional access: the first row need not carry index label 0.
    batch = str(df['batch'].iloc[0])
    source = str(df['source'].iloc[0])

    # One placeholder per id so the driver quotes/escapes every value
    # instead of the ids being spliced into the SQL text.
    id_placeholders = ','.join(['%s'] * len(product_ids))
    query = (
        "select * from Product_Parameter_Process "
        f"where productId in ({id_placeholders}) "
        "and batch = %s and source = %s and state = 2 and paramAttributeMatch = 1"
    )

    # NOTE(review): host and credentials are hard-coded in source; consider
    # moving them to configuration outside version control.
    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(query, tuple(product_ids) + (batch, source))
            rows = cursor.fetchall()
            column_names = [column[0] for column in cursor.description]
        finally:
            cursor.close()
    finally:
        conn.close()

    params_df = pd.DataFrame(rows, columns=column_names)
    # Complete the brand information from the input frame.
    return pd.merge(params_df, df[['productId','brandName','brandCode']], on='productId')
def _fetch_frame(cursor, query, params):
    """Execute a parameterized query and return its rows as a DataFrame."""
    cursor.execute(query, params)
    rows = cursor.fetchall()
    return pd.DataFrame(rows, columns=[column[0] for column in cursor.description])


def _load_db_data(zgc_category_code):
    """Return today's attribute data for one category, from cache or the DB."""
    path = os.getcwd()  # current working directory holds the cache files
    # Only the top-level file listing is needed; do not walk the whole tree.
    file_list = next(os.walk(path), (path, [], []))[2]
    date = datetime.datetime.now().strftime('%Y-%m-%d')
    db_file_name = zgc_category_code + '_' + date + '_db_data.csv'
    file_path = path + '/' + db_file_name

    # Delete cache files whose date stamp is not today's.
    for name in file_list:
        if '_db_data.csv' in name and name.split('_')[1] != date:
            print('删除过期文件')
            os.remove(path + '/' + name)

    if db_file_name in file_list:
        print('获取本地已存数据库数据')
        # Keep code columns as text under either spelling; otherwise the
        # parser infers integers and drops leading zeros.
        text_columns = ('ProductCode', 'productcode', 'SubCategoryCode', 'BrandCode')
        db_attr_data = pd.read_csv(file_path, converters={name: str for name in text_columns})
        return db_attr_data.rename(columns={"productcode": "ProductCode","productname":"ProductName","subtitle":"SubTitle","state":"State","value":"Value"})

    # NOTE(review): host and credentials are hard-coded in source; consider
    # moving them to configuration outside version control.
    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
    try:
        cursor = conn.cursor()
        try:
            print('获取最新线上数据库数据')
            # The category code is passed as a parameter, so it is compared
            # as quoted text rather than spliced in as a bare literal.
            params = _fetch_frame(
                cursor,
                "select ProductCode,CfgID,Value from Info_ProductDetail where LEFT(ProductCode,4) = %s",
                (zgc_category_code,),
            )
            # An empty id list falls back to a single empty string, which
            # keeps the IN clause syntactically valid.
            cfg_ids = params['CfgID'].apply(str).unique().tolist() or ['']
            cfg_placeholders = ','.join(['%s'] * len(cfg_ids))
            cfg_names = _fetch_frame(
                cursor,
                f"select SubTitleID,SubTitle from VW_Relation_Property where SubTitleID in ({cfg_placeholders})",
                tuple(cfg_ids),
            )
            pro_info = _fetch_frame(
                cursor,
                "select ProductCode,ProductName,State,RIGHT(BrandCode,5) as BrandCode from Info_Product where SubCategoryCode = %s",
                (zgc_category_code,),
            )
        finally:
            cursor.close()
    finally:
        conn.close()

    res = pd.merge(params, cfg_names, left_on='CfgID', right_on='SubTitleID')
    res = pd.merge(res, pro_info, on='ProductCode')
    # index=False: the row index is not data and would come back as a
    # spurious unnamed column on the next cache read.
    res.to_csv(file_path, index=False)
    return res


def get_db_data(zgc_category_code, max_retries=3):
    """Return product attribute data for one category, cached per day.

    If a file named ``<code>_<YYYY-MM-DD>_db_data.csv`` for today exists in
    the current working directory it is read and returned; otherwise the
    data is queried from the database, written to that file and returned.
    Cache files in the working directory stamped with another date are
    deleted first.

    Args:
        zgc_category_code: category code as a string; used in the cache
            file name and as the query filter.
        max_retries: how many times to retry after an ``OSError`` before
            giving up (the original retried without bound).

    Returns:
        pandas.DataFrame: attribute rows with, among others, the columns
        ``ProductCode``, ``SubTitle``, ``Value``, ``ProductName`` and
        ``BrandCode``.

    Raises:
        OSError: if the load still fails after ``max_retries`` retries.
    """
    for attempt in range(max_retries + 1):
        try:
            return _load_db_data(zgc_category_code)
        except OSError:
            if attempt >= max_retries:
                raise
            print('链接失败,重新链接')
def counter(arr):
    """Count how often each element of ``arr`` occurs.

    Args:
        arr: iterable of hashable elements.

    Returns:
        list[tuple]: ``(element, count)`` pairs for every distinct element,
        most frequent first; ties keep first-seen order.
    """
    # most_common() without an argument already returns every element, so
    # there is no need to build a set just to count the distinct values.
    return Counter(arr).most_common()
def _match_single_product(product_df, brand_db):
    """Return (codes, names) of DB products matching every param/value row."""
    candidate_codes = []
    for param, value in zip(product_df['param'], product_df['value']):
        hits = brand_db[(brand_db['SubTitle'] == param) & (brand_db['Value'] == value)]['ProductCode'].unique().tolist()
        if not hits:
            # One parameter without any candidate rules the product out.
            return [], []
        candidate_codes += hits

    # A DB product matches only if it was a hit for every parameter row.
    required_hits = len(product_df)
    codes = [code for code, hit_count in Counter(candidate_codes).items() if hit_count == required_hits]
    names = [brand_db[brand_db['ProductCode'] == code]['ProductName'].iloc[0] for code in codes]
    return codes, names


def match_with_db(params_df):
    """Match each product's parameters against the database attribute data.

    For every ``subcategorycode`` in ``params_df`` the category's attribute
    data is loaded through ``get_db_data`` and narrowed to the brands
    present. A DB product counts as a match for an input product when, for
    the same brand, it has a row with equal ``SubTitle``/``Value`` for every
    ``param``/``value`` row of the input product.

    Args:
        params_df: DataFrame with the columns ``productId``,
            ``productName``, ``source``, ``batch``, ``subcategorycode``,
            ``brandCode``, ``param`` and ``value``.

    Returns:
        pandas.DataFrame: one row per distinct ``productId`` (in order of
        first appearance) with ``productName``, ``source``, ``batch`` taken
        from its first row, plus ``match_code`` and ``match_name`` lists.
    """
    # The first row per product supplies the descriptive columns.
    first_rows = params_df.drop_duplicates(subset='productId')
    match_res = first_rows[['productId', 'productName', 'source', 'batch']].reset_index(drop=True).copy()

    # Results are keyed by productId: products are visited category by
    # category, which need not be the row order of match_res.
    matches = {}
    for category_code in params_df['subcategorycode'].unique().tolist():
        print('开始匹配'+category_code+'数据')
        single_df = params_df[params_df['subcategorycode'] == category_code]
        brand_list = single_df['brandCode'].unique().tolist()
        db_attr_data = get_db_data(category_code)
        db_attr_data = db_attr_data[db_attr_data['BrandCode'].isin(brand_list)]

        for productid in single_df['productId'].unique().tolist():
            product_df = single_df[single_df['productId'] == productid]
            brand_db = db_attr_data[db_attr_data['BrandCode'] == product_df['brandCode'].iloc[0]]
            codes, names = _match_single_product(product_df, brand_db)
            matched_codes, matched_names = matches.setdefault(productid, ([], []))
            matched_codes.extend(codes)
            matched_names.extend(names)

    match_res['match_code'] = [list(matches.get(pid, ((), ()))[0]) for pid in match_res['productId']]
    match_res['match_name'] = [list(matches.get(pid, ((), ()))[1]) for pid in match_res['productId']]
    return match_res
def match(df):
    """Entry point: look up the parameter rows for ``df`` and match them.

    Args:
        df: product DataFrame handed to ``get_params_df``.

    Returns:
        The result of ``match_with_db`` for those parameter rows.
    """
    return match_with_db(get_params_df(df))
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 25 17:00:43 2019
@author: rico
"""
import match_program
import pandas as pd
import pymssql
'''
test data load
'''
def get_test_data():
    """Load the JD test batch from the Product_Api_Data table.

    Runs a fixed query (batch '20191224', source 'JD', state 4) and wraps
    the fetched rows in a DataFrame.

    Returns:
        pandas.DataFrame: the fetched rows, with columns named after the
        cursor's result-set description.
    """
    # NOTE(review): host and credentials are hard-coded in source; consider
    # moving them to configuration outside version control.
    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("select * from Product_Api_Data where batch = '20191224' and source = 'JD' and state = 4")
            rows = cursor.fetchall()
            column_names = [column[0] for column in cursor.description]
        finally:
            # Release the cursor even when the query fails.
            cursor.close()
    finally:
        # Release the connection even when the query fails.
        conn.close()
    return pd.DataFrame(rows, columns=column_names)
if __name__ == '__main__':
    # Run the end-to-end check only when executed as a script, so that
    # importing this module does not open database connections.
    test_df = get_test_data()
    test_res = match_program.match(test_df)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment