Commit 4196b164 authored by rico.liu

init

parent 14c41495
This source diff could not be displayed because it is too large. You can view the blob instead.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 4 13:19:46 2019
@author: rico
"""
import time
def timer(func):
    """Decorator that prints the wall-clock duration of every call to ``func``.

    The wrapped function's return value is passed through unchanged.  The
    duration is printed only when the call returns normally; if ``func``
    raises, the exception propagates and nothing is printed.

    :param func: callable to be timed
    :return: a wrapper with the same name, docstring and signature behaviour
    """
    # Imported locally so this block does not depend on a file-level import.
    import functools

    @functools.wraps(func)  # keep func.__name__ / __doc__ on the wrapper
    def wrapper(*args, **kwds):
        # perf_counter is monotonic, so the measured interval cannot go
        # negative if the system clock is adjusted during the call.
        started = time.perf_counter()
        res = func(*args, **kwds)
        elapsed = time.perf_counter() - started
        print(func.__name__ + '耗时%0.3f秒' % elapsed)
        return res

    return wrapper
# -*- coding: utf-8 -*-
import pymssql
import pandas as pd
import re
from lxml import etree
'''
class product():
def __init__(self, product_name, product_SKU, product_class_num = 'na'):
self.product_name = str(product_name)
self.product_SKU = str(product_SKU)
self.product_class = str(product_class_num)
def get_parameter(self, **kwargs):
'''
class sql_find():
    """Open a pymssql connection (autocommit) and expose ``conn`` and ``cursor``.

    Can be used as a context manager so the connection is released
    deterministically::

        with sql_find('ZI_BAK') as db:
            db.cursor.execute(...)

    NOTE(review): host names and credentials are hard-coded below; they
    should be moved to configuration / environment variables.
    """

    def __init__(self, database='ZI_DataBase', localhost=True):
        """Connect to ``database`` on the local server or on the remote host.

        :param database: name of the database to open
        :param localhost: True to use the local server, False for the remote one
        """
        if localhost:
            self.conn = pymssql.connect(host='localhost', user='zgc', password='1234', database=database, autocommit=True)
        else:
            self.conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='admin@2018@)!*', database=database, autocommit=True)
        try:
            self.cursor = self.conn.cursor()
        except Exception:
            # Do not leak the connection if the cursor cannot be created.
            self.conn.close()
            raise

    def close(self):
        """Close the underlying connection (and with it the cursor)."""
        self.conn.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
'''
def execute(self, sql_sentence):
self.cursor.execute(sql_sentence)
return self.cursor
'''
def BN(brand):
    """Reduce a brand label to a comparable key.

    The text found between the first ``[`` and the following ``]`` (when such
    a pair exists) is removed from the label everywhere it occurs.  If the
    remaining text contains CJK characters (U+4E00..U+9FA5) those characters
    alone are returned; otherwise the ASCII letters and digits are returned
    in upper case.

    :param brand: brand label; converted with ``str()`` first
    :return: normalised brand key (may be an empty string)
    """
    text = str(brand)
    try:
        bracketed = text.split('[')[1].split(']')[-2]
    except IndexError:
        # No complete "[...]" section: keep the label untouched.
        pass
    else:
        text = text.replace(bracketed, '')

    chinese_part = ''.join(re.findall(r'[\u4E00-\u9FA5]', text))
    if chinese_part:
        return chinese_part

    ascii_part = ''.join(re.findall(r'[a-zA-Z0-9]', text))
    return ascii_part.upper()
class Index(object):
    """Render a textual progress bar such as ``[###   ] 50.0``.

    Calling the instance with ``(now, total)`` returns a string that starts
    with a carriage return so that printing it overwrites the previous bar.
    """

    def __init__(self, number=50, decimal=2):
        """
        :param number: width of the bar, i.e. how many ``#`` mean 100 %
        :param decimal: number of decimal places kept in the percentage
        """
        self.decimal = decimal
        self.number = number
        self.a = 100 / number  # percentage represented by a single "#"

    def __call__(self, now, total):
        """Return the bar plus the percentage for ``now`` out of ``total``."""
        percentage = self.percentage_number(now, total)
        well_num = int(percentage / self.a)
        progress_bar_num = self.progress_bar(well_num)
        return "\r%s %s" % (progress_bar_num, percentage)

    def percentage_number(self, now, total):
        """Return ``now / total`` as a rounded percentage.

        :param now: current count
        :param total: total count; ``0`` is treated as "nothing left to do"
        :return: percentage rounded to ``self.decimal`` places
        """
        if total == 0:
            # Avoid ZeroDivisionError for empty / single-step workloads.
            return round(100.0, self.decimal)
        return round(now / total * 100, self.decimal)

    def progress_bar(self, num):
        """Return the bracketed bar containing ``num`` filled cells.

        :param num: number of ``#`` cells; clamped to ``0..self.number`` so
            the bar always keeps its configured width
        :return: the bar, e.g. ``[##   ]``
        """
        filled = max(0, min(num, self.number))
        return '[%s%s]' % ("#" * filled, " " * (self.number - filled))
class tool():
    """Brand-code and accessory-flag lookups backed by two Excel sheets."""

    def __init__(self):
        """Load the accessory table and the brand mapping table from disk."""
        self.peijian_table = pd.read_excel('是否需要配件.xlsx')
        self.brand_table = pd.read_excel('品牌对应表.xlsx')

    def judge_brand(self, brand, brandcode_original):
        """Return the 5-character brand id for a product.

        When ``brandcode_original`` is the sentinel ``'没有对应指数品牌'`` the
        brand table is searched row by row: a row matches if ``brand`` equals
        its Chinese name or ``BN(brand)`` equals its English name.  If no row
        matches the sentinel itself is returned.  Otherwise the original code
        is zero-padded to 5 characters and its last 5 characters are returned.

        :param brand: brand label of the product
        :param brandcode_original: existing brand code or the sentinel string
        :return: brand id string, or the sentinel when nothing matches
        """
        if brandcode_original != '没有对应指数品牌':
            return str(brandcode_original).zfill(5)[-5:]

        normalized = None  # BN(brand), computed at most once and only if needed
        rows = zip(self.brand_table['ID'], self.brand_table['中文品牌'], self.brand_table['英文品牌'])
        for ID, Chinese_brand, English_brand in rows:
            if brand == Chinese_brand:
                return str(ID).zfill(5)
            if normalized is None:
                normalized = BN(brand)
            if normalized == English_brand:
                return str(ID).zfill(5)
        return '没有对应指数品牌'

    def judge_peijian(self, data_table):
        """Add the ``'有无配件'`` and ``'型号_only'`` flag columns to ``data_table``.

        Each value of column ``'指数子类编码'`` is zero-padded to 4 characters
        and looked up in the accessory table (first matching row wins).  A
        flag is ``'1'`` when the corresponding table cell is not ``'0'`` as a
        string, and ``'0'`` otherwise or when the code is unknown.

        :param data_table: DataFrame with a ``'指数子类编码'`` column; it is
            modified in place and also returned
        :return: the same DataFrame with the two flag columns set
        """
        # Build the lookup once instead of rescanning the table for every row.
        flags_by_code = {}
        rows = zip(self.peijian_table['categorycode'], self.peijian_table['ispeijian'], self.peijian_table['isunique'])
        for categorycode, ispeijian, isunique in rows:
            flags = ('1' if str(ispeijian) != '0' else '0',
                     '1' if str(isunique) != '0' else '0')
            # setdefault keeps the first row for a duplicated code.
            flags_by_code.setdefault(str(categorycode).zfill(4), flags)

        ispeijian_lyst = []
        isunique_lyst = []
        for class_code in data_table['指数子类编码']:
            class_code = str(class_code).zfill(4)
            if class_code != '没有匹配的指数子类编码':
                mark, mark2 = flags_by_code.get(class_code, ('0', '0'))
            else:
                mark, mark2 = '0', '0'
            ispeijian_lyst.append(mark)
            isunique_lyst.append(mark2)

        data_table['有无配件'] = ispeijian_lyst
        data_table['型号_only'] = isunique_lyst
        return data_table
def judge_unit(string):
    """Tell whether ``string`` may be a model token rather than a measurement.

    Returns ``False`` only when the token starts with a digit and, once its
    digits and dots have been counted, the remainder starting at that count
    is one of the known units (case-insensitive), e.g. ``'12MM'`` or
    ``'1.5kw'``.  Every other token yields ``True``.

    :param string: non-empty token to inspect
    :return: False for "<number><unit>" tokens, True otherwise
    """
    known_units = {'MM', 'CM', 'DM', 'ML', 'W', 'KW'}

    if not string[0].isdigit():
        return True

    numeric_chars = 0
    for ch in string:
        if ch.isdigit() or ch == '.':
            numeric_chars += 1
        elif ch.isalpha():
            # First letter decides: compare everything after the counted
            # numeric characters with the unit list.
            return string[numeric_chars:].upper() not in known_units
    return True
def type_extract_JD(name, params, brand):
    """Extract an upper-case model identifier from a product title.

    Candidate tokens are the runs of ``[A-Za-z0-9-+/.*]`` in ``name`` that
    are at least two characters long, upper-cased, with one occurrence of the
    brand's first alphanumeric run removed.  The model given in ``params``
    (key ``'型号'`` or the literal ``r'\\t型号\\t'``) wins when there are no
    candidates or when it is itself one of the candidates.  Otherwise the
    first candidate longer than two characters, without ``*`` and accepted by
    ``judge_unit`` is used; if none qualifies the longest candidate is used,
    unless it is a measurement, in which case ``'NA'`` is returned.

    :param name: product title
    :param params: mapping of parameter name to value
    :param brand: brand label used to drop the brand token from the title
    :return: model identifier in upper case, or ``'NA'``
    """
    try:
        brand_remove = re.findall(r"[A-Za-z0-9]+", brand)[0].upper()
    except IndexError:
        brand_remove = '没有英文品牌!'

    param_xinghao = 'NA'
    if '型号' in params:
        param_xinghao = params['型号']
    elif r'\t型号\t' in params:
        param_xinghao = params[r'\t型号\t']

    candidates = [
        token.upper()
        for token in re.findall(r"[A-Za-z0-9-+/.*]+", name)
        if len(token) >= 2
    ]
    try:
        candidates.remove(brand_remove)
    except ValueError:
        pass

    # Candidates are upper-cased, so compare the parameter in upper case too
    # (the original compared the raw value and missed lower-case models).
    param_upper = param_xinghao.upper()
    if not candidates or param_upper in candidates:
        return param_upper

    xinghao_data = max(candidates, key=len)
    for xinghao in candidates:
        if len(xinghao) > 2 and '*' not in xinghao and judge_unit(xinghao):
            xinghao_data = xinghao
            break
    if not judge_unit(xinghao_data):
        # Was ``xinghao_data == 'NA'`` - a comparison with no effect.
        xinghao_data = 'NA'
    return xinghao_data
def type_extract(name, params):
    """Extract an upper-case model identifier from a product title.

    Candidate tokens are the runs of ``[A-Za-z0-9-+/.*]`` in ``name`` that
    are at least two characters long.  The model given in ``params`` (key
    ``'型号'`` or the literal ``r'\\t型号\\t'``) wins when there are no
    candidates or when it is itself one of the candidates.  Otherwise the
    first candidate longer than two characters, without ``*`` and accepted by
    ``judge_unit`` is used; if none qualifies the longest candidate is used,
    unless it is a measurement, in which case ``'NA'`` is returned.

    :param name: product title
    :param params: mapping of parameter name to value
    :return: model identifier in upper case, or ``'NA'``
    """
    param_xinghao = 'NA'
    if '型号' in params:
        param_xinghao = params['型号']
    elif r'\t型号\t' in params:
        param_xinghao = params[r'\t型号\t']

    candidates = [
        token
        for token in re.findall(r"[A-Za-z0-9-+/.*]+", name)
        if len(token) >= 2
    ]
    if not candidates or param_xinghao in candidates:
        return param_xinghao.upper()

    xinghao_data = max(candidates, key=len)
    for xinghao in candidates:
        if len(xinghao) > 2 and '*' not in xinghao and judge_unit(xinghao):
            xinghao_data = xinghao
            break
    if not judge_unit(xinghao_data):
        # Was ``xinghao_data == 'NA'`` - a comparison with no effect.
        xinghao_data = 'NA'
    return xinghao_data.upper()
def param_load(product_id, xml_string):
    """Parse a parameter table from HTML/XML markup into a dict.

    Title cells are the ``<td class="tdTitle">`` elements; value cells are
    every ``<td>`` that is not the first cell of its row.  The two lists are
    paired positionally and the first direct text node of each cell is used.

    :param product_id: SKU of the product (not used by the parser itself)
    :param xml_string: raw markup containing the parameter table
    :return: dict mapping parameter title to parameter value
    """
    tree = etree.HTML(xml_string)
    title_cells = tree.xpath("//td[@class='tdTitle']")
    value_cells = tree.xpath("//tr//td[position()>1]")

    data_dict = {}
    for title_cell, value_cell in zip(title_cells, value_cells):
        title_text = title_cell.xpath("./text()")
        if not title_text:
            # A title cell without text cannot name a parameter; skip it
            # instead of failing the whole table with IndexError.
            continue
        value_text = value_cell.xpath("./text()")
        # An empty value cell is recorded as an empty string.
        data_dict[title_text[0]] = value_text[0] if value_text else ''
    return data_dict
if __name__ == '__main__':
    # Manual smoke test: fetch one product's quotes from the local ZI_BAK
    # database and dump the rows.
    query = "select * from ZI_Price_Quote where productcode = '0506003750007'"
    sqlserver = sql_find('ZI_BAK', True)
    sqlserver.cursor.execute(query)
    rows = sqlserver.cursor.fetchall()
    print(rows)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 24 09:27:37 2019
@author: rico
"""
#方案一:提前计算
#方案二:实时计算
#输入:产品名称输入、差异还原标识(未匹配到具体SKU,但匹配到SPU)
import pymssql
from func_timer import timer
import pandas as pd
import dask.dataframe as dd
import datetime
from function import Index
import math
from sqlalchemy import create_engine
import warnings
warnings.filterwarnings("ignore")
def Descartes(*args, repeat=1):
    """Yield the Cartesian product of the given iterables as tuples.

    Equivalent to ``itertools.product(*args, repeat=repeat)``; the rightmost
    iterable varies fastest.  With no iterables a single empty tuple is
    yielded.

    :param args: iterables to combine
    :param repeat: how many times the whole argument list is repeated;
        values below zero behave like zero
    :return: generator of tuples
    """
    # Imported locally so this block does not depend on a file-level import.
    import itertools

    # The standard-library implementation avoids materialising every
    # intermediate list of partial products.
    yield from itertools.product(*args, repeat=max(repeat, 0))
@timer
def get_attr_data(SubCategoryCode, max_retries=3):
    """Load the attribute rows of every product in one sub-category.

    :param SubCategoryCode: sub-category code to filter ``info_product`` by
    :param max_retries: how many times the query is attempted before the
        last database error is re-raised
    :return: DataFrame whose columns are taken from the cursor description
    :raises pymssql.Error: when every attempt fails
    """
    # The code is bound as a query parameter instead of being concatenated
    # into the statement.
    # NOTE(review): the original inlined the code unquoted (compared as a
    # number); it is now compared as a string - confirm the column type.
    sql_attr = '''select c.SubCategoryCode,a.productcode,c.productname,d.Name,c.BrandCode,b.subtitle,a.value,c.state from
    (select productcode,productname,BrandCode,state,SubCategoryCode from info_product where SubCategoryCode = %s) c
    join info_productdetail a
    on a.productcode=c.productcode
    join vw_relation_property b
    on a.cfgid=b.subtitleid and c.SubCategoryCode=b.subcategorycode
    join ZI_Brand d
    on c.BrandCode=d.BrandCode
    '''
    last_error = None
    # Bounded loop instead of unbounded recursion behind a bare ``except``.
    for _attempt in range(max(1, max_retries)):
        conn = None
        try:
            conn = pymssql.connect('123.56.115.207', 'zgcindex', 'jiayou2017+2018', 'ZI_DataBase')
            cursor = conn.cursor()
            cursor.execute(sql_attr, (SubCategoryCode,))
            rows = cursor.fetchall()
            columns = [column[0] for column in cursor.description]
            return pd.DataFrame(rows, columns=columns)
        except pymssql.Error as error:
            last_error = error
            print('连接失败,重新连接')
        finally:
            # Release the connection on success and on failure alike.
            if conn is not None:
                conn.close()
    raise last_error
# Build the SPU configurations
@timer
def create_SPU_configure(attr_df):
    """Return, per brand, every (brand, product series, CPU series) combination.

    :param attr_df: attribute DataFrame with ``BrandCode``, ``subtitle`` and
        ``value`` columns
    :return: list with one entry per distinct brand; each entry is the list
        of tuples produced by ``Descartes`` for that brand
    """
    cpu_series = ['i3', 'i5', 'i7']

    def series_of(brand_code):
        # Distinct '产品系列' values recorded for this brand.
        selector = (attr_df['BrandCode'] == brand_code) & (attr_df['subtitle'] == '产品系列')
        return attr_df[selector]['value'].unique().tolist()

    brand_codes = attr_df['BrandCode'].unique().tolist()
    return [
        list(Descartes([brand_code], series_of(brand_code), cpu_series))
        for brand_code in brand_codes
    ]
# Look up the SKUs that belong to each SPU
@timer
def get_product_code(attr_df, SPU_conf):
    """Collect the product codes matching every SPU configuration.

    :param attr_df: attribute DataFrame (``BrandCode``, ``subtitle``,
        ``value``, ``productname``, ``productcode`` columns are read)
    :param SPU_conf: list of per-brand lists of (brand, series, cpu) tuples
    :return: ``(res_spu, res_code)`` - the flattened SPU tuples and, at the
        same position, the list of distinct product codes found for each
    """
    index = Index()
    res_spu = []
    res_code = []
    total = len(SPU_conf)
    # Count 1..total against ``total`` so the bar ends at exactly 100 % and
    # a single-brand input no longer divides by zero.
    for m, v in enumerate(SPU_conf, start=1):
        print(index(m, total), end='%')
        for v_ in v:
            cpu = v_[2].upper()
            productcode = attr_df[
                (attr_df['BrandCode'] == v_[0])
                & (attr_df['value'] == v_[1])
                & (
                    (attr_df['productname'].str.contains(cpu))
                    | (attr_df[attr_df['subtitle'] == 'CPU型号']['value'].str.contains(cpu))
                )
            ]['productcode'].unique().tolist()
            # SPUs without any SKU are kept (with an empty list) on purpose.
            res_spu.append(v_)
            res_code.append(productcode)
    return res_spu, res_code
@timer
def get_price_data(channel, max_retries=3):
    """Load the latest-period rows of a price view.

    The period is the most recent ``periods`` value found in
    ``vw_entry_source_price``.

    :param channel: name of the table/view to read; it is interpolated into
        the statement, so it must come from trusted code, never user input
    :param max_retries: how many times the query is attempted before the
        last database error is re-raised
    :return: DataFrame whose columns are taken from the cursor description
    :raises pymssql.Error: when every attempt fails
    """
    sql = "select * from " + channel + " where periods = (select top 1 periods from vw_entry_source_price order by periods desc)"
    last_error = None
    # Bounded loop instead of unbounded recursion behind a bare ``except``.
    for _attempt in range(max(1, max_retries)):
        conn = None
        try:
            conn = pymssql.connect('123.56.115.207', 'zgcindex', 'jiayou2017+2018', 'price_calculate')
            cursor = conn.cursor()
            cursor.execute(sql)
            rows = cursor.fetchall()
            columns = [column[0] for column in cursor.description]
            return pd.DataFrame(rows, columns=columns)
        except pymssql.Error as error:
            last_error = error
            print('连接失败,重新连接')
        finally:
            # Release the connection on success and on failure alike.
            if conn is not None:
                conn.close()
    raise last_error
@timer
def get_zc_price_weight(max_retries=3):
    """Load ``price_id``, ``price_name`` and ``weight`` from ``zd_price_weight``.

    :param max_retries: how many times the query is attempted before the
        last database error is re-raised
    :return: DataFrame whose columns are taken from the cursor description
    :raises pymssql.Error: when every attempt fails
    """
    last_error = None
    # Bounded loop instead of unbounded recursion behind a bare ``except``.
    for _attempt in range(max(1, max_retries)):
        conn = None
        try:
            conn = pymssql.connect('123.56.115.207', 'zgcindex', 'jiayou2017+2018', 'price_calculate')
            cursor = conn.cursor()
            cursor.execute("select price_id,price_name,weight from zd_price_weight")
            rows = cursor.fetchall()
            columns = [column[0] for column in cursor.description]
            return pd.DataFrame(rows, columns=columns)
        except pymssql.Error as error:
            last_error = error
            print('连接失败,重新连接')
        finally:
            # Release the connection on success and on failure alike.
            if conn is not None:
                conn.close()
    raise last_error
@timer
def calculate_configure_price(productCode, categoryCode):
    """Placeholder for the configuration price of one product.

    Reads ``0101.csv``, selects the ``subtitle``/``value`` rows of the given
    product and currently always returns ``0``.

    :param productCode: product code to look up in the CSV
    :param categoryCode: category code (not used yet)
    :return: 0
    """
    as_text = {'productcode': str, 'BrandCode': str}
    attr_df = pd.read_csv('0101.csv', converters=as_text)
    belongs_to_product = attr_df['productcode'] == productCode
    # Selected but not yet used: the price computation is still to be written.
    product_attr = attr_df.loc[belongs_to_product, ['subtitle', 'value']]
    return 0
@timer
def calculate_zc_with_df(zc_filter_price_df, zc_price_weight):
    """Return the weighted average government-procurement price.

    For every row each non-zero price column is reduced by the configuration
    price and multiplied by the weight of its price type; the row's combined
    price is the sum of those values divided by the sum of the weights used.
    The result is the average of the combined prices weighted by the row's
    ``weight`` column.

    :param zc_filter_price_df: price rows (``ProductCode``, the five price
        columns and ``weight``); not modified
    :param zc_price_weight: DataFrame with ``price_id`` and ``weight``
    :return: weighted average price (NaN when it cannot be computed)
    """
    dfp = zc_filter_price_df.fillna(0)
    price_dict = {'purchase_price': 'PLCJJ',
                  'agreement_price': 'XYJ',
                  'bid_price': 'JJ',
                  'contract_price': 'HTJ',
                  'electronic_mall_price': 'DZSCJ'}
    price_columns = list(price_dict)
    for col_name in price_columns:
        # Float dtype so the adjusted prices can be stored without a cast.
        dfp[col_name] = pd.to_numeric(dfp[col_name]).astype(float)
    dfp['weight'] = pd.to_numeric(dfp['weight'])

    weight_cache = {}

    def weight_of(col_name):
        # The weight of a price type does not depend on the row: look it up
        # once, on first use.
        if col_name not in weight_cache:
            matches = zc_price_weight[zc_price_weight['price_id'] == price_dict[col_name]]
            weight_cache[col_name] = int(matches['weight'].values[0])
        return weight_cache[col_name]

    sum_weight_list = []
    for index, row in dfp.iterrows():
        sum_weight = 0
        for col_name in price_columns:
            weight = weight_of(col_name)
            if row[col_name] == 0:
                continue
            # Subtract the configuration price before weighting.
            configure_price = calculate_configure_price(row['ProductCode'], '0101')
            # .at writes into dfp itself; chained ``dfp[col][index] = ...``
            # may silently write to a temporary copy.
            dfp.at[index, col_name] = (row[col_name] - configure_price) * weight
            sum_weight += weight
        sum_weight_list.append(sum_weight)

    dfp['sum_weight'] = sum_weight_list
    dfp['combine_price'] = (dfp['purchase_price'] + dfp['agreement_price'] + dfp['bid_price'] + dfp['contract_price'] + dfp['electronic_mall_price']) / dfp['sum_weight']
    zc_avg_price = ((dfp['combine_price'] * dfp['weight']).sum()) / (dfp['weight'].sum())
    return zc_avg_price
@timer
def calculate_ds_with_df(ds_filter_price_df):
    """Return the weighted average e-commerce price.

    Rows whose ``goods_price`` is ``0`` or ``-1`` are discarded; the other
    prices are reduced by the configuration price and averaged using the
    ``weight`` column.

    :param ds_filter_price_df: price rows (``ProductCode``, ``goods_price``,
        ``weight``); not modified
    :return: weighted average price (NaN when no usable row remains)
    """
    dfp = ds_filter_price_df.fillna(0)
    # Float dtype so the adjusted prices can be stored without a cast.
    dfp['goods_price'] = pd.to_numeric(dfp['goods_price']).astype(float)
    dfp['weight'] = pd.to_numeric(dfp['weight'])

    drop_list = []
    for index, row in dfp.iterrows():
        if row['goods_price'] == 0 or row['goods_price'] == -1:
            drop_list.append(index)
            continue
        # Subtract the configuration price.
        configure_price = calculate_configure_price(row['ProductCode'], '0101')
        # .at writes into dfp itself; chained ``dfp[col][index] = ...`` may
        # silently write to a temporary copy.
        dfp.at[index, 'goods_price'] = row['goods_price'] - configure_price

    dfp = dfp.drop(labels=drop_list, axis=0)
    ds_avg_price = ((dfp['goods_price'] * dfp['weight']).sum()) / (dfp['weight'].sum())
    return ds_avg_price
@timer
def calculate_st_with_df(st_filter_price_df):
    """Return the mean physical-store (entry retail) price.

    Every ``entry_retail_price`` is reduced by the configuration price of its
    product before the plain mean is taken.

    :param st_filter_price_df: price rows (``ProductCode``,
        ``entry_retail_price``); not modified
    :return: mean price (NaN for an empty frame)
    """
    dfp = st_filter_price_df.fillna(0)
    # The original stored the converted values under the misspelled name
    # 'entry_retial_price', leaving the real column unconverted.
    dfp['entry_retail_price'] = pd.to_numeric(dfp['entry_retail_price']).astype(float)

    for index, row in dfp.iterrows():
        # Subtract the configuration price.
        configure_price = calculate_configure_price(row['ProductCode'], '0101')
        # .at writes into dfp itself; chained ``dfp[col][index] = ...`` may
        # silently write to a temporary copy.
        dfp.at[index, 'entry_retail_price'] = row['entry_retail_price'] - configure_price

    st_avg_price = dfp['entry_retail_price'].mean()
    return st_avg_price
#attr_df = get_attr_data('0101')
#attr_df.to_csv('0101.csv')
@timer
def main():
    """Compute one blended price per SPU configuration.

    Reads the attribute data from ``0101.csv``, builds the SPU
    configurations, resolves their SKUs and blends the three channel
    averages with weights 0.7 (store), 0.2 (e-commerce) and 0.1
    (procurement).  Channels whose average is NaN are left out and the
    remaining weights are renormalised; a SPU without any price gets ``0``.

    :return: ``(res_spu, res_code, spu_price_list)`` - three lists aligned
        by position
    """
    attr_df = pd.read_csv('0101.csv', converters={'productcode': str, 'BrandCode': str})
    # TODO (from the original note): the business side still has to supply
    # the accessory prices for the configurable attributes.
    SPU_conf = create_SPU_configure(attr_df)
    res_spu, res_code = get_product_code(attr_df, SPU_conf)

    spu_price_list = []
    if not res_code:
        return res_spu, res_code, spu_price_list

    # The price views and the weight table do not depend on the SPU, so they
    # are fetched once instead of once per loop iteration.
    zc_price_df = get_price_data('vw_purchase_source_price')
    zc_price_weight = get_zc_price_weight()
    ds_price_df = get_price_data('vw_electricity_source_price')
    st_price_df = get_price_data('vw_entry_source_price')

    index = Index()
    total = len(res_code)
    # Count 1..total against ``total`` so a single SPU no longer divides by
    # zero in the progress bar.
    for position, code_list in enumerate(res_code, start=1):
        print(index(position, total), end='%')

        code_list_df = pd.DataFrame()
        code_list_df['ProductCode'] = code_list

        zc_filter_price_df = pd.merge(code_list_df, zc_price_df, on='ProductCode')
        zc_avg_price = calculate_zc_with_df(zc_filter_price_df, zc_price_weight)

        ds_filter_price_df = pd.merge(code_list_df, ds_price_df, on='ProductCode')
        ds_avg_price = calculate_ds_with_df(ds_filter_price_df)

        st_filter_price_df = pd.merge(code_list_df, st_price_df, on='ProductCode')
        st_avg_price = calculate_st_with_df(st_filter_price_df)

        # (average, channel weight); NaN averages mean "no data" and are
        # dropped together with their weight.
        channels = [(st_avg_price, 0.7), (ds_avg_price, 0.2), (zc_avg_price, 0.1)]
        usable = [(price, weight) for price, weight in channels if not math.isnan(price)]
        sum_weight = sum(weight for _price, weight in usable)
        if sum_weight == 0:
            spu_price = 0
        else:
            spu_price = sum(price * weight for price, weight in usable) / sum_weight
        spu_price_list.append(spu_price)

    return res_spu, res_code, spu_price_list
# Defined before the export script below, which calls it (the original
# order raised NameError because the calls came first).
@timer
def data_to_db(data, sheet_name):
    """Write a DataFrame to table ``sheet_name`` of the price_calculate database.

    :param data: DataFrame to store
    :param sheet_name: name of the destination table
    """
    engine = create_engine('mssql+pymssql://zgcindex:jiayou2017+2018@123.56.115.207/price_calculate?charset=utf8')
    try:
        data.to_sql(sheet_name, con=engine)
    finally:
        # Return the pooled connections once the export is done.
        engine.dispose()
    print('导入完成')


# Run the calculation and export one table of SPUs and one of SPU -> SKU links.
res_spu, res_code, spu_price_list = main()

spu_data = pd.DataFrame()
# The position in res_spu is the SPU index.
spu_data['spu_index'] = list(range(len(res_spu)))
spu_data['configure'] = res_spu
spu_data['price'] = spu_price_list

spu_index_list_child = []
sku_list_child = []
# enumerate gives each SKU the index of the SPU it was found under;
# ``res_code.index(sku_list)`` returned the first *equal* list, which is
# wrong when two SPUs share the same SKUs, and rescanned the list each time.
for spu_index, sku_list in enumerate(res_code):
    for sku in sku_list:
        spu_index_list_child.append(spu_index)
        sku_list_child.append(sku)

sku_data = pd.DataFrame()
sku_data['spu_index'] = spu_index_list_child
sku_data['sku'] = sku_list_child

data_to_db(spu_data, 'spu_price')
data_to_db(sku_data, 'spu_sku_realtion')
@timer
def get_configure_price(max_retries=3):
    """Load the configuration price table ``zd_configure_price``.

    The ``configure_price`` column is converted to a numeric dtype.

    :param max_retries: how many times the query is attempted before the
        last database error is re-raised
    :return: DataFrame with ``sub_name``, ``configure_attribute``,
        ``configure_attribute_value`` and ``configure_price``
    :raises pymssql.Error: when every attempt fails
    """
    last_error = None
    # Bounded loop instead of unbounded recursion behind a bare ``except``.
    for _attempt in range(max(1, max_retries)):
        conn = None
        try:
            conn = pymssql.connect('123.56.115.207', 'zgcindex', 'jiayou2017+2018', 'price_calculate')
            cursor = conn.cursor()
            cursor.execute("select sub_name,configure_attribute,configure_attribute_value,configure_price from zd_configure_price")
            rows = cursor.fetchall()
            columns = [column[0] for column in cursor.description]
        except pymssql.Error as error:
            last_error = error
            print('连接失败,重新连接')
            continue
        finally:
            # Release the connection on success and on failure alike.
            if conn is not None:
                conn.close()
        # Conversion errors are data problems, not connection problems, so
        # they are raised instead of triggering another attempt.
        configure_price = pd.DataFrame(rows, columns=columns)
        configure_price['configure_price'] = pd.to_numeric(configure_price['configure_price'])
        return configure_price
    raise last_error
# Fetch the configuration price table and save it as an Excel workbook.
configure_price_df = get_configure_price()
configure_price_df.to_excel('configurePrice.xlsx')
@timer
def calculate_with_dd(price_df):
    """Return the mean of 4 x ``entry_retail_price`` per ``ProductCode`` using dask.

    :param price_df: pandas DataFrame with ``ProductCode`` and
        ``entry_retail_price`` columns; it is not modified
    :return: pandas Series of means indexed by ``ProductCode``
    """
    # assign() returns a new frame, so the caller's column is not converted
    # in place as a side effect of calling this function.
    numeric_df = price_df.assign(entry_retail_price=pd.to_numeric(price_df['entry_retail_price']))
    ddf = dd.from_pandas(numeric_df, npartitions=1)
    ddf['entry_retail_price'] = ddf['entry_retail_price'] * 4
    grouped_mean = ddf.groupby('ProductCode').entry_retail_price.mean()
    return grouped_mean.compute()
# NOTE(review): neither calculate_with_df nor df is defined in the part of
# this script shown above - confirm where they are expected to come from.
a = calculate_with_df(df)
b = calculate_with_dd(df)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 24 09:27:37 2019
@author: rico
"""
#方案一:提前计算
#方案二:实时计算
#输入:产品名称输入、差异还原标识(未匹配到具体SKU,但匹配到SPU)
import pymysql
import pymssql
from func_timer import timer
import pandas as pd
import dask.dataframe as dd
from sqlalchemy import create_engine
'''
def Descartes(*args,repeat=1):
pools = [tuple(pool) for pool in args] * repeat
result = [[]]
for pool in pools:
result = [x + [y] for x in result for y in pool]
for prod in result:
yield tuple(prod)
list(Descartes(['i3-7500','i5-7825'],['4G','8G']))
'''
@timer
def get_price_data():
    """Load every row of ``vw_electricity_source_price``.

    :return: DataFrame whose columns are taken from the cursor description
    """
    conn = pymssql.connect('123.56.115.207', 'zgcindex', 'jiayou2017+2018', 'price_calculate')
    try:
        cursor = conn.cursor()
        cursor.execute("select * from vw_electricity_source_price")
        rows = cursor.fetchall()
        columns = [column[0] for column in cursor.description]
    finally:
        # Close the connection even when the query fails.
        conn.close()
    return pd.DataFrame(rows, columns=columns)
@timer
def calculate_with_df(price_df):
    """Return the mean of 4 x ``entry_retail_price`` per ``ProductCode`` using pandas.

    :param price_df: DataFrame with ``ProductCode`` and
        ``entry_retail_price`` columns; it is not modified
    :return: Series named ``entry_retail_price`` indexed by ``ProductCode``
    """
    # Work on a derived Series: the original overwrote the caller's column
    # with the values already multiplied by 4, so any later computation on
    # the same frame started from scaled prices.
    scaled_price = pd.to_numeric(price_df['entry_retail_price']) * 4
    return scaled_price.groupby(price_df['ProductCode']).mean()
@timer
def calculate_with_dd(price_df):
    """Return the mean of 4 x ``entry_retail_price`` per ``ProductCode`` using dask.

    :param price_df: pandas DataFrame with ``ProductCode`` and
        ``entry_retail_price`` columns; it is not modified
    :return: pandas Series of means indexed by ``ProductCode``
    """
    # assign() returns a new frame, so the caller's column is not converted
    # in place as a side effect of calling this function.
    numeric_df = price_df.assign(entry_retail_price=pd.to_numeric(price_df['entry_retail_price']))
    ddf = dd.from_pandas(numeric_df, npartitions=1)
    ddf['entry_retail_price'] = ddf['entry_retail_price'] * 4
    grouped_mean = ddf.groupby('ProductCode').entry_retail_price.mean()
    return grouped_mean.compute()
# Load the price rows once, then run the pandas and the dask variant on the
# same frame (each call prints its own duration through @timer).
df = get_price_data()
a = calculate_with_df(df)
b = calculate_with_dd(df)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment