init

4196b164 · rico.liu · 14c41495 · 4196b164 · 4196b164 · 4196b164
Commit 4196b164 authored Jan 07, 2020 by rico.liu
9 changed files
--- a/0101.csv
+++ b/0101.csv
--- a/0101配件参数标化.xlsx
+++ b/0101配件参数标化.xlsx
--- a/__pycache__/func_timer.cpython-36.pyc
+++ b/__pycache__/func_timer.cpython-36.pyc
--- a/__pycache__/function.cpython-36.pyc
+++ b/__pycache__/function.cpython-36.pyc
--- a/configurePrice.xlsx
+++ b/configurePrice.xlsx
--- a/func_timer.py
+++ b/func_timer.py
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Nov  4 13:19:46 2019
+@author: rico
+"""
+import time
+def timer(func):
+    """Decorator that prints how long each call to ``func`` takes.
+    :param func: the callable to wrap
+    :return: a wrapper that forwards all arguments to ``func``, prints
+        ``<func name>耗时<seconds>秒`` and returns ``func``'s result
+    """
+    import functools  # local import keeps this block self-contained
+    # wraps() keeps __name__/__doc__ of the decorated function, so stacked
+    # decorators and debugging output still show the real function name.
+    @functools.wraps(func)
+    def wrapper(*args,**kwds):
+        t0 = time.time()
+        res = func(*args,**kwds)
+        t1 = time.time()
+        print(func.__name__+'耗时%0.3f秒'%(t1-t0))
+        return res
+    return wrapper
--- a/function.py
+++ b/function.py
+# -*- coding: utf-8 -*-
+import pymssql
+import pandas as pd
+import re
+from lxml import etree
+'''
+class product():
+    def __init__(self, product_name, product_SKU, product_class_num = 'na'):
+        self.product_name = str(product_name)
+        self.product_SKU = str(product_SKU)
+        self.product_class = str(product_class_num)
+    def get_parameter(self, **kwargs):
+'''
+class sql_find():
+    """Thin holder for a pymssql connection and one cursor.
+    Can be used as a context manager so the connection is always released:
+    ``with sql_find('ZI_BAK') as db: db.cursor.execute(...)``.
+    """
+    def __init__ (self, database='ZI_DataBase', localhost=True):
+        """
+        :param database: name of the database to connect to
+        :param localhost: connect to the local server when true, otherwise to
+            the remote server
+        """
+        # NOTE(review): credentials are hard-coded in source; they should be
+        # moved to configuration / environment and rotated.
+        if localhost:
+            self.conn = pymssql.connect(host='localhost', user='zgc',password='1234',database=database,autocommit=True)
+        else:
+            self.conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='admin@2018@)!*',database=database,autocommit=True)
+        self.cursor = self.conn.cursor()
+    def close(self):
+        """Close the cursor and the connection."""
+        try:
+            self.cursor.close()
+        finally:
+            self.conn.close()
+    def __enter__(self):
+        return self
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+        return False  # never swallow the caller's exception
+def BN(brand):
+    """Normalise a brand value into a comparable key.
+    If the text after the first ``[`` contains a ``]``, the piece just before
+    its last ``]`` is removed everywhere in the string. If the remaining text
+    holds characters in U+4E00-U+9FA5, only those are returned; otherwise the
+    ASCII letters and digits are returned upper-cased.
+    :param brand: brand value; converted with ``str()``
+    :return: normalised brand string (may be empty)
+    """
+    text = str(brand)
+    segments = text.split('[')
+    if len(segments) > 1:
+        pieces = segments[1].split(']')
+        if len(pieces) > 1:
+            text = text.replace(pieces[-2], '')
+    chinese = ''.join(re.findall(r'[\u4E00-\u9FA5]', text))
+    if chinese:
+        return chinese
+    return ''.join(re.findall(r'[a-zA-Z0-9]', text)).upper()
+class Index(object):
+    """Render a one-line textual progress bar such as ``[###   ] 50.0``."""
+    def __init__(self, number=50, decimal=2):
+        """
+        :param number: width of the bar, i.e. how many ``#`` stand for 100%
+        :param decimal: number of decimal places kept in the percentage
+        """
+        self.decimal = decimal
+        self.number = number
+        self.a = 100/number   # percentage represented by a single '#'
+    def __call__(self, now, total):
+        """Return a carriage return followed by the bar and the percentage.
+        :param now: amount of work done so far
+        :param total: total amount of work
+        :return: string ready to be printed without a trailing newline
+        """
+        percentage = self.percentage_number(now, total)
+        well_num = int(percentage / self.a)
+        return "\r%s %s" % (self.progress_bar(well_num), percentage)
+    def percentage_number(self, now, total):
+        """Return ``now / total`` as a rounded percentage.
+        :param now: amount of work done so far
+        :param total: total amount of work
+        :return: percentage rounded to ``self.decimal`` places; 100.0 when
+            ``total`` is 0, because there is nothing left to do
+        """
+        if total == 0:
+            # Callers pass len(items) - 1, which is 0 for a single item.
+            return round(100.0, self.decimal)
+        return round(now / total * 100, self.decimal)
+    def progress_bar(self, num):
+        """Return the bracketed bar with ``num`` filled positions.
+        :param num: number of ``#`` to draw; clamped to ``0..self.number`` so
+            the bar keeps a constant width even above 100%
+        :return: the bar, e.g. ``[##  ]``
+        """
+        num = max(0, min(num, self.number))
+        return '[%s%s]' % ("#" * num, " " * (self.number - num))
+class tool():
+    """Lookup helpers backed by two Excel sheets read at construction time."""
+    def __init__(self):
+        """Load the accessory table and the brand mapping table."""
+        self.peijian_table = pd.read_excel('是否需要配件.xlsx')
+        self.brand_table = pd.read_excel('品牌对应表.xlsx')
+    def judge_brand(self, brand, brandcode_original):
+        """Resolve the five-character brand id.
+        :param brand: brand text to look up when no code is known yet
+        :param brandcode_original: existing brand code, or the placeholder
+            ``'没有对应指数品牌'`` when it is unknown
+        :return: the existing code zero-padded and cut to its last five
+            characters; otherwise the zero-padded ``ID`` of the first table row
+            whose Chinese brand equals ``brand`` or whose English brand equals
+            ``BN(brand)``; otherwise the placeholder itself
+        """
+        missing = '没有对应指数品牌'
+        if brandcode_original != missing:
+            return str(brandcode_original).zfill(5)[-5:]
+        table = self.brand_table
+        for brand_id, cn_name, en_name in zip(table['ID'], table['中文品牌'], table['英文品牌']):
+            # The English comparison is only evaluated when the Chinese one fails.
+            if brand == cn_name or BN(brand) == en_name:
+                return str(brand_id).zfill(5)
+        return missing
+    def judge_peijian(self, data_table):
+        """Add the ``有无配件`` and ``型号_only`` flag columns to ``data_table``.
+        For each value of ``指数子类编码`` the first row of the accessory table
+        with the same zero-padded category code decides the flags: ``'1'`` when
+        the corresponding table value is not ``'0'``, else ``'0'``. Codes
+        without a matching row get ``'0'`` for both flags.
+        :param data_table: DataFrame with a ``指数子类编码`` column; modified in place
+        :return: the same ``data_table`` object
+        """
+        unmatched = '没有匹配的指数子类编码'
+        table = self.peijian_table
+        accessory_flags = []
+        unique_flags = []
+        for raw_code in data_table['指数子类编码']:
+            code = str(raw_code).zfill(4)
+            accessory, unique = '0', '0'
+            if code != unmatched:
+                for category, is_accessory, is_unique in zip(table['categorycode'], table['ispeijian'], table['isunique']):
+                    if code != str(category).zfill(4):
+                        continue
+                    if str(is_accessory) != '0':
+                        accessory = '1'
+                    if str(is_unique) != '0':
+                        unique = '1'
+                    break
+            accessory_flags.append(accessory)
+            unique_flags.append(unique)
+        data_table['有无配件'] = accessory_flags
+        data_table['型号_only'] = unique_flags
+        return data_table
+def judge_unit(string):
+    """Tell whether ``string`` is something other than ``<number><unit>``.
+    :param string: candidate token, e.g. ``'100MM'`` or ``'X1C'``
+    :return: ``False`` when the token starts with a digit and, at its first
+        letter, the text from offset ``m`` (the count of digits/dots seen so
+        far) is one of the known units, case-insensitively; ``True`` otherwise,
+        including for an empty string
+    """
+    unit_list = {'MM','CM', 'DM', 'ML', 'W', 'KW'}
+    if not string or not string[0].isdigit():
+        return True
+    m = 0  # number of digit/dot characters seen so far
+    for char in string:
+        if char.isdigit() or char == '.':
+            m += 1
+        elif char.isalpha():
+            # The first letter decides: the remainder must be exactly a unit.
+            return string[m:].upper() not in unit_list
+    return True
+def _param_model(params):
+    """Return the model value stored in ``params``, or ``'NA'`` when absent."""
+    if '型号' in params:
+        return params['型号']
+    if r'\t型号\t' in params:
+        return params[r'\t型号\t']
+    return 'NA'
+def _name_candidates(name):
+    """Return the ASCII tokens of ``name`` that are at least two characters long."""
+    return [token for token in re.findall(r"[A-Za-z0-9-+/.*]+", name) if len(token) >= 2]
+def _choose_model(candidates, param_xinghao):
+    """Pick the model from the name tokens, falling back to the parameter value.
+    The parameter value wins when there are no tokens or it is one of them.
+    Otherwise the first token longer than two characters, without ``*`` and
+    not a measurement is used; if none exists the longest token is the
+    fallback, replaced by ``'NA'`` when it is only a measurement.
+    """
+    if not candidates or param_xinghao in candidates:
+        return param_xinghao.upper()
+    chosen = max(candidates, key=len)
+    for token in candidates:
+        if len(token) > 2 and '*' not in token and judge_unit(token):
+            chosen = token
+            break
+    if not judge_unit(chosen):
+        # Was ``xinghao_data == 'NA'``: a comparison with no effect, so a
+        # measurement such as '100MM' used to be returned as the model.
+        chosen = 'NA'
+    return chosen.upper()
+def type_extract_JD(name, params, brand):
+    """Extract the model number of a product, ignoring its English brand name.
+    :param name: product title
+    :param params: mapping of parameter names to values; ``'型号'`` is used
+        when present
+    :param brand: brand text; its first ASCII alphanumeric run (upper-cased)
+        is removed once from the candidate tokens
+    :return: upper-cased model number, or ``'NA'`` when none is found
+    """
+    latin_parts = re.findall(r"[A-Za-z0-9]+", brand)
+    # Placeholder that can never match a token when the brand has no ASCII part.
+    brand_remove = latin_parts[0].upper() if latin_parts else '没有英文品牌！'
+    candidates = [token.upper() for token in _name_candidates(name)]
+    if brand_remove in candidates:
+        candidates.remove(brand_remove)
+    # NOTE(review): the raw parameter value is compared against upper-cased
+    # tokens, exactly as before; a lower-case parameter value never matches.
+    return _choose_model(candidates, _param_model(params))
+def type_extract(name, params):
+    """Extract the model number of a product from its title and parameters.
+    :param name: product title
+    :param params: mapping of parameter names to values; ``'型号'`` is used
+        when present
+    :return: upper-cased model number, or ``'NA'`` when none is found
+    """
+    return _choose_model(_name_candidates(name), _param_model(params))
+def param_load(product_id, xml_string):
+    """Parse a parameter table from markup into a dict.
+    :param product_id: SKU of the product (not used by the parsing itself)
+    :param xml_string: raw markup containing the parameter table
+    :return: dict mapping the first text node of each ``td`` with class
+        ``tdTitle`` to the first text node of the cell paired with it; cells
+        are paired by position with every ``td`` after the first in a row
+    :raises IndexError: when a paired cell has no direct text node
+    """
+    tree = etree.HTML(xml_string)
+    title_cells = tree.xpath("//td[@class='tdTitle']")
+    value_cells = tree.xpath("//tr//td[position()>1]")
+    return {
+        title.xpath("./text()")[0]: value.xpath("./text()")[0]
+        for title, value in zip(title_cells, value_cells)
+    }
+if __name__ == '__main__':
+    # Manual smoke test: connect to the local ZI_BAK database and print every
+    # quote row of one hard-coded product code.
+    sqlserver = sql_find('ZI_BAK', True)
+    sqlserver.cursor.execute("select * from ZI_Price_Quote where  productcode = '0506003750007'")
+    print(sqlserver.cursor.fetchall())
--- a/priceModel.py
+++ b/priceModel.py
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Dec 24 09:27:37 2019
+@author: rico
+"""
+#方案一：提前计算
+#方案二：实时计算
+#输入：产品名称输入、差异还原标识（未匹配到具体SKU，但匹配到SPU）
+import pymssql
+from func_timer import timer
+import pandas as pd
+import dask.dataframe as dd
+import datetime
+from function import Index
+import math
+from sqlalchemy import create_engine
+import warnings
+warnings.filterwarnings("ignore")
+def Descartes(*args,repeat=1):
+    """Yield the Cartesian product of the given iterables as tuples.
+    :param args: any number of iterables
+    :param repeat: how many times the sequence of iterables is repeated
+    :return: generator of tuples, the last iterable varying fastest
+    """
+    import itertools  # local import keeps this block self-contained
+    # itertools.product yields combinations lazily instead of building every
+    # intermediate list in memory first.
+    yield from itertools.product(*args, repeat=repeat)
+@timer
+def get_attr_data(SubCategoryCode, max_retries=3):
+    """Load the product attribute rows of one sub-category from SQL Server.
+    :param SubCategoryCode: sub-category code used to filter ``info_product``
+    :param max_retries: number of connect/query attempts before giving up
+    :return: DataFrame of the fetched rows, columns named after the cursor
+        description
+    :raises RuntimeError: when every attempt failed; the last database error
+        is attached as the cause
+    """
+    # The code is sent as a bound parameter instead of being spliced into the
+    # SQL text, so it can no longer alter the statement.
+    sql_attr = '''select c.SubCategoryCode,a.productcode,c.productname,d.Name,c.BrandCode,b.subtitle,a.value,c.state from 
+                             (select productcode,productname,BrandCode,state,SubCategoryCode from info_product where SubCategoryCode = %s) c 
+                                join info_productdetail a
+                                on a.productcode=c.productcode
+                                join vw_relation_property b
+                                on a.cfgid=b.subtitleid and c.SubCategoryCode=b.subcategorycode
+                                join ZI_Brand d
+                                on c.BrandCode=d.BrandCode
+                                '''
+    last_error = None
+    # Bounded loop: the previous bare ``except`` retried recursively forever,
+    # even on errors a retry cannot fix (bad SQL, KeyboardInterrupt).
+    for _ in range(max_retries):
+        conn = None
+        try:
+            # NOTE(review): credentials are hard-coded; move them to configuration.
+            conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','ZI_DataBase')
+            cursor = conn.cursor()
+            cursor.execute(sql_attr, (SubCategoryCode,))
+            data_source = cursor.fetchall()
+            columns = [column[0] for column in cursor.description]
+            return pd.DataFrame(data_source,columns=columns)
+        except pymssql.Error as err:
+            last_error = err
+            print('连接失败，重新连接')
+        finally:
+            # Release the connection on success and on failure alike.
+            if conn is not None:
+                conn.close()
+    raise RuntimeError('get_attr_data failed after %d attempts' % max_retries) from last_error
+# Build the SPU configurations
+@timer
+def create_SPU_configure(attr_df):
+    """Build, per brand, every (brand, product series, CPU series) tuple.
+    :param attr_df: attribute table with ``BrandCode``, ``subtitle`` and
+        ``value`` columns
+    :return: list with one entry per distinct brand; each entry is the list of
+        tuples combining that brand, its distinct ``产品系列`` values and the
+        CPU series ``i3``/``i5``/``i7``
+    """
+    cpu_series = ['i3','i5','i7']
+    def series_of(brand_code):
+        # Distinct product-series values recorded for this brand.
+        mask = (attr_df['BrandCode'] == brand_code ) & (attr_df['subtitle'] == '产品系列')
+        return attr_df[mask]['value'].unique().tolist()
+    return [
+        list(Descartes([brand_code], series_of(brand_code), cpu_series))
+        for brand_code in attr_df['BrandCode'].unique().tolist()
+    ]
+# Collect the SKUs that belong to each SPU
+@timer
+def get_product_code(attr_df,SPU_conf):
+    """Find the product codes matching every SPU configuration.
+    :param attr_df: attribute table with ``BrandCode``, ``value``,
+        ``productname``, ``subtitle`` and ``productcode`` columns
+    :param SPU_conf: list of per-brand lists of ``(brand, series, cpu)`` tuples
+    :return: ``(res_spu, res_code)``: the flattened SPU tuples and, at the same
+        position, the list of distinct product codes matched for each
+    """
+    index = Index()
+    res_spu = []
+    res_code = []
+    total = len(SPU_conf)
+    # Count from 1 against the real total: the old ``m / (len - 1)`` overshot
+    # 100% and divided by zero when there was a single brand.
+    for done, brand_confs in enumerate(SPU_conf, start=1):
+        print(index(done, total), end='%')
+        for spu in brand_confs:
+            cpu = spu[2].upper()
+            brand_match = attr_df['BrandCode'] == spu[0]
+            series_match = attr_df['value'] == spu[1]
+            name_match = attr_df['productname'].str.contains(cpu)
+            # NOTE(review): this Series only covers the 'CPU型号' rows, while
+            # series_match selects rows whose value is the product series, so
+            # this term probably never contributes; confirm the intended join.
+            cpu_attr_match = attr_df[attr_df['subtitle'] == 'CPU型号']['value'].str.contains(cpu)
+            productcode = attr_df[brand_match & series_match & (name_match | cpu_attr_match)]['productcode'].unique().tolist()
+            # SPUs without any SKU are kept so both lists stay aligned.
+            res_spu.append(spu)
+            res_code.append(productcode)
+    return res_spu,res_code
+@timer
+def get_price_data(channel, max_retries=3):
+    """Load the latest-period rows of one price view.
+    :param channel: name of the table/view to read; it is inserted into the
+        SQL text as an identifier, so it must come from trusted code only
+    :param max_retries: number of connect/query attempts before giving up
+    :return: DataFrame of the fetched rows, columns named after the cursor
+        description
+    :raises RuntimeError: when every attempt failed; the last database error
+        is attached as the cause
+    """
+    sql = "select * from "+channel+" where periods = (select top 1 periods from vw_entry_source_price order by periods desc)"
+    last_error = None
+    # Bounded loop: the previous bare ``except`` retried recursively forever.
+    for _ in range(max_retries):
+        conn = None
+        try:
+            # NOTE(review): credentials are hard-coded; move them to configuration.
+            conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','price_calculate')
+            cursor = conn.cursor()
+            cursor.execute(sql)
+            rows = cursor.fetchall()
+            columns = [column[0] for column in cursor.description]
+            return pd.DataFrame(rows,columns=columns)
+        except pymssql.Error as err:
+            last_error = err
+            print('连接失败，重新连接')
+        finally:
+            # Release the connection on success and on failure alike.
+            if conn is not None:
+                conn.close()
+    raise RuntimeError('get_price_data failed after %d attempts' % max_retries) from last_error
+@timer
+def get_zc_price_weight(max_retries=3):
+    """Load the price-type weights from ``zd_price_weight``.
+    :param max_retries: number of connect/query attempts before giving up
+    :return: DataFrame with the ``price_id``, ``price_name`` and ``weight``
+        columns selected by the query
+    :raises RuntimeError: when every attempt failed; the last database error
+        is attached as the cause
+    """
+    last_error = None
+    # Bounded loop: the previous bare ``except`` retried recursively forever.
+    for _ in range(max_retries):
+        conn = None
+        try:
+            # NOTE(review): credentials are hard-coded; move them to configuration.
+            conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','price_calculate')
+            cursor = conn.cursor()
+            cursor.execute("select price_id,price_name,weight from zd_price_weight")
+            rows = cursor.fetchall()
+            columns = [column[0] for column in cursor.description]
+            return pd.DataFrame(rows,columns=columns)
+        except pymssql.Error as err:
+            last_error = err
+            print('连接失败，重新连接')
+        finally:
+            # Release the connection on success and on failure alike.
+            if conn is not None:
+                conn.close()
+    raise RuntimeError('get_zc_price_weight failed after %d attempts' % max_retries) from last_error
+_ATTR_TABLE_CACHE = {}
+def _load_attr_table(path):
+    """Read the attribute CSV once and reuse it on later calls."""
+    if path not in _ATTR_TABLE_CACHE:
+        _ATTR_TABLE_CACHE[path] = pd.read_csv(path, converters={'productcode':str,'BrandCode':str})
+    return _ATTR_TABLE_CACHE[path]
+@timer
+def calculate_configure_price(productCode,categoryCode):
+    """Return the configuration price to subtract from a product's price.
+    Placeholder implementation: the product's attributes are looked up but
+    not priced yet, so the result is always 0.
+    :param productCode: product code whose attributes are looked up
+    :param categoryCode: category code (currently unused)
+    :return: 0
+    """
+    # This function runs once per row and price column, so the CSV is cached
+    # instead of being parsed again on every call.
+    attr_df = _load_attr_table('0101.csv')
+    # TODO: derive the price from these attributes; the value is unused so far.
+    product_attr = attr_df[attr_df['productcode'] == productCode][['subtitle','value']]
+    return 0
+@timer
+def calculate_zc_with_df(zc_filter_price_df,zc_price_weight):
+    """Compute the weighted average purchase-channel price of a set of SKUs.
+    For every row the non-zero price columns are reduced by the configuration
+    price, weighted by their price-type weight and averaged; the row results
+    are then averaged using the ``weight`` column.
+    :param zc_filter_price_df: price rows with ``ProductCode``, ``weight`` and
+        the five price columns; not modified
+    :param zc_price_weight: table with ``price_id`` and ``weight`` columns
+    :return: the weighted average price, or NaN when no row has a usable price
+    """
+    price_dict = {'purchase_price':'PLCJJ',
+                  'agreement_price':'XYJ',
+                  'bid_price':'JJ',
+                  'contract_price':'HTJ',
+                  'electronic_mall_price':'DZSCJ'}
+    price_cols = list(price_dict)
+    dfp = zc_filter_price_df.fillna(0)
+    if dfp.empty:
+        return float('nan')
+    for col_name in price_cols + ['weight']:
+        dfp[col_name] = pd.to_numeric(dfp[col_name])
+    # Float columns, so weighted values can be written back without truncation.
+    dfp[price_cols] = dfp[price_cols].astype(float)
+    # Look every weight up once instead of once per row and column.
+    weights = {col_name: int(zc_price_weight[zc_price_weight['price_id'] == price_id]['weight'].values[0])
+               for col_name, price_id in price_dict.items()}
+    sum_weight_list = []
+    for index,row in dfp.iterrows():
+        sum_weight = 0
+        for col_name in price_cols:
+            if row[col_name] == 0:
+                continue
+            # Subtract the configuration price before weighting.
+            configure_price = calculate_configure_price(row['ProductCode'],'0101')
+            # .loc writes into dfp itself; ``dfp[col][index] = ...`` is chained
+            # assignment and may only modify a temporary copy.
+            dfp.loc[index, col_name] = (row[col_name]-configure_price)*weights[col_name]
+            sum_weight += weights[col_name]
+        sum_weight_list.append(sum_weight)
+    dfp['sum_weight'] = sum_weight_list
+    # Rows without any price used to produce NaN (0/0) while their weight was
+    # still counted in the denominator, pulling the average down.
+    priced = dfp[dfp['sum_weight'] != 0]
+    total_weight = priced['weight'].sum()
+    if total_weight == 0:
+        return float('nan')
+    combine_price = priced[price_cols].sum(axis=1)/priced['sum_weight']
+    return (combine_price * priced['weight']).sum()/total_weight
+@timer
+def calculate_ds_with_df(ds_filter_price_df):
+    """Compute the weighted average e-commerce price of a set of SKUs.
+    :param ds_filter_price_df: price rows with ``ProductCode``, ``goods_price``
+        and ``weight`` columns; not modified
+    :return: average of ``goods_price`` minus the configuration price,
+        weighted by ``weight``; rows whose price is 0 or -1 are ignored; NaN
+        when nothing is left or the weights sum to 0
+    """
+    dfp = ds_filter_price_df.fillna(0)
+    for col_name in ['goods_price','weight']:
+        dfp[col_name] = pd.to_numeric(dfp[col_name])
+    # Float column, so the adjusted price can be written back without truncation.
+    dfp['goods_price'] = dfp['goods_price'].astype(float)
+    drop_list = []
+    for index,row in dfp.iterrows():
+        if row['goods_price'] == 0 or row['goods_price'] == -1:
+            drop_list.append(index)
+        else:
+            # Subtract the configuration price.
+            configure_price = calculate_configure_price(row['ProductCode'],'0101')
+            # .loc writes into dfp itself; ``dfp[col][index] = ...`` is chained
+            # assignment and may only modify a temporary copy.
+            dfp.loc[index, 'goods_price'] = row['goods_price']-configure_price
+    dfp = dfp.drop(labels=drop_list,axis=0)
+    total_weight = dfp['weight'].sum()
+    if total_weight == 0:
+        return float('nan')
+    return (dfp['goods_price'] * dfp['weight']).sum()/total_weight
+@timer
+def calculate_st_with_df(st_filter_price_df):
+    """Compute the mean retail price of a set of SKUs.
+    :param st_filter_price_df: price rows with ``ProductCode`` and
+        ``entry_retail_price`` columns; not modified
+    :return: mean of ``entry_retail_price`` minus the configuration price;
+        NaN for an empty input
+    """
+    dfp = st_filter_price_df.fillna(0)
+    # The conversion used to be stored in a misspelled 'entry_retial_price'
+    # column, leaving the real column unconverted.
+    dfp['entry_retail_price'] = pd.to_numeric(dfp['entry_retail_price']).astype(float)
+    for index,row in dfp.iterrows():
+        # Subtract the configuration price.
+        configure_price = calculate_configure_price(row['ProductCode'],'0101')
+        # .loc writes into dfp itself; ``dfp[col][index] = ...`` is chained
+        # assignment and may only modify a temporary copy.
+        dfp.loc[index, 'entry_retail_price'] = row['entry_retail_price']-configure_price
+    return dfp['entry_retail_price'].mean()
+#attr_df = get_attr_data('0101')
+#attr_df.to_csv('0101.csv')
+@timer
+def main():
+    """Compute one blended price per SPU.
+    Reads the attribute table from ``0101.csv``, builds the SPU configurations
+    and their SKUs, then blends the purchase (0.1), e-commerce (0.2) and
+    retail (0.7) channel averages, ignoring channels without data.
+    :return: ``(res_spu, res_code, spu_price_list)``: SPU tuples, the SKU list
+        of each SPU and the blended price of each SPU (0 when no channel has
+        data)
+    """
+    attr_df = pd.read_csv('0101.csv', converters={'productcode':str,'BrandCode':str})
+    # TODO: the business side still has to supply accessory prices.
+    SPU_conf = create_SPU_configure(attr_df)
+    res_spu,res_code = get_product_code(attr_df,SPU_conf)
+    # The price tables do not depend on the SPU, so they are queried once
+    # instead of once per loop iteration.
+    zc_price_df = get_price_data('vw_purchase_source_price')
+    zc_price_weight = get_zc_price_weight()
+    ds_price_df = get_price_data('vw_electricity_source_price')
+    st_price_df = get_price_data('vw_entry_source_price')
+    spu_price_list = []
+    index = Index()
+    total = len(res_code)
+    for done, code_list in enumerate(res_code, start=1):
+        code_list_df = pd.DataFrame()
+        code_list_df['ProductCode'] = code_list
+        zc_filter_price_df = pd.merge(code_list_df,zc_price_df,on = 'ProductCode')
+        zc_avg_price = calculate_zc_with_df(zc_filter_price_df,zc_price_weight)
+        ds_filter_price_df = pd.merge(code_list_df,ds_price_df,on = 'ProductCode')
+        ds_avg_price = calculate_ds_with_df(ds_filter_price_df)
+        st_filter_price_df = pd.merge(code_list_df,st_price_df,on = 'ProductCode')
+        st_avg_price = calculate_st_with_df(st_filter_price_df)
+        # Blend the channels that produced a price; NaN means "no data".
+        weighted_sum = 0
+        sum_weight = 0
+        for avg_price, channel_weight in ((st_avg_price, 0.7), (ds_avg_price, 0.2), (zc_avg_price, 0.1)):
+            if math.isnan(avg_price):
+                continue
+            weighted_sum += avg_price*channel_weight
+            sum_weight += channel_weight
+        spu_price = weighted_sum/sum_weight if sum_weight != 0 else 0
+        spu_price_list.append(spu_price)
+        # done/total never divides by zero, unlike the old i / (len - 1).
+        print(index(done, total), end='%')
+    return res_spu,res_code,spu_price_list
+# data_to_db is defined before the script statements that call it; it used to
+# be defined after them, which raised NameError at the first call.
+@timer
+def data_to_db(data,sheet_name):
+    """Write a DataFrame to a table of the ``price_calculate`` database.
+    :param data: DataFrame to store
+    :param sheet_name: name of the target table
+    """
+    # NOTE(review): credentials are hard-coded; move them to configuration.
+    engine = create_engine('mssql+pymssql://zgcindex:jiayou2017+2018@123.56.115.207/price_calculate?charset=utf8')
+    data.to_sql(sheet_name, con=engine)
+    print('导入完成')
+res_spu,res_code,spu_price_list = main()
+# One row per SPU: its position, its configuration tuple and its price.
+spu_data = pd.DataFrame()
+spu_data['spu_index'] = list(range(len(res_spu)))
+spu_data['configure'] = res_spu
+spu_data['price'] = spu_price_list
+# One row per (SPU, SKU) pair. enumerate() gives the real position of each SKU
+# list; ``res_code.index(sku_list)`` returned the first *equal* list, which is
+# the wrong SPU whenever two SPUs share the same SKUs.
+spu_index_list_child = []
+sku_list_child = []
+for spu_index, sku_list in enumerate(res_code):
+    for sku in sku_list:
+        spu_index_list_child.append(spu_index)
+        sku_list_child.append(sku)
+sku_data = pd.DataFrame()
+sku_data['spu_index'] = spu_index_list_child
+sku_data['sku'] = sku_list_child
+data_to_db(spu_data,'spu_price')
+data_to_db(sku_data,'spu_sku_realtion')
+@timer
+def get_configure_price(max_retries=3):
+    """Load the configuration price table from ``zd_configure_price``.
+    :param max_retries: number of connect/query attempts before giving up
+    :return: DataFrame with ``sub_name``, ``configure_attribute``,
+        ``configure_attribute_value`` and a numeric ``configure_price`` column
+    :raises RuntimeError: when every attempt failed; the last database error
+        is attached as the cause
+    """
+    last_error = None
+    # Bounded loop: the previous bare ``except`` retried recursively forever.
+    for _ in range(max_retries):
+        conn = None
+        try:
+            # NOTE(review): credentials are hard-coded; move them to configuration.
+            conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','price_calculate')
+            cursor = conn.cursor()
+            cursor.execute("select sub_name,configure_attribute,configure_attribute_value,configure_price from zd_configure_price")
+            rows = cursor.fetchall()
+            columns = [column[0] for column in cursor.description]
+        except pymssql.Error as err:
+            last_error = err
+            print('连接失败，重新连接')
+            continue
+        finally:
+            # Release the connection on success and on failure alike.
+            if conn is not None:
+                conn.close()
+        # Built outside the try block, so a data problem is not mistaken for a
+        # connection failure and retried.
+        configure_price = pd.DataFrame(rows,columns=columns)
+        configure_price['configure_price'] = pd.to_numeric(configure_price['configure_price'])
+        return configure_price
+    raise RuntimeError('get_configure_price failed after %d attempts' % max_retries) from last_error
+# Export the configuration price table to an Excel file in the working directory.
+configure_price_df = get_configure_price()
+configure_price_df.to_excel('configurePrice.xlsx')
+@timer
+def calculate_with_dd(price_df):
+    """Average ``entry_retail_price`` times 4 per product code using dask.
+    :param price_df: pandas DataFrame with ``ProductCode`` and
+        ``entry_retail_price`` columns; not modified
+    :return: pandas Series of the mean scaled price indexed by ``ProductCode``
+    """
+    # Work on a copy: assigning the converted column to the caller's frame
+    # changed the data seen by whoever uses that frame next.
+    dfp = price_df.copy()
+    dfp['entry_retail_price'] = pd.to_numeric(dfp['entry_retail_price'])
+    df = dd.from_pandas(dfp,npartitions = 1)
+    df.entry_retail_price = df.entry_retail_price*4
+    df = df.groupby('ProductCode').entry_retail_price.mean()
+    return df.compute()
+# NOTE(review): neither ``calculate_with_df`` nor ``df`` is defined in the
+# visible part of this file, so these lines look like leftovers from
+# priceModel_ds.py and would raise NameError here; confirm and remove.
+a = calculate_with_df(df)
+b = calculate_with_dd(df)
--- a/priceModel_ds.py
+++ b/priceModel_ds.py
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Dec 24 09:27:37 2019
+@author: rico
+"""
+#方案一：提前计算
+#方案二：实时计算
+#输入：产品名称输入、差异还原标识（未匹配到具体SKU，但匹配到SPU）
+import pymysql
+import pymssql
+from func_timer import timer
+import pandas as pd
+import dask.dataframe as dd
+from sqlalchemy import create_engine
+'''
+def Descartes(*args,repeat=1):
+    pools = [tuple(pool) for pool in args] * repeat
+    result = [[]]
+    for pool in pools:
+        result = [x + [y] for x in result for y in pool]
+    for prod in result:
+        yield tuple(prod)
+list(Descartes(['i3-7500','i5-7825'],['4G','8G']))
+'''
+@timer
+def get_price_data():
+    """Load every row of ``vw_electricity_source_price``.
+    :return: DataFrame of the fetched rows, columns named after the cursor
+        description
+    """
+    # NOTE(review): credentials are hard-coded; move them to configuration.
+    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou2017+2018','price_calculate')
+    try:
+        cursor = conn.cursor()
+        cursor.execute("select * from vw_electricity_source_price")
+        rows = cursor.fetchall()
+        columns = [column[0] for column in cursor.description]
+    finally:
+        # Close the connection even when the query fails.
+        conn.close()
+    return pd.DataFrame(rows,columns=columns)
+@timer
+def calculate_with_df(price_df):
+    """Average ``entry_retail_price`` times 4 per product code using pandas.
+    :param price_df: DataFrame with ``ProductCode`` and ``entry_retail_price``
+        columns; not modified
+    :return: Series of the mean scaled price indexed by ``ProductCode``
+    """
+    # Work on a copy: the scaled column used to be written into the caller's
+    # frame, so a later run on the same frame saw prices already multiplied by 4.
+    dfp = price_df.copy()
+    dfp['entry_retail_price'] = pd.to_numeric(dfp['entry_retail_price'])*4
+    return dfp.groupby('ProductCode').entry_retail_price.mean()
+@timer
+def calculate_with_dd(price_df):
+    """Average ``entry_retail_price`` times 4 per product code using dask.
+    :param price_df: pandas DataFrame with ``ProductCode`` and
+        ``entry_retail_price`` columns; not modified
+    :return: pandas Series of the mean scaled price indexed by ``ProductCode``
+    """
+    # Work on a copy so the caller's frame keeps its original column.
+    dfp = price_df.copy()
+    dfp['entry_retail_price'] = pd.to_numeric(dfp['entry_retail_price'])
+    df = dd.from_pandas(dfp,npartitions = 1)
+    df.entry_retail_price = df.entry_retail_price*4
+    df = df.groupby('ProductCode').entry_retail_price.mean()
+    return df.compute()
+# Load the price view once, then run the same aggregation with pandas (a) and
+# with dask (b); each call prints its own duration through @timer.
+df = get_price_data()
+a = calculate_with_df(df)
+b = calculate_with_dd(df)