Commit fe6dac33 authored by sanlu

添加了返回错误类产品到回馈表中和进入页面数据置9的功能

parent b09a5e32
# -*- coding: utf-8 -*-
import pymssql
import pandas as pd
from lxml import etree
import requests
import json
from function import *
import pickle
#from product_type_extract import *
#from brand_name import brandd
import decimal
import time
import re
from lstm_predict import LSTMNER
import os
from ZOL_Crawler import CRAWLER
def _read_crawl_categories(path='crawl_categorycode.txt'):
    '''Return the category codes stored one per line in ``path``.'''
    with open(path, 'r', encoding='utf-8') as f_crawl:
        return [line.strip('\n') for line in f_crawl]


def _load_param_mapping(sql_ZIdatabase, channel, now_time):
    '''Return the supplier-to-index parameter mapping table for ``channel``.

    The table is cached per channel and day in an Excel file in the working
    directory; it is rebuilt from the database only when that file is missing.
    '''
    cache_path = f'{channel}_参数对应表_{now_time}.xlsx'
    if os.path.isfile(cache_path):
        return pd.read_excel(cache_path, converters={'ZI_SubCategoryCode': str})

    # NOTE(review): the source's indentation was lost; everything up to the
    # to_excel call is assumed to belong to this cache-miss path.
    cursor = sql_ZIdatabase.cursor
    cursor.execute(f"select * from Product_Relation_Attribute_SubTitle where Source = '{channel}'")
    match_data = pd.DataFrame(cursor.fetchall(), columns=[col[0] for col in cursor.description])
    cursor.execute('select * from vw_relation_property')
    param_data_table = pd.DataFrame(cursor.fetchall(), columns=[col[0] for col in cursor.description])

    merge_data = pd.merge(
        param_data_table,
        match_data[['ZI_SubTitle', 'Other_SubTitle', 'ZI_SubCategoryCode']],
        left_on=['SubCategoryCode', 'SubTitle'],
        right_on=['ZI_SubCategoryCode', 'ZI_SubTitle'],
    )
    merge_data = merge_data.drop_duplicates().reset_index()
    merge_data = merge_data[
        merge_data['ZI_SubTitle'].notnull()
        | (merge_data['ISimportant'] == 1)
        | (merge_data['ispeijian'] == 1)
    ]
    merge_data.to_excel(cache_path)
    return merge_data


def _load_param_state(cursor, categorycode):
    '''Map each important/accessory SubTitle of a category to its flags.

    Each value is ``[Isimportant, Isbiaozhunzhi, ispeijian]`` as strings.
    '''
    cursor.execute(f"select SubTitle,Isimportant,Isbiaozhunzhi,ispeijian from VW_Relation_Property where SubCategoryCode='{categorycode}' and (Isimportant = '1' or Ispeijian = '1')")
    return {row[0]: [str(row[1]), str(row[2]), str(row[3])] for row in cursor.fetchall()}


def _normalise_params(params):
    '''Return the product's parameter dict with tabs/spaces removed from keys.'''
    try:
        # NOTE(review): eval() executes whatever text is stored in the
        # parameter column. It is kept because the stored format is not
        # visible here; ast.literal_eval would be safer if the column only
        # ever holds dict literals.
        params = eval(params)
    except TypeError:
        # Already a Python object (not a string), nothing to parse.
        pass
    if not isinstance(params, dict):
        # e.g. NaN for a product without parameters.
        return {}
    # Build a new dict: the original popped/inserted keys while iterating
    # over params.keys(), which is undefined behaviour for a dict.
    return {key.replace('\t', '').replace(' ', ''): value for key, value in params.items()}


def _map_supplier_params(params, param_dict, zi_subtitles, producttype):
    '''Translate supplier parameter names to index (ZI) parameter names.'''
    mapped = {}
    for key, value in params.items():
        if key in param_dict:
            mapped[param_dict[key]] = value      # non-standard -> standard name
        elif key in zi_subtitles:
            mapped[key] = value                  # already a standard name
    if '产品型号' not in mapped:
        mapped['产品型号'] = producttype
    return mapped


def _append_param_rows(update_table, params, param_state_dict, common, param_source, state):
    '''Append one output row per entry of ``params`` to ``update_table``.'''
    now_time, channel, sku, name, categorycode = common
    for param_key, value in params.items():
        # Parameters unknown to the category get '0' for all three flags.
        important, standard, peijian = param_state_dict.get(param_key, ('0', '0', '0'))
        update_table['batch'].append(now_time)
        update_table['source'].append(channel)
        update_table['productId'].append(sku)
        update_table['productName'].append(f'{name};')
        update_table['param'].append(param_key)
        update_table['paramAttributeImportant'].append(important)
        update_table['paramAttributeMatch'].append(peijian)
        update_table['paramAttributeStandard'].append(standard)
        update_table['value'].append(value)
        update_table['paramSource'].append(param_source)
        update_table['state'].append(state)
        update_table['subcategorycode'].append(categorycode)


def param_extract_function(data_table, channel):
    '''Extract product parameters and append them to the ZI database.

    For every product in ``data_table`` up to three kinds of rows are
    produced: parameters extracted from the product name by the LSTM model
    ('模型参数项', state '0'), the supplier's own parameters mapped to index
    names ('原始参数项', state '1') and, for categories listed in
    ``crawl_categorycode.txt``, parameters returned by the ZOL crawler
    ('爬取参数项', state '1').

    The rows are written to ``update_data_test.xlsx`` and appended to the
    ``Product_Parameter_Process`` table; the product list itself is appended
    to ``Product_Api_Data``.

    Args:
        data_table: DataFrame as produced by ``data_fetch.run`` (columns
            'SKU', '产品名称', '参数项', '产品型号', '指数子类编码', ...).
        channel: Source/channel code, e.g. 'LXWL'.

    Returns:
        DataFrame holding the parameter rows that were written.
    '''
    crawl_category_list = set(_read_crawl_categories())
    now_time = time.strftime("%Y-%m-%d", time.localtime())
    sql_ZIdatabase = sql_find('ZI_DataBase', False)
    cursor = sql_ZIdatabase.cursor

    product_table = pd.DataFrame(data_table, columns=['SKU', '指数品牌', '指数品牌编码', '产品名称', '指数子类', '指数子类编码', '产品价格', 'url', '品牌'])
    product_table.columns = ['productId', 'brandName', 'brandCode', 'producrName', 'categoryCodeName', 'categoryCode', 'price', 'url', 'original_brandName']
    product_table['batch'] = now_time
    product_table['state'] = '0'
    product_table['source'] = channel

    merge_data = _load_param_mapping(sql_ZIdatabase, channel, now_time)

    category_list = set(data_table['指数子类编码'].tolist())
    category_list.discard('没有匹配的指数子类编码')

    update_columns = (
        'batch', 'source', 'productId', 'productName', 'param',
        'paramAttributeImportant', 'paramAttributeMatch',
        'paramAttributeStandard', 'value', 'paramSource', 'state',
        'subcategorycode',
    )
    update_table = {column: [] for column in update_columns}

    m = 0
    for categorycode in category_list:
        param_state_dict = _load_param_state(cursor, categorycode)
        param_table = merge_data[
            (merge_data['ZI_SubCategoryCode'] == categorycode)
            & ((merge_data['ISimportant'] == 1) | (merge_data['ispeijian'] == 1))
        ]
        # Dictionary translating non-standard names to standard names.
        param_dict = dict(zip(param_table['Other_SubTitle'], param_table['ZI_SubTitle']))
        zi_subtitles = set(param_table['ZI_SubTitle'])
        data_categorycode = data_table[data_table['指数子类编码'] == categorycode]
        crawler = CRAWLER(categorycode)

        # A category without model files is processed without name-based
        # extraction. The original wrapped the whole product loop in this
        # try and repeated the loop in the handler, so a failure part-way
        # through re-appended rows that had already been emitted.
        try:
            model = LSTMNER(categorycode)
        except FileNotFoundError:
            model = None

        for SKU, name, params, producttype in zip(data_categorycode['SKU'], data_categorycode['产品名称'], data_categorycode['参数项'], data_categorycode['产品型号']):
            m += 1
            print(m, end='\r')
            params = _normalise_params(params)
            product_param_dict = _map_supplier_params(params, param_dict, zi_subtitles, producttype)

            name_param_dict = {}
            if model is not None:
                try:
                    name_param_dict = model.param_extract(name)
                except FileNotFoundError:
                    # Keep the best-effort behaviour: continue the category
                    # without the model instead of aborting the batch.
                    model = None

            if categorycode in crawl_category_list:
                crawl_param_dict = {
                    key: value
                    for key, value in crawler.crawl_zol(producttype).items()
                    if value != '爬取不到数据'
                }
            else:
                crawl_param_dict = {}

            common = (now_time, channel, SKU, name, categorycode)
            _append_param_rows(update_table, name_param_dict, param_state_dict, common, '模型参数项', '0')
            _append_param_rows(update_table, product_param_dict, param_state_dict, common, '原始参数项', '1')
            _append_param_rows(update_table, crawl_param_dict, param_state_dict, common, '爬取参数项', '1')

        if model is not None:
            model.clean()

    update_table_df = pd.DataFrame.from_dict(update_table)
    update_table_df.to_excel('update_data_test.xlsx')
    update_table_df.to_sql('Product_Parameter_Process', sql_ZIdatabase.engine, if_exists='append', index=False)
    product_table.to_sql('Product_Api_Data', sql_ZIdatabase.engine, if_exists='append', index=False)
    return update_table_df
class data_fetch():
    '''Collect un-matched supplier products and enrich them with index data.

    For every SKU without a product code the class looks up the supplier's
    product record, the matching index (ZI) sub-category, the index brand,
    the supplier parameters, an extracted product model and the price, and
    returns everything as one DataFrame.
    '''

    # Sentinel strings stored in the output when no match exists.
    NO_BRAND = '没有对应指数品牌'
    NO_CATEGORY = '没有匹配的指数子类编码'

    def __init__(self, model = 'LXWL_model', localhost = True, source = 'ZH_LXWL'):
        '''
        model is the string 'JD_model' or 'LXWL_model'.
        localhost is a bool deciding whether the local server is used.
        source is the data source, e.g. 'ZH_LXWL'; the part after the first
        underscore is used as the channel code.
        '''
        self.class_80 = pd.read_excel('80类.xlsx', converters={'指数编码':str}, sheet_name='Sheet2')
        self.brand_table = pd.read_excel('brandnormal.xlsx')
        self.tool = tool()
        # Three separate handles on the same source database.
        self.sql_JDZH = sql_find(source, localhost)
        self.sql_LXWL = sql_find(source, localhost)
        self.sql_ZH = sql_find(source, localhost)
        if localhost:
            self.sql_ZIdatabase_local = sql_find('ZI_BAK', localhost)
        else:
            self.sql_ZIdatabase_local = sql_find('ZI_DataBase', localhost)
        self.model = model
        self.source = source
        self.channel = source.split('_')[1]
        self.index = Index()

    def _brand_codes(self, brand):
        '''Return every BrandCode whose standardised brand equals ``brand``.'''
        matches = self.brand_table[self.brand_table['标准化品牌'] == brand]
        return matches['BrandCode'].tolist()

    def find_brandcode(self, normalbrand, english_brand):
        '''Return the brand code for ``normalbrand``.

        When ``normalbrand`` matches several codes, ``english_brand`` is
        looked up instead. Returns '没有对应指数品牌' when nothing matches.
        '''
        codes = self._brand_codes(normalbrand)
        if not codes:
            return self.NO_BRAND
        if len(codes) == 1:
            return codes[0]
        codes = self._brand_codes(english_brand)
        return codes[0] if codes else self.NO_BRAND

    def brandcode_search(self, brand):
        '''Resolve a raw supplier brand string to an index brand code.'''
        normalbrand = BN(brand)
        english_brand = re.sub(r'[^A-Za-z]', '', brand).upper()
        # First try the raw brand; the dummy second argument never matches.
        brandcode = self.find_brandcode(brand.upper(), 'XXXXXXXXXXXXX')
        if brandcode != self.NO_BRAND:
            return brandcode
        brandcode = self.find_brandcode(normalbrand, english_brand)
        brandcode = self.tool.judge_brand(normalbrand, brandcode)
        if brandcode != self.NO_BRAND:
            return brandcode
        return self.tool.judge_brand(english_brand, brandcode)

    def _lookup_brand_name(self, brandcode):
        '''Return the index brand name for ``brandcode`` or the no-brand text.'''
        if brandcode == self.NO_BRAND:
            return self.NO_BRAND
        cursor = self.sql_ZIdatabase_local.cursor
        cursor.execute(f"select BrandName from ZI_BrandList where BrandID ='{brandcode}'")
        row = cursor.fetchone()
        return row[0] if row is not None else self.NO_BRAND

    def get_LXWL_sku(self):
        '''Return the distinct SKUs in product_all that have no product code.'''
        cursor = self.sql_LXWL.cursor
        try:
            # NOTE(review): the last condition tests "ziying is null" a second
            # time; "dingzhi is null" was probably intended - confirm before
            # changing, since it alters which SKUs are selected.
            cursor.execute("select sku from product_all where productcode is null and state='1' and (ziying='1' or ziying is null) and (dingzhi='1' or ziying is null)")
        except pymssql.ProgrammingError:
            # Fallback query without the ziying/dingzhi filters.
            cursor.execute("select sku from product_all where productcode is null and state='1'")
        return list({row[0] for row in cursor.fetchall()})

    def get_JD_sku(self):
        '''Return the distinct SKUs in SKU2pcode whose productcode is 'NULL'.'''
        cursor = self.sql_JDZH.cursor
        cursor.execute("select sku from SKU2pcode where productcode = 'NULL'")
        return list({row[0] for row in cursor.fetchall()})

    def get_data_LXWL(self, sku):
        '''Collect the record for one SKU from an LXWL-style source database.

        Returns a list laid out as: sku, brand, name, category, createdate,
        url, ZI sub-category code, parameter dict, brand code, product
        model, price, ZI sub-category name, ZI brand name.
        '''
        cursor = self.sql_LXWL.cursor
        zi_cursor = self.sql_ZIdatabase_local.cursor

        cursor.execute(f"select sku,brand,name,category,createdate,url from product_detail where sku='{sku}'")
        data = list(cursor.fetchall()[0])

        # Prefer a mapping for this channel, then fall back to any channel.
        zi_cursor.execute(f"select ZI_SubCategoryCode,ZI_SubTitle from Product_Relation_Attribute_SubTitle where SourceSubCategory='{data[3]}' and Source='{self.channel}'")
        row = zi_cursor.fetchone()
        if row is None:
            zi_cursor.execute(f"select ZI_SubCategoryCode,ZI_SubTitle from Product_Relation_Attribute_SubTitle where SourceSubCategory='{data[3]}'")
            row = zi_cursor.fetchone()
        ZI_SubCategoryCode = row[0] if row is not None else self.NO_CATEGORY
        data.append(ZI_SubCategoryCode)

        if ZI_SubCategoryCode != self.NO_CATEGORY:
            zi_cursor.execute(f"select Name from ZI_SubCategory where SubcategoryCode ='{ZI_SubCategoryCode}'")
            ZIname = zi_cursor.fetchone()[0]
        else:
            ZIname = self.NO_CATEGORY

        cursor.execute(f"select title,value from info_detail where sku='{sku}'")
        # NOTE(review): r'\t' is a literal backslash followed by 't', not a
        # tab character - confirm which one the stored titles contain.
        param_dict = {title.replace(r'\t',''): value for title, value in cursor.fetchall()}
        data.append(param_dict)

        brand = data[1]
        brandcode = str(self.brandcode_search(brand)).zfill(5)
        data.append(brandcode)
        ZIbrand = self._lookup_brand_name(brandcode)

        producttype = type_extract_JD(data[2], param_dict, brand)
        data.append(producttype)

        cursor.execute(f"select price from product_price where sku='{sku}'")
        price_row = cursor.fetchone()
        # A SKU without a price row used to raise TypeError and abort the
        # whole run; store a zero price instead.
        price = price_row[0] if price_row is not None else decimal.Decimal(0)
        data.append(price)
        data.append(ZIname)
        data.append(ZIbrand)
        return data

    def get_data_JD(self, sku):
        '''Collect the record for one SKU from the JD source database.

        The returned list has the same layout as ``get_data_LXWL``; the url
        is built from the SKU and a missing price is stored as
        '该产品没有价格数据。'.
        '''
        cursor = self.sql_JDZH.cursor
        zi_cursor = self.sql_ZIdatabase_local.cursor

        cursor.execute(f"select sku,band_name,name,category,create_date from goods_detail where sku='{sku}'")
        data = list(cursor.fetchall()[0])
        data.append(f'https://item.jd.com/{data[0]}.html')

        try:
            # JD category id -> JD category name -> ZI name -> ZI code; any
            # missing link makes fetchone() return None and raises TypeError.
            cursor.execute(f"select name from goods_category where catId = '{data[3].split(';')[-1]}'")
            JDname = cursor.fetchone()[0]
            zi_cursor.execute(f"select subcategoryname from op_jd_zd_type where third = '{JDname}'")
            ZIname = zi_cursor.fetchone()[0]
            zi_cursor.execute(f"select SubCategoryCode from ZI_SubCategory where Name = '{ZIname}'")
            ZI_SubCategoryCode = zi_cursor.fetchone()[0]
        except TypeError:
            ZI_SubCategoryCode = self.NO_CATEGORY
            ZIname = '没有匹配的指数子类'
        data.append(ZI_SubCategoryCode)

        # The JD parameter blob has to be parsed.
        cursor.execute(f"select param from goods_detail where sku='{sku}'")
        params_information = cursor.fetchone()[0]
        try:
            param_dict = param_load(sku, params_information)
        except IndexError:
            param_dict = {}
        data.append(param_dict)

        brand = data[1]
        brandcode = str(self.brandcode_search(brand)).zfill(5)
        data.append(brandcode)
        ZIbrand = self._lookup_brand_name(brandcode)

        producttype = type_extract_JD(data[2], param_dict, brand)
        data.append(producttype)

        cursor.execute(f"select jd_price from goods_price where sku_id='{sku}'")
        price_row = cursor.fetchone()
        data.append(price_row[0] if price_row is not None else '该产品没有价格数据。')
        data.append(ZIname)  # data[11]
        data.append(ZIbrand)
        return data

    def run(self):
        '''Fetch all pending SKUs and return them as a DataFrame.

        Side effects: writes a pickle of the raw records and two Excel files
        (all products, products without a sub-category) under ``api_data/``.

        Returns:
            DataFrame of the products that have a matching index
            sub-category.

        Raises:
            ValueError: if ``self.model`` is not 'LXWL_model' or 'JD_model'.
        '''
        if self.model == 'LXWL_model':
            sku_list = self.get_LXWL_sku()
            fetch = self.get_data_LXWL
        elif self.model == 'JD_model':
            sku_list = self.get_JD_sku()
            fetch = self.get_data_JD
        else:
            # Previously fell through to an unbound-variable error.
            raise ValueError(f'unknown model: {self.model!r}')

        total = len(sku_list)
        data_dict = {}
        for i, sku in enumerate(sku_list):
            try:
                data = fetch(sku)
                data_dict[data[0]] = {
                    '品牌':data[1],
                    '指数品牌':data[12],
                    '指数品牌编码':data[8],
                    '产品名称':data[2],
                    '供应商子类':data[3],
                    '指数子类':data[11],
                    '指数子类编码':data[6],
                    'url':data[5],
                    '更新日期':data[4],
                    '参数项':data[7],
                    '产品型号':data[9],
                    '产品价格':data[10],
                    '来源':f'{self.source}'
                }
            except OSError:
                print(f'{sku}出错。')
                continue
            try:
                print(self.index(i, total-1), end=f'% 共{total}款产品,目前第{i+1}款。')
            except Exception:
                # Progress output is cosmetic; never let it stop the run.
                pass

        StyleTime = time.strftime("%Y-%m-%d", time.localtime())
        os.makedirs('api_data', exist_ok=True)
        with open(f'api_data/{self.source}_data_dict_{StyleTime}.txt', 'wb') as file:
            pickle.dump(data_dict, file)

        columns = ['SKU','品牌','指数品牌','指数品牌编码','产品名称','供应商子类','指数子类','指数子类编码','url','更新日期','参数项','产品型号','产品价格','来源']
        if data_dict:
            data_table = pd.DataFrame.from_dict(data_dict, orient='index', dtype=None, columns=None)
            data_table = data_table.reset_index()
            data_table.columns = columns
        else:
            # No SKU collected: renaming the single 'index' column of an
            # empty frame to 14 names would raise ValueError.
            data_table = pd.DataFrame(columns=columns)

        data_table.to_excel(f'api_data/{self.source}_data_{StyleTime}.xlsx')
        no_category_table = data_table[data_table['指数子类编码'] == self.NO_CATEGORY]
        no_category_table.to_excel(f'api_data/{self.source}_no_category_data_{StyleTime}.xlsx')
        data_table = data_table[data_table['指数子类编码'] != self.NO_CATEGORY]
        return data_table
if __name__ == '__main__':
    # Script entry point: fetch the pending products of one channel from the
    # remote (non-localhost) databases.
    channel = 'OFS'
    data_api = data_fetch(source=f'ZH_{channel}', localhost=False, model='LXWL_model')
    data_api_table = data_api.run()
    # Parameter extraction is currently disabled:
    #param_extract_function(data_api_table,channel)
\ No newline at end of file
...@@ -255,8 +255,29 @@ def param_extract_function(data_table,channel): ...@@ -255,8 +255,29 @@ def param_extract_function(data_table,channel):
update_table_df.to_excel('update_data_test.xlsx') update_table_df.to_excel('update_data_test.xlsx')
update_table_df.to_sql('Product_Parameter_Process', sql_ZIdatabase.engine, if_exists='append', index=False) update_table_df.to_sql('Product_Parameter_Process', sql_ZIdatabase.engine, if_exists='append', index=False)
product_table.to_sql('Product_Api_Data', sql_ZIdatabase.engine, if_exists='append', index=False) product_table.to_sql('Product_Api_Data', sql_ZIdatabase.engine, if_exists='append', index=False)
update_sku_list = set(list(product_table['SKU']))
channel_sql = sql_find(database=f'ZH_{channel}')
for sku in update_sku_list:
channel_sql.cursor.execute(f"update product_all set state='9' where sku={sku}")
return update_table_df return update_table_df
def return_error(table):
    '''Append products without a matching sub-category to the feedback table.

    Args:
        table: DataFrame containing at least the columns 'SKU', '产品名称',
            'url' and '来源'. It is not modified.

    Side effects:
        Appends one row per product to ``API_returnErrorData`` in the
        'zi_zh' database, with the remark '类别对应错误' and the current
        local time as ``create_time``.
    '''
    sql_zi_zh = sql_find('zi_zh', False)
    NowTime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    # Work on an explicit copy: adding columns to a column-sliced frame
    # triggers pandas' SettingWithCopyWarning.
    error_rows = table[['SKU', '产品名称', 'url', '来源']].copy()
    # Scalars are broadcast to every row, so no per-row lists are needed.
    error_rows['remark'] = '类别对应错误'
    error_rows['create_time'] = NowTime
    error_rows.columns = ['sku', 'name', 'url', 'source', 'remark', 'create_time']
    error_rows.to_sql('API_returnErrorData', sql_zi_zh.engine, if_exists='append', index=False)
class data_fetch(): class data_fetch():
def __init__(self, model = 'LXWL_model', localhost = True, source = 'ZH_LXWL'): def __init__(self, model = 'LXWL_model', localhost = True, source = 'ZH_LXWL'):
''' '''
...@@ -384,7 +405,10 @@ class data_fetch(): ...@@ -384,7 +405,10 @@ class data_fetch():
symbol = 1 symbol = 1
self.sql_LXWL.cursor.execute(f"select price from product_price where sku='{sku}'") self.sql_LXWL.cursor.execute(f"select price from product_price where sku='{sku}'")
try:
price = self.sql_LXWL.cursor.fetchone()[0] price = self.sql_LXWL.cursor.fetchone()[0]
except TypeError:
price = decimal.Decimal(0)
data.append(price) data.append(price)
data.append(ZIname) data.append(ZIname)
...@@ -500,6 +524,7 @@ class data_fetch(): ...@@ -500,6 +524,7 @@ class data_fetch():
data_table.to_excel(f'api_data/{self.source}_data_{StyleTime}.xlsx') data_table.to_excel(f'api_data/{self.source}_data_{StyleTime}.xlsx')
no_category_table = data_table[data_table['指数子类编码'] == '没有匹配的指数子类编码'] no_category_table = data_table[data_table['指数子类编码'] == '没有匹配的指数子类编码']
no_category_table.to_excel(f'api_data/{self.source}_no_category_data_{StyleTime}.xlsx') no_category_table.to_excel(f'api_data/{self.source}_no_category_data_{StyleTime}.xlsx')
return_error(no_category_table)
data_table = data_table[data_table['指数子类编码'] != '没有匹配的指数子类编码'] data_table = data_table[data_table['指数子类编码'] != '没有匹配的指数子类编码']
#self.data_table_80 = data_table[data_table['指数子类编码'].isin(self.class_80['指数编码'].tolist())] #self.data_table_80 = data_table[data_table['指数子类编码'].isin(self.class_80['指数编码'].tolist())]
#self.data_table_80.to_excel(f'api_data/{self.source}_data_80_{StyleTime}.xlsx') #self.data_table_80.to_excel(f'api_data/{self.source}_data_80_{StyleTime}.xlsx')
...@@ -508,7 +533,7 @@ class data_fetch(): ...@@ -508,7 +533,7 @@ class data_fetch():
return data_table return data_table
if __name__ == '__main__': if __name__ == '__main__':
channel = 'OFS' channel = 'SN'
data_api = data_fetch(model='LXWL_model', localhost=False, source=f'ZH_{channel}') data_api = data_fetch(model='LXWL_model', localhost=False, source=f'ZH_{channel}')
data_api_table = data_api.run() data_api_table = data_api.run()
#param_extract_function(data_api_table,channel) #param_extract_function(data_api_table,channel)
\ No newline at end of file
from lstm_predict import LSTMNER import pandas as pd
import time
from function import *
model = LSTMNER('0101') def return_error(table):
model.param_extract('联想 ThinkPad E580 ThinkPad E580(02CD)15.6英寸轻薄窄边框笔记本电脑(i5-8250U 8G 256G PCIeSSD+1T 2G独显 FHD)黑色(计价单位:台)') sql_zi_zh = sql_find('zi_zh', False)
\ No newline at end of file NowTime = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime())
#table = pd.read_excel('api_data/ZH_SN_no_category_data_2019-12-27.xlsx', dtype = {'SKU' : str})
table = table[['SKU','产品名称','url','来源']]
l = len(table)
create_time_list = []
remark_list = []
for i in range(l):
create_time_list.append(NowTime)
remark_list.append('类别对应错误')
table['remark'] = remark_list
table['create_time'] = create_time_list
table.columns = ['sku','name','url','source','remark','create_time']
table.to_sql('API_returnErrorData', sql_zi_zh.engine, if_exists='append', index=False)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment