Commit 8fda62fe authored by rico.liu's avatar rico.liu

init

parents
Pipeline #181 canceled with stages
File added
This source diff could not be displayed because it is too large. You can view the blob instead.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 25 11:07:18 2020
@author: rico
"""
import requests
def zgc_api(func, data):
    """POST ``data`` as JSON to the ZGC service endpoint ``func``.

    Args:
        func: Endpoint name; it is interpolated into the service URL.
        data: Dict with the request payload. It is left untouched; the API
            key is added to a copy before sending.

    Returns:
        The response body decoded from JSON.
    """
    headers = {
        'Connection': 'Keep-Alive'
    }
    # NOTE(review): the API key and host are hard-coded in source; consider
    # loading them from configuration or the environment instead.
    key = 'eRo1#ZFHY5N&GEzV'
    api = f"http://59.110.219.171:8000/{func}/"
    print(api)
    # Work on a copy so the caller's dict is not polluted with the key.
    payload = dict(data)
    payload['key'] = key
    # The context manager releases the pooled connection once the call is done.
    with requests.Session() as session:
        response = session.post(api, json=payload, headers=headers, timeout=14400)
        return response.json()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 7 23:08:30 2020
@author: rico
"""
import pymssql
class MSSQL:
    """Small wrapper that opens a pymssql connection and cursor for a known host.

    After construction ``_conn`` holds the connection (or ``False`` when the
    connection attempt failed) and ``_cur`` holds a cursor (or ``None``).
    """

    # NOTE(review): credentials are hard-coded in source; consider moving them
    # to configuration or the environment.
    _CREDENTIALS = {
        '123.57.45.119': ('zgcprice', 'zgcprice20200708'),
        '123.56.115.207': ('zgcindex', 'jiayou202006'),
        '10.0.120.131': ('sa', '1qaz@WSX'),
    }

    def __init__(self, host, db):
        """Remember ``host``/``db`` and try to connect immediately.

        Raises:
            ValueError: if no credentials are configured for ``host``.
        """
        self.host = host
        self.db = db
        self.user = None
        self._cur = None
        self._conn = self.GetConnect()
        if self._conn:
            self._cur = self._conn.cursor()

    def GetConnect(self):
        """Open the database connection.

        Returns:
            The pymssql connection, or ``False`` if connecting failed (the
            error is printed).

        Raises:
            ValueError: if no credentials are configured for ``self.host``.
        """
        try:
            user, pwd = self._CREDENTIALS[self.host]
        except KeyError:
            raise ValueError(
                f"no credentials configured for host {self.host!r}"
            ) from None
        # Remembered so GetConnectInfo can report it.
        self.user = user
        try:
            return pymssql.connect(
                host=self.host,
                user=user,
                password=pwd,
                database=self.db,
                autocommit=True
            )
        except Exception as err:
            print("连接数据库失败, %s" % err)
            return False

    def GetConnectInfo(self):
        """Print the server, user name and database of this connection."""
        print("连接信息:")
        print("服务器:%s , 用户名:%s , 数据库:%s " % (self.host, self.user, self.db))

    def Close(self):
        """Close the cursor and the connection; a no-op for whichever is missing."""
        if self._cur is not None:
            self._cur.close()
        if self._conn:
            self._conn.close()
'''
ms = MSSQL('123.56.115.207','zdindex')
conn = ms._conn
cursor = ms._cur
cursor.execute(f"select top 10 * from zd_week_price")
cursor.fetchall()
ms.Close()
cursor.close()
conn.close()
'''
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 25 09:59:23 2020
@author: rico
"""
from API import zgc_api
from db import MSSQL
import pandas as pd
import time
from public_function import *
import uuid
import datetime
#from delCalPrice_UpdatePriceDiff import check_configure_price,create_price_difference_single
#初始化数据
def InitializeData(path):
    """Load the upload workbook at ``path`` and insert its products into ``product_all``.

    Reads the sheets '产品表模板' (products) and '价格表模板' (prices), maps every
    price-source alias to its channel code through ``zdindex_channel_rel`` and
    bulk-inserts one row per product.

    Args:
        path: Path of the Excel workbook.

    Returns:
        ``False`` when a price source has no matching channel code (nothing
        is inserted in that case); ``None`` otherwise.
    """
    df_product = pd.read_excel(path, sheet_name='产品表模板').fillna('无')
    df_price = pd.read_excel(path, sheet_name='价格表模板').fillna('无')

    mssql = mssql_new = None
    try:
        mssql = MSSQL('123.57.45.119', 'ZI_Service')
        cursor_zi_service = mssql._cur
        mssql_new = MSSQL('123.56.115.207', 'ZI_NEW')
        cursor_zi_new = mssql_new._cur

        # Price-source alias -> channel code.
        cursor_zi_new.execute("select channel_alias_cn,channel_alias_code from zdindex_channel_rel")
        price_source = pd.DataFrame(
            cursor_zi_new.fetchall(),
            columns=[col[0] for col in cursor_zi_new.description],
        )
        price_source_dict = dict(zip(
            price_source['channel_alias_cn'].tolist(),
            price_source['channel_alias_code'].tolist(),
        ))

        index_ = Index()  # progress indicator
        total = len(df_product)
        batch = time.strftime("%Y-%m-%d", time.localtime())
        sku = "无"

        insert_data = []
        for counter, (_, row) in enumerate(df_product.iterrows(), start=1):
            try:
                print(index_(counter, total - 1), end='%')
            except Exception:
                # Fallback denominator, kept from the original code.
                print(index_(counter, 1), end='%')

            # Filter the price sheet once per product instead of three times.
            price_rows = df_price[df_price['序号'] == row['序号']]

            # Translate each source alias into its channel code.
            url_source = []
            for url_source_element in price_rows['来源(简称)'].tolist():
                try:
                    url_source.append(price_source_dict[url_source_element])
                except KeyError:
                    print(f"{url_source_element}无价格系统渠道对应")
                    # The finally clause closes both connections.
                    return False

            url = str(price_rows['链接'].tolist())
            url_price = str(price_rows['电子商城价'].tolist())
            channel = row['操作人']
            insert_data.append((
                sku, row['名称'], row['品牌(模糊)'], row['类别(模糊)'],
                url_price, url, str(url_source), url_price,
                channel, channel, channel, batch,
            ))

        cursor_zi_service.executemany(
            "insert into product_all (sku,name,brand,category,price,url,url_source,url_price,channel,channel_id,channel_alias,batch) "
            "values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
            insert_data,
        )
    finally:
        for connection in (mssql, mssql_new):
            if connection is not None:
                connection.Close()
#加载数据
def LoadData(batch, channel_alias, mode):
    """Read rows of ``product_all`` into a DataFrame.

    Args:
        batch: Batch label to filter on, or ``"all"`` for no batch/channel filter.
        channel_alias: Channel alias to filter on (ignored when ``batch`` is ``"all"``).
        mode: ``'deal'`` selects the processing columns of rows whose state is
            NULL; ``'collectDataConfirm'`` (only with a concrete batch) selects
            the confirmation columns; any other value selects every column.

    Returns:
        A DataFrame whose columns are named after the result-set columns.
    """
    deal_columns = "id,sku,name,brand,category,params,price,channel_alias,batch"
    confirm_columns = (
        "id,category,brand,sku,name,params,url,url_source,url_price,new_name,"
        "params_standard,zi_subcategoryname,zi_subcategorycode,zi_brandname,"
        "zi_brandcode,channel_alias,state"
    )

    if batch == "all":
        params = ()
        if mode == 'deal':
            query = f"select {deal_columns} from product_all where state is Null"
        else:
            query = "select * from product_all"
    else:
        # Values are passed as parameters instead of being pasted into the SQL.
        params = (batch, channel_alias)
        scope = "where batch = %s and channel_alias = %s"
        if mode == 'deal':
            query = f"select {deal_columns} from product_all {scope} and state is Null"
        elif mode == 'collectDataConfirm':
            query = f"select {confirm_columns} from product_all {scope}"
        else:
            query = f"select * from product_all {scope}"

    mssql = MSSQL('123.57.45.119', 'ZI_Service')
    try:
        cursor_zi_service = mssql._cur
        cursor_zi_service.execute(query, params)
        return pd.DataFrame(
            cursor_zi_service.fetchall(),
            columns=[col[0] for col in cursor_zi_service.description],
        )
    finally:
        mssql.Close()
#分析获取数据基本信息
def AnalyseBasicInfo(df):
    """Predict category, brand and model for every row of ``df`` and store the results.

    The function calls the ZGC service endpoints ``GetPredictCategory``,
    ``GetZGCCategoryInfo``, ``GetZGCBrandInfo`` and ``GetModelInfo``, adds
    their answers to ``df`` as new columns (``df`` is modified in place) and
    writes them back to ``product_all`` keyed by ``id``.

    Args:
        df: DataFrame with at least the columns ``id``, ``name``, ``brand``,
            ``category``, ``params`` and ``channel_alias``.
    """
    # Local import: the visible import block of this module does not import
    # ``re`` explicitly, so do not rely on it arriving through a star import.
    import re

    mssql = MSSQL('123.57.45.119', 'ZI_Service')
    try:
        cursor_zi_service = mssql._cur

        name_list = df['name'].tolist()
        url_name_list = df['name'].tolist()
        brand_list = df['brand'].tolist()
        url_brand_list = df['brand'].tolist()
        category_list = df['category'].tolist()
        params_list = df['params'].tolist()
        channelAlias_list = df['channel_alias'].tolist()

        # Predicted category.
        res = zgc_api("GetPredictCategory", {
            "category_info": {
                "cust_name_list": name_list,
                "url_name_list": url_name_list
            }
        })
        df['predict_cat'] = res['category_list']
        df['predict_cat_code'] = res['categoryCode_list']

        # Index category.
        res = zgc_api("GetZGCCategoryInfo", {
            "category_info": {
                "cust_category_list": category_list,
                "pre_category_list": df['predict_cat'].tolist()
            }
        })
        df['zi_category'] = res['category_list']
        df['zi_category_code'] = res['categoryCode_list']

        # Index brand.
        res = zgc_api("GetZGCBrandInfo", {
            "brand_info": {
                "cust_brand_list": brand_list,
                "url_brand_list": url_brand_list,
                "cust_name_list": name_list,
                "url_name_list": url_name_list
            }
        })
        df['zi_brandname'] = res['brand_list']
        df['zi_brandcode'] = res['brandId_list']

        # Model information.
        # NOTE(review): ``eval`` runs whatever is stored in the ``params``
        # column; if that text can come from an untrusted source this should
        # become ``ast.literal_eval``. As in the original, one unparsable
        # entry makes every row fall back to an empty dict.
        try:
            parsed_params = [eval(params) for params in params_list]
        except Exception:
            parsed_params = [{} for _ in params_list]

        # Keep only the first model-like key that is present, in priority order.
        model_keys = ('认证型号', '产品型号', '型号')
        model_params = []
        for params in parsed_params:
            chosen = {}
            if params:
                for model_key in model_keys:
                    if model_key in params:
                        chosen = {model_key: params[model_key]}
                        break
            model_params.append(chosen)

        res = zgc_api("GetModelInfo", {
            "model_info": {
                "channelAlias_list": channelAlias_list,
                "cust_category_list": category_list,
                "cust_name_list": name_list,
                "url_name_list": url_name_list,
                "cust_brand_list": brand_list,
                "cust_params_list": [str(params) for params in model_params]
            }
        })
        df['model'] = res['model_list']
        df['model_flag'] = res['modelFlag_list']

        index_ = Index()  # progress indicator
        total = len(df)
        insert_data = []
        for counter, (_, row) in enumerate(df.iterrows(), start=1):
            try:
                print(index_(counter, total - 1), end='%')
            except Exception:
                # Fallback denominator, kept from the original code.
                print(index_(counter, 1), end='%')

            predict_category = row['predict_cat']
            zi_subcategoryname = row['zi_category']
            predict_result = 1 if predict_category == zi_subcategoryname else 0
            zi_brandname = row['zi_brandname']

            # Brand comparison: '2' = newly created brand, '1' = the Chinese or
            # English part of the brand occurs in both the customer brand and
            # the customer name, '0' = no match.
            cust_name = str(row['name']).upper()
            cust_brand = str(row['brand']).upper()
            cn_name = ''.join(re.findall(r'[0-9\u4E00-\u9FA5]', zi_brandname)) or '无数据'
            en_name = (''.join(re.findall(r'[a-zA-Z0-9-]', zi_brandname))).upper() or '无数据'
            if '新建品牌' in zi_brandname:
                brand_match_result = '2'
            elif (cn_name in cust_brand and cn_name in cust_name) or (en_name in cust_brand and en_name in cust_name):
                brand_match_result = '1'
            else:
                brand_match_result = '0'

            insert_data.append((
                predict_category, row['predict_cat_code'],
                zi_subcategoryname, row['zi_category_code'],
                predict_result, zi_brandname, row['zi_brandcode'],
                row['model'], row['model_flag'], brand_match_result, row['id'],
            ))

        cursor_zi_service.executemany("update product_all set predict_category = (%s) ,predict_category_code = (%s) ,zi_subcategoryname = (%s) ,zi_subcategorycode = (%s) ,predict_result = (%s) ,zi_brandname = (%s) ,zi_brandcode = (%s) ,model = (%s),model_flag = (%s),brand_match_result = (%s) where id = (%d)", insert_data)
    finally:
        mssql.Close()
#导出基础信息至excel
def ExportToExcelBasicConfirm(batch, channel_alias):
    """Export the not-yet-processed rows of one batch/channel to an Excel file.

    Rows of ``product_all`` with a NULL ``state`` are written to
    ``{channel_alias}基础信息确认{batch}.xlsx`` in the working directory.

    Args:
        batch: Batch label to export.
        channel_alias: Channel alias to export.
    """
    mssql = MSSQL('123.57.45.119', 'ZI_Service')
    try:
        cursor_zi_service = mssql._cur
        # Filter values are bound as parameters rather than pasted into the SQL.
        cursor_zi_service.execute(
            "select id,sku,name,brand,zi_brandname,zi_brandcode,category,zi_subcategoryname,"
            "zi_subcategorycode,model,model_flag,params,price,url,channel,productcode,remark "
            "from product_all where batch = %s and channel_alias = %s and state is Null",
            (batch, channel_alias),
        )
        df = pd.DataFrame(
            cursor_zi_service.fetchall(),
            columns=[col[0] for col in cursor_zi_service.description],
        )
    finally:
        mssql.Close()
    df.to_excel(f"{channel_alias}基础信息确认{batch}.xlsx")
#更新基础信息确认后数据
def UpdateBasicData(path):
    """Write the manually confirmed basic information back to ``product_all``.

    Rows with a non-empty ``remark`` are marked as rejected (state '2').
    For the others, brand/category names, codes and model are updated;
    missing codes are looked up by name in ``p_brand`` / ``p_category`` and
    ``point_category_flag`` records whether the category is listed in
    ``important_category``.

    Args:
        path: Path of the confirmed Excel workbook (sheet ``Sheet1``).

    Raises:
        ValueError: if a brand or category code has to be looked up by name
            and no matching row exists.
    """
    df = pd.read_excel(path, sheet_name="Sheet1", converters={'zi_brandcode': str, 'zi_subcategorycode': str})
    # Empty cells become the string 'nan', which the checks below rely on.
    for column in ('zi_brandcode', 'zi_subcategorycode', 'remark'):
        df[column] = df[column].apply(str)

    mssql_service = mssql_new = None
    try:
        mssql_service = MSSQL('123.57.45.119', 'ZI_Service')
        cursor_zi_service = mssql_service._cur
        mssql_new = MSSQL('123.56.115.207', 'ZI_NEW')
        cursor_zi_new = mssql_new._cur

        cursor_zi_new.execute("select category_name from important_category")
        important_categories = set(pd.DataFrame(
            cursor_zi_new.fetchall(),
            columns=[col[0] for col in cursor_zi_new.description],
        )['category_name'].tolist())

        index_ = Index()  # progress indicator
        total = len(df)
        for counter, (_, row) in enumerate(df.iterrows(), start=1):
            try:
                print(index_(counter, total - 1), end='%')
            except Exception:
                # Fallback denominator, kept from the original code.
                print(index_(counter, 1), end='%')

            id_ = int(row['id'])
            remark = row['remark']
            if remark != 'nan':  # rejected row
                cursor_zi_service.execute(
                    "update product_all set state = '2',remark = %s where id = %s",
                    (remark, id_),
                )
                continue

            # Values are bound as parameters, so quotes need no manual doubling.
            zi_brandname = row['zi_brandname'].strip().replace("[", "").replace("]", "")
            zi_brandcode = row['zi_brandcode'].strip().replace("?", "").replace("[", "").replace("]", "")
            zi_subcategoryname = row['zi_subcategoryname'].strip()
            zi_subcategorycode = row['zi_subcategorycode'].strip().replace("?", "")
            model = str(row['model']).strip()

            if zi_brandcode == 'nan':
                cursor_zi_new.execute("select id from p_brand where name = %s", (zi_brandname,))
                found = cursor_zi_new.fetchone()
                if found is None:
                    raise ValueError(f"brand {zi_brandname!r} not found in p_brand (row id {id_})")
                zi_brandcode = str(int(found[0]))

            if zi_subcategorycode == 'nan' and zi_subcategoryname != '该类别非中电类别':
                cursor_zi_new.execute("select id from p_category where name = %s", (zi_subcategoryname,))
                found = cursor_zi_new.fetchone()
                if found is None:
                    raise ValueError(f"category {zi_subcategoryname!r} not found in p_category (row id {id_})")
                zi_subcategorycode = str(int(found[0]))

            point_category_flag = '1' if zi_subcategoryname in important_categories else '0'
            cursor_zi_service.execute(
                "update product_all set zi_brandname = %s,zi_brandcode = %s,zi_subcategoryname = %s,"
                "zi_subcategorycode = %s,point_category_flag = %s,model = %s where id = %s",
                (zi_brandname, zi_brandcode, zi_subcategoryname, zi_subcategorycode,
                 point_category_flag, model, id_),
            )
    finally:
        for connection in (mssql_service, mssql_new):
            if connection is not None:
                connection.Close()
#获取重点类产品数据
def GetCollectData(batch, channel_alias):
    """Return the rows of one batch/channel whose state is NULL or '8'.

    Args:
        batch: Batch label to filter on.
        channel_alias: Channel alias to filter on.

    Returns:
        A DataFrame with every column of the matching ``product_all`` rows.
    """
    mssql = MSSQL('123.57.45.119', 'ZI_Service')
    try:
        cursor_zi_service = mssql._cur
        # Filter values are bound as parameters rather than pasted into the SQL.
        cursor_zi_service.execute(
            "select * from product_all where (state is Null or state = '8') "
            "and batch = %s and channel_alias = %s",
            (batch, channel_alias),
        )
        return pd.DataFrame(
            cursor_zi_service.fetchall(),
            columns=[col[0] for col in cursor_zi_service.description],
        )
    finally:
        mssql.Close()
#解析重点类产品数据并导出
def GetCollectDataDetail(df, channel_alias, batch):
    """Export one parameter-entry sheet per category for the products in ``df``.

    For every category in ``df['zi_subcategoryname']`` the property list is
    read from ``vw_property``. Required properties (first character of
    ``identy`` is '1') are suffixed with ``(*)``; 'CPU属性' is left out of the
    required list. Each product gets one row per property with empty
    ``value`` and ``new_name`` cells, and the result is written to
    ``{channel_alias}建库产品参数确认{batch}.xlsx``.

    Args:
        df: Products with the columns ``id``, ``name``, ``zi_brandname`` and
            ``zi_subcategoryname``.
        channel_alias: Channel alias, used in the output file name.
        batch: Batch label, used in the output file name.
    """
    mssql = MSSQL('123.56.115.207', 'ZI_NEW')
    try:
        cursor_zi_new = mssql._cur
        cursor_zi_new.execute("select * from vw_property")
        subtitle_df = pd.DataFrame(
            cursor_zi_new.fetchall(),
            columns=[col[0] for col in cursor_zi_new.description],
        )
    finally:
        # Only this one query is needed, so release the connection early.
        mssql.Close()

    subtitle_df['subtitle'] = subtitle_df['subtitle'].apply(lambda x: x.strip())
    # The first character of ``identy`` is the "required" flag.
    subtitle_df['require_param'] = subtitle_df['identy'].apply(lambda x: x[0])

    export_columns = ['id', 'name', 'zi_brandname', 'zi_subcategoryname', 'param', 'value', 'new_name']
    writer = pd.ExcelWriter(f"{channel_alias}建库产品参数确认{batch}.xlsx")
    for category in df['zi_subcategoryname'].unique().tolist():
        cat_df = df[df['zi_subcategoryname'] == category]
        category_props = subtitle_df[subtitle_df['name'] == category]

        required = category_props[category_props['require_param'] == '1']['subtitle'].tolist()
        # Optional properties are computed before 'CPU属性' is dropped, so it
        # cannot reappear in the optional list.
        optional = [
            param
            for param in category_props[category_props['require_param'] != '1']['subtitle'].tolist()
            if param not in required
        ]
        # 'CPU属性' is not entered by hand, so it is not offered as a row.
        if 'CPU属性' in required:
            required.remove('CPU属性')
        param_list_all = [str(param) + "(*)" for param in required] + optional

        records = [
            (row['id'], row['name'], row['zi_brandname'], row['zi_subcategoryname'], param, '', '')
            for _, row in cat_df.iterrows()
            for param in param_list_all
        ]
        export_df = pd.DataFrame(records, columns=export_columns)

        # '/' is not allowed in Excel sheet names.
        sheet_category = category.replace('/', '_')
        export_df.to_excel(writer, sheet_name=f"{sheet_category}参数数据")
    # ``close`` writes the file; ``ExcelWriter.save`` no longer exists in pandas 2.
    writer.close()
#校验建库参数数据,若有问题,添加数据字典
def CheckParamsData(path):
    """Validate the entered parameter values and register unknown ones.

    For every category of ``important_category`` that has a sheet
    ``{category}参数数据`` in the workbook ('/' in the category replaced by
    '_'), the required parameters (those marked ``(*)``) are sent to the
    ``GetZGCParamValuesInfo`` service. Every value the service flags as
    non-standard and that is not yet in ``ShuJuZiDian_Cfg`` is inserted
    there.

    Args:
        path: Path of the parameter workbook.

    Returns:
        The merged ``paramsValue_dict`` of all categories when no new
        dictionary entry had to be inserted. Otherwise the new entries are
        exported to ``<path without .xlsx>(数据字典补充).xlsx`` and an empty
        dict is returned.
    """
    # Read the workbook once. Missing sheets are skipped below, but a missing
    # or unreadable file now raises instead of silently "passing" validation.
    workbook = pd.read_excel(path, sheet_name=None, converters={'productcode': str})

    mssql = MSSQL('123.56.115.207', 'ZI_NEW')
    try:
        cursor_zi_new = mssql._cur
        cursor_zi_new.execute("select category_name from important_category")
        point_category_list = pd.DataFrame(
            cursor_zi_new.fetchall(),
            columns=[col[0] for col in cursor_zi_new.description],
        )['category_name'].tolist()

        return_dict = dict()
        complicated_data_dict_id_list = list()
        for category in point_category_list:
            sheet_name = f"{category.replace('/', '_')}参数数据"
            if sheet_name not in workbook:
                continue
            cat_df = workbook[sheet_name]

            # Collect the required parameters of every product on the sheet.
            id_list = list()
            params_list = list()
            for id_ in cat_df['id'].unique().tolist():
                id_list.append(id_)
                single_df = cat_df[cat_df['id'] == id_]
                required_df = single_df[['(*)' in param for param in single_df['param'].tolist()]]
                params_list.append(dict(zip(
                    required_df['param'].apply(lambda x: x.replace("(*)", "")).tolist(),
                    required_df['value'].tolist(),
                )))

            res = zgc_api("GetZGCParamValuesInfo", {
                "params_info": {
                    "category": category,
                    "id_list": id_list,
                    "params_dict_list": params_list
                }
            })
            res_value_dict = res['paramsValue_dict']
            return_dict.update(res_value_dict)

            for id_ in res_value_dict:
                for param in res_value_dict[id_]:
                    value, value_flag = tuple(res_value_dict[id_][param].items())[0]
                    if value_flag == False:  # noqa: E712 - keep the original loose comparison
                        # Values are bound as parameters so quotes in them
                        # cannot break the statements.
                        entry = (category, param, str(value))
                        cursor_zi_new.execute(
                            "select * from ShuJuZiDian_Cfg where categoryname = %s and subtitle = %s and primitive = %s",
                            entry,
                        )
                        if not cursor_zi_new.fetchall():
                            cursor_zi_new.execute(
                                "insert into ShuJuZiDian_Cfg (categoryname,subtitle,primitive) values (%s,%s,%s)",
                                entry,
                            )
                            cursor_zi_new.execute(
                                "select id from ShuJuZiDian_Cfg where categoryname = %s and subtitle = %s and primitive = %s",
                                entry,
                            )
                            complicated_data_dict_id_list.append(cursor_zi_new.fetchall()[0][0])
            print(f"完成{category}参数校验")

        if not complicated_data_dict_id_list:
            print("本批数据校验通过!,返回标准值字典")
            return return_dict

        # Export the new dictionary entries so their standard values can be filled in.
        data_dict_path = path.split(".xlsx")[0] + '(数据字典补充).xlsx'
        cursor_zi_new.execute(
            "select * from ShuJuZiDian_Cfg where stdvalue is Null and id in (%s)"
            % ','.join(['%s'] * len(complicated_data_dict_id_list)),
            tuple(complicated_data_dict_id_list),
        )
        fill_shujuzidian_df = pd.DataFrame(
            cursor_zi_new.fetchall(),
            columns=[col[0] for col in cursor_zi_new.description],
        )
        fill_shujuzidian_df.to_excel(data_dict_path)
        return {}
    finally:
        mssql.Close()
#上传数据字典补充数据
def ComplicatedDataDict(path):
    """Upload the completed data-dictionary supplement.

    For every row of the workbook, ``stdvalue`` and ``simplevalue`` of the
    ``ShuJuZiDian_Cfg`` entry with the row's ``id`` are updated. The row
    index is printed after each update.

    Args:
        path: Path of the Excel file with the columns ``id``, ``stdvalue``
            and ``simplevalue``.
    """
    df = pd.read_excel(path)
    mssql = MSSQL('123.56.115.207', 'ZI_NEW')
    try:
        cursor_zi_new = mssql._cur
        for index, row in df.iterrows():
            # Bound parameters keep quotes in the values from breaking the
            # statement; ``str`` keeps the text the original f-string stored.
            cursor_zi_new.execute(
                "update ShuJuZiDian_Cfg set stdvalue = %s,simplevalue = %s where id = %s",
                (str(row['stdvalue']), str(row['simplevalue']), int(row['id'])),
            )
            print(index)
    finally:
        mssql.Close()
#记录标准参数项参数值
def save_collect_data_info(path, std_value_dict):
    """Store the standardised parameter values of every product in ``product_all``.

    Each sheet of the workbook is named ``{category}参数数据``. For categories
    listed in ``important_category`` the required parameters (marked
    ``(*)``) take their value from ``std_value_dict`` and the optional ones
    from the sheet; a 'CPU属性' entry is derived from 'CPU型号'. For all other
    categories the sheet values are stored as they are, together with
    ``new_name``.

    Args:
        path: Path of the parameter workbook.
        std_value_dict: Mapping ``str(product id) -> {param: {std value: flag}}``
            as returned by ``CheckParamsData``.
    """
    mssql_service = mssql = None
    try:
        mssql_service = MSSQL('123.57.45.119', 'ZI_Service')
        cursor_zi_service = mssql_service._cur
        mssql = MSSQL('123.56.115.207', 'ZI_NEW')
        cursor_zi_new = mssql._cur

        cursor_zi_new.execute("select category_name from important_category")
        point_category_list = pd.DataFrame(
            cursor_zi_new.fetchall(),
            columns=[col[0] for col in cursor_zi_new.description],
        )['category_name'].tolist()
        # Sheet names carry '_' where the category has '/', so compare against
        # the same substitution; otherwise such categories never match.
        important_sheet_categories = {str(name).replace('/', '_') for name in point_category_list}

        # Load every sheet once instead of re-reading the file per sheet.
        workbook = pd.read_excel(path, sheet_name=None)
        for sheet_name, cat_df in workbook.items():
            category = sheet_name.replace("参数数据", "")
            if category in important_sheet_categories:
                for product_id in cat_df['id'].unique().tolist():
                    single_df = cat_df[cat_df['id'] == product_id].copy()
                    is_required = ['(*)' in param for param in single_df['param'].tolist()]
                    single_df['param'] = single_df['param'].apply(lambda x: x.replace("(*)", ""))
                    necessary_single_df = single_df[is_required]
                    unnecessary_single_df = single_df[[not flag for flag in is_required]].fillna("无该参数信息")

                    std_key = str(product_id)
                    subtitle_list = []
                    value_list = []
                    for params in necessary_single_df['param'].tolist():
                        std_value = list(std_value_dict[std_key][params].keys())[0]
                        subtitle_list.append(params)
                        value_list.append(std_value)
                        if params == 'CPU型号':
                            # 'CPU属性' is derived: the part of the model before
                            # the first '-', except for the listed vendors.
                            subtitle_list.append('CPU属性')
                            if not ('飞腾' in std_value or '龙芯' in std_value or '兆芯' in std_value):
                                std_value = std_value.split('-')[0]
                            value_list.append(std_value)

                    params_dict = str(dict(zip(
                        subtitle_list + unnecessary_single_df['param'].tolist(),
                        value_list + unnecessary_single_df['value'].tolist(),
                    )))
                    # Bound parameters replace the manual quote doubling.
                    cursor_zi_service.execute(
                        "update product_all set params_standard = %s where id = %s",
                        (params_dict, product_id),
                    )
            else:
                for product_id in cat_df['id'].unique().tolist():
                    single_df = cat_df[cat_df['id'] == product_id].fillna("无该参数信息")
                    single_df['param'] = single_df['param'].apply(lambda x: x.replace("(*)", ""))
                    new_name = single_df['new_name'].unique().tolist()[0]
                    params_dict = str(dict(zip(single_df['param'].tolist(), single_df['value'].tolist())))
                    cursor_zi_service.execute(
                        "update product_all set params_standard = %s,new_name = %s where id = %s",
                        (params_dict, new_name, product_id),
                    )
    finally:
        for connection in (mssql, mssql_service):
            if connection is not None:
                connection.Close()
    print("完成记录标准参数项参数值")
def _compose_skuname(named_rule, brandname, category, lookup):
    """Assemble the raw product name from a space-separated naming rule.

    ``lookup(param)`` must return the display value for a parameter. Rule
    elements are '品牌名称', '类别名称', a plain parameter name, or a
    parenthesised parameter / '/'-separated parameter group.
    """
    skuname = ''
    for element in named_rule.split(" "):
        if element == '品牌名称':
            skuname += brandname + " "
        elif element == '类别名称':
            skuname += category + " "
        elif element[0] == "(":
            inner = element.replace("(", "").replace(")", "")
            if "/" in element:
                values = [lookup(param_) for param_ in inner.split("/")]
                skuname += "(" + "/".join(values) + ") "
            else:
                skuname += "(" + lookup(inner) + ") "
        else:
            skuname += lookup(element) + " "
    return skuname.strip()


def _normalize_skuname_spacing(skuname, category, brandname):
    """Clean blanks and empty bracket entries of a raw name, per category."""
    if category in ['复印纸', '扫描仪']:
        return skuname
    if category in ['笔记本', '台式机', '一体电脑', '碎纸机']:
        return dael_name_content(skuname, brandname)
    if category in ['复印机', '多功能一体机', '单反相机', '单电/微单相机']:
        # Both halves around the category name are cleaned.
        head, tail = skuname.split(category)[0], skuname.split(category)[1]
        cleaned_head = dael_name_content(head, brandname)
        cleaned_tail = dael_name_content(tail, brandname).replace(brandname, "")
        return cleaned_head + " " + category + cleaned_tail
    if category in ['投影机', '激光打印机', '空调']:
        if category == '空调':
            head = skuname.split("空调 (")[0].strip()
        else:
            head = skuname.split(category)[0].strip()
        # '中央空调' / '空调扇' are removed so that they do not act as split points.
        tail = skuname.replace("中央空调", "").replace("空调扇", "").split(category)[1]
        cleaned_tail = dael_name_content(tail, brandname).replace(brandname, "")
        return head + " " + category + cleaned_tail
    head = skuname.split(category)[0].strip()
    tail = skuname.split(category)[1]
    cleaned_tail = dael_name_content(tail, brandname).replace(brandname, "")
    return head + " " + category + cleaned_tail


def product_named(batch, channel_alias, std_value_dict):
    """Generate ``new_name`` for every important-category product of a batch.

    The naming rule of each category is read from ``skuname_named_rule``.
    Parameter values come from ``std_value_dict`` and are mapped through
    ``transform_simplevalue``. The resulting name is printed and written to
    ``product_all.new_name``.

    Args:
        batch: Batch label of the products to name.
        channel_alias: Channel alias of the products to name.
        std_value_dict: Mapping ``str(product id) -> {param: {std value: flag}}``.
    """
    collect_data_df = LoadData(batch, channel_alias, 'collectDataConfirm')

    mssql_new = mssql_service = None
    try:
        mssql_new = MSSQL('123.56.115.207', 'ZI_NEW')
        cursor_zi_new = mssql_new._cur
        mssql_service = MSSQL('123.57.45.119', 'ZI_Service')
        cursor_zi_service = mssql_service._cur

        cursor_zi_new.execute("select category_name from important_category")
        point_category_list = pd.DataFrame(
            cursor_zi_new.fetchall(),
            columns=[col[0] for col in cursor_zi_new.description],
        )['category_name'].tolist()

        cursor_zi_new.execute("select * from skuname_named_rule")
        named_rules_df = pd.DataFrame(
            cursor_zi_new.fetchall(),
            columns=[col[0] for col in cursor_zi_new.description],
        )

        cursor_zi_new.execute("select * from ShuJuZiDian_Cfg")
        data_dict = pd.DataFrame(
            cursor_zi_new.fetchall(),
            columns=[col[0] for col in cursor_zi_new.description],
        )

        for category in point_category_list:
            cat_df = collect_data_df[collect_data_df['zi_subcategoryname'] == category]
            if cat_df.empty:
                continue

            # Dictionary entries of this category with upper-cased standard
            # values; entries without a standard value are left as they are
            # instead of raising.
            single_data_dict = data_dict[data_dict['categoryname'] == category].copy()
            single_data_dict['stdvalue'] = single_data_dict['stdvalue'].apply(
                lambda x: x.upper() if isinstance(x, str) else x
            )
            named_rule = named_rules_df[named_rules_df['categoryname'] == category]['rule'].str.cat()

            for channel in cat_df['channel_alias'].unique().tolist():
                df = cat_df[cat_df['channel_alias'] == channel]
                for _, row in df.iterrows():
                    id_ = str(row['id'])
                    brandname = row['zi_brandname']

                    def lookup(param_, _id=id_):
                        # NOTE(review): transform_simplevalue returns False for
                        # values that are missing from (or ambiguous in) the
                        # dictionary; as before, that makes the string
                        # concatenation raise TypeError.
                        value, _flag = tuple(std_value_dict[_id][param_].items())[0]
                        return transform_simplevalue(
                            cursor_zi_new, single_data_dict, category, param_, value.upper()
                        )

                    skuname = _compose_skuname(named_rule, brandname, category, lookup)
                    skuname = _normalize_skuname_spacing(skuname, category, brandname)
                    print(skuname)
                    # Bound parameters: a quote in the name cannot break the SQL.
                    cursor_zi_service.execute(
                        "update product_all set new_name = %s where id = %s",
                        (skuname, int(id_)),
                    )
    finally:
        for connection in (mssql_new, mssql_service):
            if connection is not None:
                connection.Close()
def UpdateCollectDataConfirm(path):
    """Apply the confirmed product data and move every row to state '8.5'.

    Rows whose ``state`` cell is '1' only get the new state. For all other
    rows the link, price, brand, category, name and standard-parameter
    columns are overwritten with the workbook values as well.

    Args:
        path: Path of the confirmed Excel workbook.
    """
    df = pd.read_excel(path, converters={'state': str})

    mssql_service = MSSQL('123.57.45.119', 'ZI_Service')
    try:
        cursor_zi_service = mssql_service._cur
        index_ = Index()  # progress indicator
        total = len(df)
        for counter, (_, row) in enumerate(df.iterrows(), start=1):
            try:
                print(index_(counter, total - 1), end='%')
            except Exception:
                # Fallback denominator, kept from the original code.
                print(index_(counter, 1), end='%')

            id_ = int(row['id'])
            if row['state'] == '1':
                # Only the state changes, so the other cells are not read
                # (and an empty cell there cannot fail the row).
                cursor_zi_service.execute(
                    "update product_all set state = '8.5' where id = %s", (id_,)
                )
                continue

            # Values are bound as parameters, so quotes need no manual doubling.
            url = str(row['url']).strip()
            url_source = str(row['url_source']).strip()
            url_price = str(row['url_price']).strip()
            new_name = row['new_name'].strip()
            zi_brandname = row['zi_brandname'].strip().replace("[", "").replace("]", "")
            zi_brandcode = str(row['zi_brandcode']).strip().replace("?", "").replace("[", "").replace("]", "")
            zi_subcategoryname = row['zi_subcategoryname'].strip()
            zi_subcategorycode = str(row['zi_subcategorycode']).strip().replace("?", "")
            params_standard = row['params_standard'].strip()
            cursor_zi_service.execute(
                "update product_all set url = %s,url_source = %s,url_price = %s,zi_brandname = %s,"
                "zi_brandcode = %s,zi_subcategoryname = %s,zi_subcategorycode = %s,new_name = %s,"
                "params_standard = %s,state = '8.5' where id = %s",
                (url, url_source, url_price, zi_brandname, zi_brandcode, zi_subcategoryname,
                 zi_subcategorycode, new_name, params_standard, id_),
            )
    finally:
        mssql_service.Close()
def transform_simplevalue(cursor_zi_new, shujuzidiandf, categoryname, subtitle, stdvalue):
    """Map a standard parameter value to the short form used in product names.

    Only the parameters in the abbreviation list are mapped; for any other
    parameter the stripped ``stdvalue`` is returned unchanged.

    Args:
        cursor_zi_new: Unused by this function.
        shujuzidiandf: Data-dictionary frame with the columns
            ``categoryname``, ``subtitle``, ``stdvalue`` and ``simplevalue``.
        categoryname: Category of the product.
        subtitle: Parameter name.
        stdvalue: Standard value of the parameter.

    Returns:
        The short value; ``" "`` when the dictionary says '无简称'; ``False``
        when the dictionary has no entry or more than one distinct short value.
    """
    stdvalue = stdvalue.strip()
    abbreviated_subtitles = ['CPU型号','显存容量','操作系统','双面器','双面输稿器','网络打印','标配外服务及配件','标配外耗材','镜头描述','碎纸效果']
    if subtitle in abbreviated_subtitles:
        same_category = shujuzidiandf['categoryname'] == categoryname
        same_subtitle = shujuzidiandf['subtitle'] == subtitle
        same_value = shujuzidiandf['stdvalue'] == stdvalue
        matching_rows = shujuzidiandf[same_category & same_subtitle & same_value]
        candidates = list(set(matching_rows['simplevalue'].tolist()))

        if not candidates:
            print(f"非法值,不存在数据字典中。{subtitle},{stdvalue}")
            return False
        if len(candidates) > 1:
            print(f"异常数据,具有多个简称。{subtitle},{stdvalue}")
            return False

        only_candidate = candidates[0]
        # '无简称' marks values that are left blank in the product name.
        return " " if only_candidate == '无简称' else only_candidate
    return stdvalue
def dael_name_content(skuname, brand):
    """Tidy the first parenthesised group of a product name.

    The brand is removed from ``skuname`` and put back as a prefix. Inside
    the first ``(...)`` group, entries that are a single blank are dropped.
    If nothing is left the whole group is removed, otherwise the remaining
    entries are re-joined with '/'.

    Args:
        skuname: Raw product name (or a part of it).
        brand: Brand name to strip from the name and to prefix the result with.

    Returns:
        The cleaned name with surrounding whitespace removed. A name without
        any '(' is returned as brand + name, and a group without ')' is
        treated as having no tail.
    """
    skuname = skuname.replace(brand, '')
    pieces = skuname.split('(')
    name_head = pieces[0]
    if len(pieces) == 1:
        # No bracket group at all: nothing to clean.
        return (brand + name_head).strip()

    closed = pieces[1].split(')')
    name_content = closed[0].split('/')
    name_tail = closed[1] if len(closed) > 1 else ''

    # Drop the blank placeholders and rebuild the group; surrounding '/' left
    # over from empty entries are stripped as before.
    kept = [element for element in name_content if element != ' ']
    joined = ''.join(element + '/' for element in kept).strip('/')

    if joined == '':
        # Nothing meaningful inside the brackets: leave the group out.
        if name_tail == '':
            res = brand + name_head
        else:
            res = brand + name_head + " " + name_tail
    else:
        res = brand + name_head + "(" + joined + ")" + name_tail
    return res.strip()
def create_to_db(path,std_value_dict):
    """Name the workbook's products, match them to existing SKUs and create the missing ones.

    Steps, as implemented below:

    1. Load reference data from ``ZI_NEW`` (key categories, naming rules,
       property definitions, existing SKUs, data dictionary) and the SPU
       price blacklist from ``price_calculate``.
    2. For every key category, read the sheet ``"<category>参数数据"`` of the
       workbook at *path* (``/`` in the category name is replaced by ``_``);
       a category whose sheet cannot be read is skipped.
    3. Per ``channel_alias``: build a product name from the category's
       naming rule, write the outcome to ``product_all`` in ``ZI_Service``
       and, for whole-machine categories only, insert the SKUs whose name
       did not match an existing one.

    NOTE(review): this copy of the file had lost its indentation; the
    nesting below was reconstructed from the control flow and should be
    checked against the original.
    NOTE(review): the three connections are only closed on the success
    path, and ``pd.concat`` raises ``ValueError`` when no frame was
    collected.

    :param path: path of the Excel workbook holding the parameter sheets.
    :param std_value_dict: nested mapping read as
        ``std_value_dict[str(row id)][parameter name]``; the first item of
        the inner mapping is unpacked as ``(standard value, flag)``.
    :return: concatenation of the per-channel frames of created products
        (columns ``id``, ``sku``, ``spuid``, ``zi_subcategoryname``,
        ``zi_brandname``, ``new_name``, ``channel_alias``).
    """
    # Open the database connections
    mssql_new = MSSQL('123.56.115.207','ZI_NEW')
    cursor_zi_new = mssql_new._cur
    mssql_price = MSSQL('123.57.45.119','price_calculate')
    cursor_zi_price = mssql_price._cur
    mssql_service = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql_service._cur
    # List of key categories
    cursor_zi_new.execute(f"select category_name from important_category")
    point_category_list = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['category_name'].tolist()
    # Naming rules of the key categories
    cursor_zi_new.execute(f"select * from skuname_named_rule")
    named_rules_df = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])
    # Property definitions of the key categories; the first four characters of
    # `identy` are split into the required / match / standard / part columns
    cursor_zi_new.execute(f"select * from vw_property where name in (select category_name from important_category)")
    data = cursor_zi_new.fetchall()
    subtitle_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
    subtitle_df['subtitle'] = subtitle_df['subtitle'].apply(lambda x: x.strip())
    subtitle_df['require_param'] = subtitle_df['identy'].apply(lambda x: x[0])
    subtitle_df['match_param'] = subtitle_df['identy'].apply(lambda x: x[1])
    subtitle_df['standard_param'] = subtitle_df['identy'].apply(lambda x: x[2])
    subtitle_df['part_param'] = subtitle_df['identy'].apply(lambda x: x[3])
    # Existing SKU data of the key categories
    cursor_zi_new.execute(f"select * from vw_sku_params where categoryname in (select category_name from important_category)")
    data = cursor_zi_new.fetchall()
    sku_db_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
    # Data dictionary
    cursor_zi_new.execute("select * from ShuJuZiDian_Cfg")
    data_dict = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])
    # SPU price blacklist: SPUs whose three calculated prices are all zero
    cursor_zi_price.execute("select spuid from spu_calculate_price where zc_price = 0 and ds_price = 0 and st_price = 0")
    hmd_spuid_list = pd.DataFrame(cursor_zi_price.fetchall(), columns=[tuple[0] for tuple in cursor_zi_price.description])['spuid'].to_list()
    new_df_combine_list = list()
    for category in point_category_list:
        try:
            point_category = category.replace('/','_')
            cat_df = pd.read_excel(path,sheet_name = f"{point_category}参数数据",converters = {'productcode':str})
            channel_list = cat_df['channel_alias'].unique().tolist()
        except:
            # Any failure reading the sheet (including a missing sheet) skips the category
            continue
        for channel_alias in channel_list:
            df = cat_df[cat_df['channel_alias'] == channel_alias]
            cat_subtitle_df = subtitle_df[subtitle_df['name'] == category]  # property definitions of this category
            cat_sku_db_df = sku_db_df[sku_db_df['categoryname'] == category]  # all existing SKUs of this category
            cat_sku_db_df['skuname'] = cat_sku_db_df['skuname'].apply(lambda x :x.upper())
            # Data dictionary entries of this category
            single_data_dict = data_dict[data_dict['categoryname'] == category]
            single_data_dict['stdvalue'] = single_data_dict['stdvalue'].apply(lambda x : x.upper())
            # Naming rule of this category
            named_rule = named_rules_df[named_rules_df['categoryname'] == category]['rule'].str.cat()
            new_product_flag_list = []  # per row: '1' = product has to be created, '0' = nothing to create
            computer_cat = ['笔记本','台式机','一体电脑']  # whole-machine categories; only these go on to product creation
            # Stringify so that missing cells compare equal to 'nan' / 'None' below
            df['remark'] = df['remark'].apply(lambda x:str(x))
            df['productcode'] = df['productcode'].apply(lambda x:str(x))
            # Product naming
            new_name_list = []
            sku_list = list()
            spuid_list = list()
            for index,row in df.iterrows():
                # Rows that already carry a remark or a product code get a blank placeholder name
                if (row['remark'] != 'nan' and row['remark'] != 'None') or (row['productcode'] != 'nan' and row['productcode'] != 'None'):
                    new_name_list.append(" ")
                    continue
                id_ = str(row['id'])
                skuname = ''
                brandname = row['zi_brandname']
                # The rule is a space-separated list of elements; an element is the brand,
                # the category, a "(a/b/c)" group, a "(a)" group or a plain parameter name
                for element in named_rule.split(" "):
                    if element == '品牌名称':
                        value = brandname
                    elif element == '类别名称':
                        value = category
                    elif element[0] == "(" and "/" in element:
                        element = element.replace("(","").replace(")","")
                        detail_element_list = element.split("/")
                        for i in range(len(detail_element_list)):
                            # Standard value of the parameter, converted by transform_simplevalue
                            param_ = detail_element_list[i]
                            value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
                            value = transform_simplevalue(cursor_zi_new,single_data_dict,category,detail_element_list[i],value.upper())
                            if i == 0:
                                skuname += "(" + value + "/"
                            elif i == len(detail_element_list)-1:
                                skuname += value + ") "
                            else:
                                skuname += value + "/"
                        continue
                    elif element[0] == "(" and "/" not in element:
                        element = element.replace("(","").replace(")","")
                        param_ = element
                        value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
                        value = transform_simplevalue(cursor_zi_new,single_data_dict,category,element,value.upper())
                        skuname += "(" + value + ") "
                        continue
                    else:
                        param_ = element
                        value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
                        value = transform_simplevalue(cursor_zi_new,single_data_dict,category,element,value.upper())
                    # Reached by the brand, category and plain-parameter branches only
                    skuname += value + " "
                skuname = skuname.strip()
                # Clean up blanks in the name; the way the name is split depends on the category
                if category in ['复印纸','扫描仪']:
                    skuname = skuname
                elif category in ['笔记本','台式机','一体电脑','碎纸机']:
                    skuname = dael_name_content(skuname,brandname)
                elif category in ['复印机','多功能一体机','单反相机']:
                    skuname_part1 = skuname.split(category)[0]
                    skuname_part2 = skuname.split(category)[1]
                    skuname_1 = dael_name_content(skuname_part1,brandname)
                    skuname_2 = dael_name_content(skuname_part2,brandname).replace(brandname,"")
                    skuname = skuname_1 + " " + category + skuname_2
                elif category in ['投影机','激光打印机','空调']:
                    if category == '空调':
                        skuname_part1 = skuname.split("空调 (")[0].strip()
                    else:
                        skuname_part1 = skuname.split(category)[0].strip()
                    # The two replaced words contain the category word "空调" and are
                    # removed before splitting on the category
                    skuname_part2 = skuname.replace("中央空调","").replace("空调扇","").split(category)[1]
                    skuname_2 = dael_name_content(skuname_part2,brandname).replace(brandname,"")
                    skuname = skuname_part1 + " " + category + skuname_2
                elif category in ['单电/微单相机']:
                    skuname_part1 = skuname.split(category)[0]
                    skuname_part2 = skuname.split(category)[1]
                    skuname_1 = dael_name_content(skuname_part1,brandname)
                    skuname_2 = dael_name_content(skuname_part2,brandname).replace(brandname,"")
                    skuname = skuname_1 + " " + category + skuname_2
                else:
                    skuname_part1 = skuname.split(category)[0].strip()
                    skuname_part2 = skuname.split(category)[1]
                    skuname_2 = dael_name_content(skuname_part2,brandname).replace(brandname,"")
                    skuname = skuname_part1 + " " +category + skuname_2
                print(skuname)
                new_name_list.append(skuname)
            df['new_name'] = new_name_list
            # Write the naming / matching outcome of every row to product_all
            for index,row in df.iterrows():
                if row['remark'] != 'nan' and row['remark'] != 'None':  # rejected row: state '2' plus the remark
                    new_product_flag_list.append('0')
                    cursor_zi_service.execute(f"update product_all set state = '2',new_name = '{row['new_name']}',remark = '{row['remark']}' where id = {row['id']}")
                elif row['productcode'] != 'nan' and row['productcode'] != 'None':
                    # Product code supplied in the workbook: state '9' with that code
                    new_product_flag_list.append('0')
                    cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{row['productcode']}' where id = {row['id']}")
                else:
                    newname = row['new_name']
                    # Match the upper-cased generated name against the existing SKU names
                    if cat_sku_db_df[cat_sku_db_df['skuname'] == row['new_name'].upper()].empty:  # no match
                        new_product_flag_list.append('1')
                        cursor_zi_service.execute(f"update product_all set state = '2',remark = '未匹配上产品',new_name = '{newname}' where id = {row['id']}")
                    else:  # matched: take the first matching SKU
                        productcode = cat_sku_db_df[cat_sku_db_df['skuname'] == row['new_name'].upper()]['sku'].tolist()[0]
                        cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{productcode}',new_name = '{newname}',remark = Null,pic_state = '0' where id = {row['id']}")
                        new_product_flag_list.append('0')
            print('完成名称匹配')
            df['new_product_flag'] = new_product_flag_list
            new_df = df[df['new_product_flag'] == '1']  # products that have to be created
            if category in computer_cat:
                # CPU attribute: the model up to the first '-', except for models
                # containing 飞腾 / 龙芯 / 兆芯, which are kept whole
                cpu_attr_list = []
                for cpu in new_df['CPU型号'].tolist():
                    if '飞腾' in cpu or '龙芯' in cpu or '兆芯' in cpu:
                        pass
                    else:
                        cpu = cpu.split('-')[0]
                    cpu_attr_list.append(cpu)
                new_df['CPU属性'] = cpu_attr_list
            else:
                # Other categories stop after name matching; nothing is created or exported
                continue
            # Create the new products
            print(f"开始新建产品,共计:'{len(new_df)}'件产品")
            index_ = 0
            delete_index_list = list()
            for index,row in new_df.iterrows():
                id_ = str(row['id'])
                brand = row['zi_brandname'].strip()
                brandcode = row['zi_brandcode']  # brand code used for the SPU lookup
                # Look up the SPU id; the SPU name is brand + SPU-level parameters + sub-category
                name = str(brand)
                for spu_param in cat_subtitle_df[cat_subtitle_df['skuorspu'] == 'spu'].sort_values("Expr1")['subtitle'].tolist():
                    if spu_param == '产品品牌':
                        continue
                    else:
                        name += " "
                        name += str(row[spu_param])
                name = name + " " +str(row['zi_subcategoryname'])  # SPU name
                spu = str(uuid.uuid1()).replace('-','')  # SPU code (only used by the disabled insert)
                category_code = int(str(row['zi_subcategorycode']).replace("?",""))  # category code
                brand_code = brandcode  # brand code
                cursor_zi_new.execute(f"select id from p_spu where categoryid = {category_code} and brandid = {brand_code} and spuname = '{name}'")
                data = cursor_zi_new.fetchall()
                spu_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
                param_deal_flag = False  # whether SPU parameters should be written
                if spu_df.empty:
                    # The SPU insert that used to live here is disabled, so a product
                    # without an existing SPU is dropped from this run
                    delete_index_list.append(index)
                    continue
                else:
                    spuid = spu_df['id'].tolist()[0]
                    # SPUs on the price blacklist are dropped as well
                    if spuid in hmd_spuid_list:
                        delete_index_list.append(index)
                        continue
                    spuid_list.append(spuid)
                # param_deal_flag is never set to True before this point (the code that
                # did so is disabled), so this branch does not run as written
                if param_deal_flag:
                    # Create the SPU attributes
                    for index,spu_param_row in cat_subtitle_df[cat_subtitle_df['skuorspu'] == 'spu'].iterrows():
                        subtitle = spu_param_row['subtitle'].strip()
                        subtitleid = spu_param_row['subtitleid']
                        param_ = subtitle
                        value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
                        cursor_zi_new.execute(f"select a.valueid from p_valuemap a \
left join p_value b \
on a.valueid = b.id \
left join p_subtitle c \
on b.subtitleid = c.id \
where a.spuid = {spuid} and c.name = '{subtitle}'")
                        data = cursor_zi_new.fetchall()
                        valueid_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
                        if valueid_df.empty:
                            cursor_zi_new.execute(f"insert into p_value (subtitleid,value) values ({subtitleid}, '{value}')")
                            cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
                            valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
                            cursor_zi_new.execute(f"select id from p_valuemap where spuid = {spuid} and valueid = {valueid}")
                            data = cursor_zi_new.fetchall()
                            check_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
                            if check_df.empty:
                                cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
                        else:
                            valueid_list = valueid_df['valueid'].tolist()
                            if len(valueid_list) == 1:
                                valueid = valueid_list[0]
                                cursor_zi_new.execute(f"select value from p_value where id = {valueid}")
                                find_value = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['value'].tolist()[0]
                                if value == find_value:
                                    continue
                                else:
                                    cursor_zi_new.execute(f"delete from p_valuemap where spuid = {spuid} and valueid = {valueid}")
                                    # Reuse an existing value id, or create one when none exists
                                    cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
                                    try:
                                        valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
                                        cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
                                    except:
                                        cursor_zi_new.execute(f"insert into p_value (subtitleid,value) values ({subtitleid}, '{value}')")
                                        cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
                                        valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
                                        cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
                            else:
                                for valueid in valueid_list:
                                    cursor_zi_new.execute(f"delete from p_valuemap where spuid = {spuid} and valueid = {valueid}")
                                # Reuse an existing value id, or create one when none exists
                                cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
                                try:
                                    valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
                                    cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
                                except:
                                    cursor_zi_new.execute(f"insert into p_value (subtitleid,value) values ({subtitleid}, '{value}')")
                                    cursor_zi_new.execute(f"select id from p_value where subtitleid = {subtitleid} and value = '{value}'")
                                    valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
                                    cursor_zi_new.execute(f"insert into p_valuemap (spuid,valueid) values ({spuid}, {valueid})")
                # Create the SKU
                skuname = str(row['new_name']).strip()
                # SKU code: current timestamp down to milliseconds, digits only
                sku = datetime.datetime.now().strftime("%Y-%m-%d%H:%M:%S.%f").replace('-','').replace(':','').replace('.','')[:-3]
                # Sleep one millisecond before continuing (presumably so the next
                # timestamp-based code differs)
                time.sleep(0.001)
                state = 1
                source = '1'
                createddate = datetime.datetime.now().strftime("%Y-%m-%d %X")
                cursor_zi_new.execute(f"select sku from p_sku where skuname = '{skuname}' and spuid = {spuid}")
                data = cursor_zi_new.fetchall()
                sku_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
                param_deal_flag = False  # whether SKU parameters should be written
                if sku_df.empty:
                    # New SKU: insert it and record it on product_all with pic_state '1'
                    cursor_zi_new.execute(f"insert into p_sku (skuname,sku,spuid,state,source,createddate) values ('{skuname}','{sku}',{spuid},{state},'{source}','{createddate}')")
                    cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{sku}',new_name = '{row['new_name']}',remark = Null,pic_state = '1' where id = {row['id']}")
                    param_deal_flag = True
                else:
                    # SKU already exists under this SPU: reuse its code, pic_state '0'
                    cursor_zi_service.execute(f"update product_all set state = '9',productcode = '{sku_df['sku'].tolist()[0]}',new_name = '{row['new_name']}',remark = Null,pic_state = '0' where id = {row['id']}")
                    sku = sku_df['sku'].tolist()[0]
                cursor_zi_new.execute(f"select id from p_sku where sku = '{sku}'")
                data = cursor_zi_new.fetchall()
                skuid = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]  # id of the SKU row
                sku_list.append(sku)
                if param_deal_flag:
                    # Store the detailed parameters of the newly inserted SKU
                    for index,sku_param_row in cat_subtitle_df[cat_subtitle_df['skuorspu'] == 'sku'].iterrows():
                        subtitle = sku_param_row['subtitle'].strip()
                        subtitleid = sku_param_row['subtitleid']
                        if subtitle == '产品品牌':
                            continue
                        try:
                            param_ = subtitle
                            value,value_flag = tuple(std_value_dict[id_][param_].items())[0]
                        except:
                            # Parameters without a standard value for this product are skipped
                            continue
                        cursor_zi_new.execute(f"select a.valueid from p_skuvaluemap a \
left join p_skuvalue b \
on a.valueid = b.id \
left join p_skusubtitle c \
on b.subtitleid = c.id \
where a.skuid = {skuid} and c.name = '{subtitle}'")
                        data = cursor_zi_new.fetchall()
                        valueid_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
                        if valueid_df.empty:
                            cursor_zi_new.execute(f"insert into p_skuvalue (subtitleid,value) values ({subtitleid}, '{value}')")
                            cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
                            valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
                            cursor_zi_new.execute(f"select id from p_skuvaluemap where skuid = {skuid} and valueid = {valueid}")
                            data = cursor_zi_new.fetchall()
                            check_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
                            if check_df.empty:
                                cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
                        else:
                            valueid_list = valueid_df['valueid'].tolist()
                            if len(valueid_list) == 1:
                                valueid = valueid_list[0]
                                cursor_zi_new.execute(f"select value from p_skuvalue where id = {valueid}")
                                find_value = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['value'].tolist()[0]
                                if value == find_value:
                                    continue
                                else:
                                    cursor_zi_new.execute(f"delete from p_skuvaluemap where skuid = {skuid} and valueid = {valueid}")
                                    # Reuse an existing value id, or create one when none exists
                                    cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
                                    try:
                                        valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
                                        cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
                                    except:
                                        cursor_zi_new.execute(f"insert into p_skuvalue (subtitleid,value) values ({subtitleid}, '{value}')")
                                        cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
                                        valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
                                        cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
                            else:
                                for valueid in valueid_list:
                                    cursor_zi_new.execute(f"delete from p_skuvaluemap where skuid = {skuid} and valueid = {valueid}")
                                # Reuse an existing value id, or create one when none exists
                                cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
                                try:
                                    valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
                                    cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
                                except:
                                    cursor_zi_new.execute(f"insert into p_skuvalue (subtitleid,value) values ({subtitleid}, '{value}')")
                                    cursor_zi_new.execute(f"select id from p_skuvalue where subtitleid = {subtitleid} and value = '{value}'")
                                    valueid = pd.DataFrame(cursor_zi_new.fetchall(), columns=[tuple[0] for tuple in cursor_zi_new.description])['id'].tolist()[0]
                                    cursor_zi_new.execute(f"insert into p_skuvaluemap (skuid,valueid) values ({skuid}, {valueid})")
                index_+=1
                print(f"已处理完'{index_}'件产品")
            print(f"完成{channel_alias}{category}数据建库")
            # Collect the created products for the later price calculation; rows dropped
            # above are removed so the frame lines up with sku_list / spuid_list
            new_df = new_df.drop(index= delete_index_list)
            new_df['sku'] = sku_list
            new_df['spuid'] = spuid_list
            export_df = new_df[['id','sku','spuid','zi_subcategoryname','zi_brandname','new_name','channel_alias']]
            new_df_combine_list.append(export_df)
    new_df_combine = pd.concat(new_df_combine_list)
    mssql_new.Close()
    mssql_price.Close()
    mssql_service.Close()
    return new_df_combine
def check_configure(new_df_all):
    """Run the accessory price-difference check once per sub-category.

    :param new_df_all: DataFrame of newly created products with at least the
        columns ``zi_subcategoryname`` and ``sku``.
    :return: None.
    """
    subcategory_column = new_df_all['zi_subcategoryname']
    for category in subcategory_column.unique().tolist():
        rows_in_category = new_df_all[subcategory_column == category]
        # Record the accessory price differences of the new products
        check_configure_price(category, rows_in_category['sku'].unique().tolist())
    print("检查完毕")
def get_data_all():
    """Load the ``product_all`` rows that still lack a price or a name.

    Selects every row in state ``'9'`` whose ``cal_price`` or ``new_name``
    is NULL from the ``ZI_Service`` database.

    :return: DataFrame with one column per ``product_all`` column.
    """
    mssql = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql._cur
    try:
        cursor_zi_service.execute("select * from product_all where state = '9' and (cal_price is Null or new_name is Null)")
        data = cursor_zi_service.fetchall()
        df = pd.DataFrame(data, columns=[column[0] for column in cursor_zi_service.description])
    finally:
        # Release the connection even when the query or the frame construction fails
        mssql.Close()
    return df
def get_price(df):
    """Write a calculated price to ``product_all`` for every row of *df*.

    For each row the SKU (``productcode``, stripped) is looked up first in
    ``zd_week_price`` for the highest ``periods`` value found in
    ``zd_entry_goods_price``, then in ``sku_calculate_price``.  When neither
    table has a row for the SKU, a "price unavailable" notice is stored in
    ``cal_price`` instead.

    :param df: DataFrame with at least the columns ``id`` (numeric) and
        ``productcode``.
    :return: None; the result is written to ``product_all``.
    """
    connections = [
        MSSQL('123.56.115.207','zdindex'),
        MSSQL('123.57.45.119','price_calculate'),
        MSSQL('123.57.45.119','ZI_Service'),
    ]
    try:
        cursor_zdindex, cursor_zi_price, cursor_zi_service = [connection._cur for connection in connections]
        # Progress bar
        index_ = Index()
        # Use the real row count as the total: the former len(df)-1 divided by
        # zero for a single-row frame and ran past 100% on the last row
        total = len(df)
        for counter, (_, row) in enumerate(df.iterrows(), start=1):
            print(index_(counter, total), end='%')
            sku = str(row['productcode']).strip()
            cursor_zdindex.execute(
                "select index_price_wave from zd_week_price where periods in (select top 1 max(periods) from zd_entry_goods_price) and goods_id = %s",
                (sku,))
            price_rows = cursor_zdindex.fetchall()
            if not price_rows:
                # No index price for the latest period: fall back to the calculated SKU price
                cursor_zi_price.execute("select sku_price from sku_calculate_price where sku = %s", (sku,))
                price_rows = cursor_zi_price.fetchall()
            if price_rows:
                cal_price = price_rows[0][0]
            else:
                cal_price = "无法获取价格,请核查"
            # cal_price is stored as text, exactly as the interpolated statement did
            cursor_zi_service.execute(
                "update product_all set cal_price = %s,productcode=%s where id = %s",
                (str(cal_price), sku, int(row['id'])))
    finally:
        # Close whatever was opened, even when a query fails half-way
        for connection in connections:
            if connection._conn:
                connection.Close()
def get_name(df):
    """Copy the library SKU name into ``product_all.new_name`` for every row of *df*.

    The name is read from ``p_sku`` (database ``ZI_NEW``) by SKU code; when
    the SKU is not found a "name unavailable" notice is stored instead.  The
    row's ``remark`` is reset to NULL in the same statement.

    :param df: DataFrame with at least the columns ``id`` (numeric) and
        ``productcode``.
    :return: None; the result is written to ``product_all``.
    """
    connections = [
        MSSQL('123.56.115.207','ZI_NEW'),
        MSSQL('123.57.45.119','ZI_Service'),
    ]
    try:
        cursor_zi_new, cursor_zi_service = [connection._cur for connection in connections]
        # Progress bar
        index_ = Index()
        # Use the real row count as the total: the former len(df)-1 divided by
        # zero for a single-row frame and ran past 100% on the last row
        total = len(df)
        for counter, (_, row) in enumerate(df.iterrows(), start=1):
            print(index_(counter, total), end='%')
            sku = row['productcode']
            cursor_zi_new.execute("select skuname from p_sku where sku = %s", (str(sku),))
            name_rows = cursor_zi_new.fetchall()
            if name_rows:
                name = name_rows[0][0]
            else:
                name = "无法获取产品名称"
            # Passing the name as a parameter keeps names containing a quote
            # from breaking the statement; str() keeps the stored text the
            # same as the interpolated version produced
            cursor_zi_service.execute(
                "update product_all set new_name = %s,remark = Null where id = %s",
                (str(name), int(row['id'])))
    finally:
        # Close whatever was opened, even when a query fails half-way
        for connection in connections:
            if connection._conn:
                connection.Close()
def save_sku_relationship(filter,channel_alias):
    """Send the customer-SKU to library-SKU pairs of one batch to the stock-in API.

    Reads the ``product_all`` rows in state ``'9'`` for the given channel
    and batch, then posts their ``sku`` / ``productcode`` / ``channel_alias``
    columns to the ``Stock-InSkuRelationshipInfo`` endpoint and prints the
    response.

    :param filter: value of the ``batch`` column to select.
    :param channel_alias: value of the ``channel_alias`` column to select.
    :return: None.
    """
    mssql = MSSQL('123.57.45.119','ZI_Service')
    cursor_zi_service = mssql._cur
    try:
        cursor_zi_service.execute(
            "select * from product_all where state = '9' and channel_alias = %s and batch = %s",
            (str(channel_alias), str(filter)))
        rows = cursor_zi_service.fetchall()
        df = pd.DataFrame(rows, columns=[column[0] for column in cursor_zi_service.description])
    finally:
        # The connection is not needed for the API call, so release it first;
        # this also covers the case where the query itself fails
        mssql.Close()
    data = {
        "params_info": {
            "cust_sku_list": df['sku'].tolist(),
            "sku_list": df['productcode'].tolist(),
            "channelAlias_list": df['channel_alias'].tolist()
        }
    }
    res = zgc_api("Stock-InSkuRelationshipInfo",data)
    print(res)
# Stock-in pipeline, executed top to bottom at module level.
# NOTE(review): paths, channel and batch are hard-coded for one specific run.

# Path of the stock-in template workbook
path = '/Users/rico/project/模板建库v2/历史数据/20201202/路桥建库模板12.2.xlsx'
# Initialise the data
InitializeData(path)
# Run parameters
channel_alias = 'CL-MBJK'
batch = '2020-12-02'
# Load the data
df = LoadData(batch,channel_alias,'deal')
# Analyse the basic information (brand, category, model)
AnalyseBasicInfo(df)
# Export the data (the file is written to the current working directory)
ExportToExcelBasicConfirm(batch,channel_alias)
# Read the confirmed workbook back and update
path = "/Users/rico/project/模板建库v2/CL-MBJK基础信息确认2020-12-02.xlsx"
UpdateBasicData(path)
# Fetch the stock-in data
df = GetCollectData(batch,channel_alias)
# Export the stock-in data for parameter completion
GetCollectDataDetail(df,channel_alias,batch)
# Process the stock-in data
path = "/Users/rico/project/模板建库v2/CL-MBJK建库产品参数确认2020-12-02.xlsx"
## Validate the parameter data
std_value_dict = CheckParamsData(path)
## Import the supplementary data dictionary
data_dict_path = "/Users/rico/project/模板建库v2/CL-MBJK建库产品参数确认2020-11-20(1)(数据字典补充).xlsx"
ComplicatedDataDict(data_dict_path)
if std_value_dict:
    ## Record the stock-in information (standard parameter values)
    save_collect_data_info(path,std_value_dict)
    ## Name the products
    product_named(batch,channel_alias,std_value_dict)
# Export the data to be confirmed before stock-in
df = LoadData(batch,channel_alias,'collectDataConfirm')
df.to_excel(f"{channel_alias}建库信息确认{batch}.xlsx")
# Read the confirmed stock-in workbook back and update
path = '/Users/rico/project/模板建库v2/CL-MBJK建库信息确认2020-12-02.xlsx'
UpdateCollectDataConfirm(path)
df = pd.read_excel(path)
mssql = MSSQL('123.57.45.119','ZI_Service')
cursor_zi_service = mssql._cur
try:
    # Stock-in
    id_list = df['id'].tolist()
    source_name_list = df['new_name'].tolist()
    # SECURITY NOTE(review): eval() executes whatever text the workbook cells
    # contain; if the cells only hold list/dict literals, ast.literal_eval
    # would be the safe replacement - confirm the cell format before switching.
    price_list = df['url_price'].apply(lambda x:eval(x)[0]).tolist()
    url_list = df['url'].apply(lambda x:eval(x)[0]).tolist()
    channelId_list = df['url_source'].apply(lambda x:eval(x)[0]).tolist()
    brand_list = df['zi_brandname'].tolist()
    brandId_list = df['zi_brandcode'].apply(lambda x:str(x)).tolist()
    category_list = df['zi_subcategoryname'].tolist()
    categoryId_list = df['zi_subcategorycode'].apply(lambda x:str(x)).tolist()
    params_list = df['params_standard'].apply(lambda x: eval(x)).tolist()
    # Single quotes are doubled in the names sent to the API
    name_list = df['new_name'].apply(lambda x: x.replace("'","''")).tolist()
    data = {
        "params_info": {
            "brand_list": brand_list,
            "brandId_list": brandId_list,
            "category_list": category_list,
            "categoryId_list": categoryId_list,
            "params_list": params_list,
            "name_list": name_list
        }
    }
    res = zgc_api("Stock-InProductInfo",data)
    sku_list = res['sku_list']
    # Update product_all with the SKU codes returned by the API
    for _id,sku in zip(id_list,sku_list):
        cursor_zi_service.execute(
            "update product_all set productcode = %s,remark = Null,state = '9' where id = %s",
            (str(sku), int(_id)))
finally:
    # The connection was previously never closed
    mssql.Close()
# Store the price relationships
data = {
    "params_info": {
        "sku_list": sku_list,
        "url_name_list": source_name_list,
        "url_price_list":price_list,
        "url_list": url_list,
        "channelId_list": channelId_list
    }
}
res = zgc_api("Stock-InPriceInfo",data)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 9 23:37:15 2020
@author: rico
"""
import pymssql
import pandas as pd
import os
import requests
import tensorflow as tf
from lxml import etree
import re
import datetime
import json
import time
#from text_moudle.run_cnn import name2subcategory as generl_name2subcategory
#from text_moudle_LXWL.run_cnn import name2subcategory as LXWL_name2subcategory
#print (os.getcwd())
class Index(object):
    """Text progress bar rendered as ``\\r[####      ] 40.0``.

    Calling the instance with ``(now, total)`` returns the string to print;
    it starts with a carriage return so that successive prints overwrite
    each other on the same terminal line.
    """

    def __init__(self, number=50, decimal=2):
        """
        :param number: width of the bar, i.e. the number of ``#`` cells
        :param decimal: number of decimal places kept in the percentage
        """
        self.decimal = decimal
        self.number = number
        self.a = 100/number  # percentage represented by one "#" cell

    def __call__(self, now, total):
        """
        Build the progress line for *now* out of *total*.

        :param now: number of items processed so far
        :param total: total number of items
        :return: ``"\\r[<bar>] <percentage>"``
        """
        percentage = self.percentage_number(now, total)
        # Number of filled cells for this percentage
        well_num = int(percentage / self.a)
        return "\r%s %s" % (self.progress_bar(well_num), percentage)

    def percentage_number(self, now, total):
        """
        Compute the completed percentage.

        :param now: number of items processed so far
        :param total: total number of items
        :return: percentage rounded to ``decimal`` places; ``100.0`` when
            *total* is zero or negative (nothing left to do) instead of
            raising ``ZeroDivisionError``
        """
        if total <= 0:
            return 100.0
        return round(now / total * 100, self.decimal)

    def progress_bar(self, num):
        """
        Render the bar itself.

        :param num: number of ``#`` cells to show; clamped to
            ``0..number`` so the bar always keeps its fixed width
        :return: the bar, e.g. ``[###   ]``
        """
        filled = max(0, min(num, self.number))
        return '[%s%s]' % ("#" * filled, " " * (self.number - filled))
def match_sku(sku_list,frm):
    '''
    De-duplicate supplier links against the SKUs already in the library.

    The known (productcode, sku) pairs are read from ``Productcode_Sku``
    (database ``ZI_NEW``) and ``op_product_sku`` (database ``ZI_DataBase``),
    skipping product codes that start with ``78``; for the second table the
    channel ``SN`` is queried as ``SN-NEW``.  For Suning channels a SKU that
    is not found is looked up once more with the prefix ``0000000000/``.

    The lookup table is built once as a dict (the first pair seen for a SKU
    wins, as before) instead of re-scanning the full list for every SKU.
    A clean-up statement for duplicated library rows used to sit here as an
    unused string literal; it had no effect and was removed.

    :param sku_list: iterable of channel SKUs; duplicates are collapsed
    :param frm: channel code DL/DW/DZ/GM/JD/LXWL/OFS/SN/YHD/ZCSM, or a
        sequence of such codes
    :return: dict mapping each stripped SKU to the product code it matches,
        or to '0' when the SKU is not in the library
    :raises ValueError: when *frm* is an empty sequence
    '''
    frm_values = [frm] if isinstance(frm, str) else list(frm)
    if not frm_values:
        raise ValueError("frm must name at least one channel")
    op_frm_values = ['SN-NEW' if value == 'SN' else value for value in frm_values]
    is_suning = 'SN' in frm_values or 'SN-NEW' in frm_values
    # One "%s" per channel; only placeholders are spliced into the SQL text,
    # the values themselves are passed as parameters
    placeholders = ','.join(['%s'] * len(frm_values))
    excluded_codes = '78%'

    print('正在获取当前库中所有sku,请稍等.....')  # fetch every known SKU for the check
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database='ZI_NEW',autocommit=True)
    try:
        cursor = conn.cursor()
        cursor.execute(
            "select productcode,sku from Productcode_Sku where frm in (" + placeholders + ") and productcode not like %s",
            tuple(frm_values) + (excluded_codes,))
        known_pairs = list(cursor.fetchall())
    finally:
        conn.close()

    # Same lookup in op_product_sku
    conn_zi_database = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database='ZI_DataBase',autocommit=True)
    try:
        cursor_zi_database = conn_zi_database.cursor()
        cursor_zi_database.execute(
            "select productcode,sku from op_product_sku where frm in (" + placeholders + ") and productcode not like %s",
            tuple(op_frm_values) + (excluded_codes,))
        known_pairs.extend(cursor_zi_database.fetchall())
    finally:
        conn_zi_database.close()
    print('sku获取完毕')

    # First product code per SKU, in query order
    code_by_sku = {}
    for productcode, known_sku in known_pairs:
        code_by_sku.setdefault(known_sku.strip(), productcode.strip())

    sku_check = {}
    for sku in set(sku_list):
        sku = str(sku).strip()
        product_no = code_by_sku.get(sku)
        if product_no is None and is_suning:
            # Suning SKUs may be stored with a ten-zero prefix
            product_no = code_by_sku.get('0000000000/' + sku)
        sku_check[sku] = '0' if product_no is None else product_no
    return sku_check
def supporturlDataDeal(source,batch):
    '''
    Review and de-duplicate the reverse-crawled products of one batch.

    Rows of ``reverse_data.product`` that have no ``channel_url_validate``
    yet are processed as follows:

    * channels other than JD / GM / SN are approved directly and get the
      product code ``无``;
    * JD / GM / SN rows are reviewed by ``check_reverse_data`` and their
      SKUs are matched against the library with ``match_sku``.

    When the batch holds no JD / GM / SN rows the review is skipped;
    previously ``check_reverse_data`` returned the empty input and the
    missing ``update_id`` column raised ``KeyError``.

    :param source: value compared with the ``来源`` column
    :param batch: value compared with the ``batch`` column
    :return: None
    '''
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database= 'reverse_data',autocommit=True)
    try:
        cursor = conn.cursor()
        # NOTE(review): source and batch are spliced in unquoted, so callers
        # presumably pass numbers or pre-quoted text; left as-is because
        # switching to parameters would change what callers have to pass.
        cursor.execute(f'select * from product where channel_url_validate is Null and 来源={source} and batch={batch}')
        data = cursor.fetchall()
        df = pd.DataFrame(data, columns=[column[0] for column in cursor.description])[['product_id', 'channel_sku', 'product_name', 'channel_id', 'channel_product_id']]
        df = df.rename(columns={'product_id': 'id', 'channel_sku': 'sku', 'product_name': 'name', 'channel_id': 'source','channel_product_id': 'url'})
        df['price'] = '0'
        is_big_three = df['source'].isin(['JD', 'GM', 'SN'])
        # Every other channel is approved without review
        for id_ in df.loc[~is_big_three, 'id'].tolist():
            cursor.execute(
                "update product set channel_url_validate=%s,zgc_productcode=%s where product_id=%s",
                ('通过', '无', str(id_)))
        df_DS = df[is_big_three].reset_index(drop=True)
        if df_DS.empty:
            print('今日无新增数据')
            return
        # Review the three big e-commerce channels
        check_data = check_reverse_data(df_DS)
        for id_, suggestion in zip(check_data['update_id'], check_data['审核意见']):
            cursor.execute(
                "update product set channel_url_validate=%s where product_id=%s",
                (str(suggestion), str(id_)))
        # De-duplicate their SKUs against the library
        for channel in list(df_DS['source'].unique()):
            print(channel + 'sku排重中')
            sku_list = df_DS[df_DS['source'] == channel]['sku'].tolist()
            sku_check = match_sku(sku_list, channel)
            for channel_sku, product_no in sku_check.items():
                # '0' means the SKU is not in the library yet
                zgc_productcode = '无' if product_no == '0' else product_no
                cursor.execute(
                    "update product set zgc_productcode=%s where channel_sku=%s",
                    (str(zgc_productcode), str(channel_sku)))
            print(channel + 'sku排重完毕')
    finally:
        # Close the connection on every path, including errors
        conn.close()
_REVERSE_PASS = "通过"
_REVERSE_NO_STOCK = "无货,请按要求提供在销渠道证明"
_REVERSE_NOT_SELF_RUN = "非自营,请按要求提供在销渠道证明"
_REVERSE_BLOCKED_WORD = "定制/专用/书籍类产品暂不通过"
_REVERSE_BAD_LINK = "链接有误,请按要求提供在销渠道证明"
_REVERSE_OTHER_SITE = "非三大电商,请按要求提供在销渠道证明"
_REVERSE_NOT_SOLD_HERE = '该地区不销售(北京市丰台区)'


def _reverse_hits_stop_word(name, word_df):
    '''
    Tell whether a product name is rejected by the stop-word table.

    A row of ``word_df`` rejects the name when its stop word occurs in the
    name and any of its '/'-separated white words does not.  A stop word
    without white words (NULL in the table) rejects on its own, which is how
    ``check_data`` treats the same table; previously such rows were skipped.

    :param name: product name (any object, compared through ``str``)
    :param word_df: DataFrame with the columns 'stopword' and 'whiteword'
    :return: True when the name must be rejected
    '''
    text = str(name)
    for stopword, whitewords in zip(word_df['stopword'], word_df['whiteword']):
        if not stopword or str(stopword) not in text:
            continue
        if not isinstance(whitewords, str):
            return True
        if any(whiteword not in text for whiteword in whitewords.split('/')):
            return True
    return False


def _reverse_audit_jd(session, headers, word_df, main_url, item):
    '''Audit one JD link; updates ``item`` (sku/name/price) and returns the verdict.'''
    submitted_price = item['price']
    sku = main_url.split('/')[-1].split('.')[0]
    item['sku'] = sku
    # Fetch the price.
    price_url = "https://p.3.cn/prices/mgets?skuIds=" + str(sku)
    jd_price = get_response(session, price_url, headers).json()[0]['p']
    if len(jd_price) == 0:
        jd_price = submitted_price
    if jd_price == '-1.00':
        # JD reports -1.00 for goods that cannot be bought.
        return _REVERSE_NO_STOCK
    item['price'] = jd_price
    # Fetch the product page.
    page = get_response(session, "https://item.jd.com/" + sku + ".html", headers)
    html = etree.HTML(page.text)
    ziying = html.xpath("//div[@class='name goodshop EDropdown']/em/text()")
    if "自营" not in str(ziying):
        return _REVERSE_NOT_SELF_RUN
    # The xpath result (a list) is stored as the name, as before.
    item['name'] = html.xpath("//div[@class='sku-name']/text()")
    if _reverse_hits_stop_word(item['name'], word_df):
        return _REVERSE_BLOCKED_WORD
    # Fetch stock information.
    stock_url = "https://c0.3.cn/stock?skuId=" + str(sku) + "&area=1_2901_2906_0&cat=9987,653,655"
    stock = get_response(session, stock_url, headers)
    if stock == -1:
        # The stock service could not be reached: the item is let through.
        return _REVERSE_PASS
    stock.encoding = 'gbk'
    is_purchase = json.loads(stock.text)
    try:
        sold_out = ("无货" in is_purchase['stock']['stockDesc']
                    or "无货" in is_purchase['stock']['StockStateName'])
    except (KeyError, TypeError, IndexError):
        # Alternative payload layout with the state at the top level.
        sold_out = "无货" in is_purchase['StockStateName']
    return _REVERSE_NO_STOCK if sold_out else _REVERSE_PASS


def _reverse_audit_gome(session, headers, word_df, main_url, item):
    '''Audit one GOME link; updates ``item`` (sku/name/price) and returns the verdict.'''
    path = re.findall(".cn/(.*?).html", main_url)[0]
    main_url_ = 'https://item.gome.com.cn/' + path + '.html'
    page = get_response(session, main_url_, headers)
    html = etree.HTML(page.text)
    content = html.xpath("//script[contains(text(),'gomePrice')]/text()")[0]
    item['price'] = content.split('gomePrice:"')[1:][0].split('"')[0]
    ziying = html.xpath("//span[@class='identify']/text()")
    if len(ziying) != 1:
        return _REVERSE_NOT_SELF_RUN
    # The xpath result (a list) is stored as the name, as before.
    item['name'] = html.xpath("//*[@id='gm-prd-main']/div[1]/h1/text()")
    if _reverse_hits_stop_word(item['name'], word_df):
        return _REVERSE_BLOCKED_WORD
    # Fetch stock information.
    sku = main_url_.split('.html')[0].split('/')[-1].replace('-', '/')
    item['sku'] = sku
    stock_url = "https://ss.gome.com.cn/item/v1/d/m/store/unite/" + str(
        sku) + "/N/11010200/110102002/1/null/flag/item/allStores?callback=allStores"
    reply = get_response(session, stock_url, headers)
    # Strip the JSONP wrapper before parsing.
    payload = json.loads(reply.text.replace('allStores(', '').replace(')', ''))
    wuhuo = payload['result']['stock']['status']
    return _REVERSE_NO_STOCK if wuhuo == False else _REVERSE_PASS


def _reverse_audit_suning(session, headers, word_df, main_url, item):
    '''Audit one Suning link; updates ``item`` (sku/name/price) and returns the verdict.'''
    submitted_price = item['price']
    sku = re.findall(".com/(.*?).html", main_url)[0]
    item['sku'] = sku
    page = get_response(session, 'https://product.suning.com/' + sku + '.html', headers)
    html = etree.HTML(page.text)
    part_number = html.xpath("//input[@id='curPartNumber']/@value")[0]
    ziying1 = html.xpath("//div[@class='proinfo-title']/h1/span/i/text()")
    ziying2 = html.xpath("//h1[@id='itemDisplayName']/span/text()")
    youhuo_ = re.findall("id=\"ie7_onsale\" >(.*?)<i", page.text)
    if "自营" not in ziying1 and "自营" not in ziying2:
        return _REVERSE_NOT_SELF_RUN
    url_json = f'https://product.suning.com/pds-web/ajax/itemUniqueInfo_{str(part_number)}_0000000000.html'
    item_detail = json.loads(get_response(session, url_json, headers).text)["itemDetail"]
    try:
        is_published = item_detail["isPublished"]
    except (KeyError, TypeError, IndexError):
        is_published = '0'
    item['name'] = item_detail["cmmdtyTitle"]
    if is_published != '1' or '此款有货' not in str(youhuo_):
        return _REVERSE_NO_STOCK
    if _reverse_hits_stop_word(item['name'], word_df):
        return _REVERSE_BLOCKED_WORD
    # Three fields are needed to build the price URL.
    sa_data = html.xpath("//a[@id='addCart2']/@sa-data")
    prdid = re.findall("'prdid':'(.*?)','", str(sa_data))[0]
    shopid = re.findall("'shopid':'(.*?)','", str(sa_data))[0]
    cate_code = html.xpath("//input[@name='procateCode']/@value")[0]
    real_url = f'https://pas.suning.com/nspcsale_0_{prdid}_{prdid}_{shopid}_10_010_0100100_157122_1000000_9017_10106_Z001___{cate_code}.html?callback=pcData'
    try:
        price_response = requests.get(real_url, timeout=5)
        sn_price = re.findall('"promotionPrice":"(.*?)",', price_response.text)[0]
    except Exception:
        # Best effort: fall back to the submitted price.
        sn_price = submitted_price
    if len(sn_price) != 0:
        item['price'] = sn_price
        return _REVERSE_PASS
    return _REVERSE_NOT_SOLD_HERE


def check_reverse_data(check_data):
    '''
    Audit crawled product rows against JD / GOME / Suning.

    Each row is routed by its URL ("jd", "gome" or "suning" contained in it)
    to a per-site audit that checks price, self-run status, the stop-word
    table and stock.  Exactly one verdict is recorded per row, so the result
    columns always have the same length; any failure inside a site audit
    yields the "链接有误" verdict with the submitted price.

    :param check_data: DataFrame with the columns 'id', 'sku', 'name', 'url',
                       'source' and 'price'; it is modified in place
    :return: the same DataFrame with the columns '审核意见' and 'update_id'
             added and 'sku', 'name', 'url', 'source', 'price' rewritten
             (the added columns are empty when there is no row to audit)
    '''
    if check_data.empty:
        print('今日无新增数据')
        # Keep the result shape stable so callers can read both columns.
        check_data['审核意见'] = []
        check_data['update_id'] = []
        return check_data

    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='zi_zh',autocommit=True)
    try:
        cursor = conn.cursor()
        cursor.execute('select stop_word,white_word from Stopwords')
        word_df = pd.DataFrame(cursor.fetchall(), columns=['stopword', 'whiteword'])
    finally:
        conn.close()

    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'}
    session = requests.Session()
    print('共' + str(len(check_data)) + '条数据待审核')

    # Checked in this order, like the original if/elif chain.
    auditors = (
        ("jd", _reverse_audit_jd),
        ("gome", _reverse_audit_gome),
        ("suning", _reverse_audit_suning),
    )
    records = []
    for i in range(len(check_data)):
        row = check_data.iloc[i]
        main_url = row['url'].strip()
        # Mutable per-row state; a site audit may refine sku / name / price.
        item = {'sku': row['sku'], 'name': row['name'], 'price': row['price']}
        print(main_url)
        auditor = next((audit for marker, audit in auditors if marker in str(main_url)), None)
        if auditor is None:
            suggestion = _REVERSE_OTHER_SITE
        else:
            try:
                suggestion = auditor(session, headers, word_df, main_url, item)
            except Exception:
                suggestion = _REVERSE_BAD_LINK
                item['price'] = row['price']
        print(suggestion)
        records.append((suggestion, row['id'], item['sku'], item['name'],
                        main_url, row['source'], item['price']))
        print(f"已经处理{i + 1}条数据")

    columns = ('审核意见', 'update_id', 'sku', 'name', 'url', 'source', 'price')
    for position, column in enumerate(columns):
        check_data[column] = [record[position] for record in records]
    return check_data
def get_response(session, url, headers, timeout=5, retries=9):
    '''
    GET ``url`` and repeat the request when it fails with an exception.

    The first attempt decides the strategy: a reply with a status other than
    200 is final and yields -1 without retrying; an exception (for example a
    timeout) triggers up to ``retries`` further attempts, each of which
    succeeds only with a 200 reply.

    :param session: object with a requests-style ``get(url, headers=, timeout=)``
    :param url: address to fetch
    :param headers: HTTP headers sent with every attempt
    :param timeout: per-attempt timeout in seconds
    :param retries: number of extra attempts after a failed first one
    :return: the response object on HTTP 200, otherwise -1
    '''
    try:
        response = session.get(url, headers=headers, timeout=timeout)
    except Exception:
        # Only Exception is caught so Ctrl-C / SystemExit still abort the run.
        pass
    else:
        return response if response.status_code == 200 else -1

    for attempt in range(1, retries + 1):
        print('请求超时,第%s次重复请求' % attempt)
        try:
            response = session.get(url, headers=headers, timeout=timeout)
        except Exception:
            continue
        if response.status_code == 200:
            return response
    return -1
def check_data(fileName,source):
    '''
    Audit the sales-channel link of every row and write the verdicts to the database.

    For each row the link to audit is taken from 'url', falling back to
    'otherurl' / 'supporturl'.  JD, GOME and Suning links are checked online
    (price, self-run flag, stop-word table, stock); other links are checked
    against the product name stored in the reverse_data database.  One verdict
    is appended to ``result`` and one price to ``price`` per row; both are then
    stored in the '审核意见' and 'price' columns, links repeated within the
    table are flagged, and the state/remark columns of the product tables are
    updated.

    :param fileName: despite its name, a table used like a pandas DataFrame
                     (``.loc[i]`` row access, column assignment); rows are
                     addressed by the labels 0..len-1
    :param source: 'zi_zh' updates product_zh / product_all keyed by the
                   'product_zh_id' column; any other value updates
                   product_all_api keyed by the 'id' column
    :return: None

    NOTE(review): neither ``conn`` nor ``reverse_conn`` is closed anywhere in
    this function.
    NOTE(review): all SQL below is assembled with f-strings from row values and
    URLs; a quote character in any of them breaks (or alters) the statement.
    '''
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='zi_zh',autocommit=True)
    cursor = conn.cursor()
    reverse_conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='reverse_data',autocommit=True)
    reverse_cursor = reverse_conn.cursor()
    # Parallel lists: one verdict and one price per processed row.
    result = []
    price = []
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'}
    session = requests.Session()
    cursor.execute("select stop_word,white_word from stopwords")
    stopwords = (cursor.fetchall())
    word_list = pd.DataFrame(stopwords, columns=['stopword', 'white_word'])
    # (Disabled) fetch the ids of the government-procurement suppliers that need auditing:
    #cursor.execute("select id from users where frm is not null and frm != ''")
    #need_check_id_from_zc = [i[0] for i in cursor.fetchall()]
    # Audit the links
    for i in range(len(fileName)):
        df = fileName.loc[i]
        # Resolve the fallback link from 'otherurl' or 'supporturl'.
        # NOTE(review): eval() is applied to values read from the row; if these
        # can contain anything other than trusted list literals this executes
        # arbitrary code - ast.literal_eval would be the safe equivalent.
        if 'otherurl' in df.index.tolist():
            if df['otherurl']:
                try:
                    otherurl = eval(df['otherurl'])[0].strip()
                except:
                    # Not a list literal: use the raw string.
                    otherurl = df['otherurl'].strip()
            else:
                otherurl = ''
        elif 'supporturl' in df.index.tolist():
            other = eval(df['supporturl'].strip())
            #other = df['supporturl'].strip()
            if isinstance(other,list) and len(other) != 0:
                otherurl = other[0]
            else:
                # NOTE(review): may leave a non-string (e.g. an empty list) in otherurl.
                otherurl = other
        else:
            otherurl = ''
            print('该渠道为API流程中的没有提供其他链接的渠道,其product_all表既没有other字段,也没有supporturl字段')
        if df['url']:
            try:
                main_url = eval(df['url'])[0].strip()
            except:
                main_url = df['url'].strip()
        else:
            main_url = otherurl
        # A main link outside the three big sites is replaced by the fallback link.
        if "jd" not in str(main_url) and "gome" not in str(main_url) and "suning" not in str(main_url):
            main_url = otherurl
        # if len(main_url) == 0 and len(otherurl) != 0:
        # main_url = otherurl
        print(f'开始处理:{main_url}')
        if len(main_url) == 0:
            # No link at all: left for the "other channel proof" handling.
            print("其他销售渠道证明")
            result.append("其他销售渠道证明")
            price.append("其他销售渠道价格")
        elif "jd" in str(main_url) or "gome" in str(main_url) or "suning" in str(main_url):
            if "jd" in str(main_url):
                if "i-item" in str(main_url):
                    # Links containing "i-item" pass with the submitted price, without any online check.
                    jd_price = df['price']
                    price.append(jd_price)
                    result.append("通过")
                else:
                    try:
                        # NOTE(review): the response is unused, so this request only
                        # serves to raise on an unreachable link; no timeout is set
                        # on this or the following session.get calls.
                        try_ = session.get(main_url, headers=headers)
                        sku = main_url.split('/')[-1].split('.')[0]
                        # Fetch the price
                        url = "https://p.3.cn/prices/mgets?skuIds=" + str(sku)
                        r = session.get(url, headers=headers).json()
                        jd_price = r[0]['p']
                        if len(jd_price) == 0:
                            print('未获取到价格,使用供应商提交价格!!!')
                            jd_price = df['price']
                        if jd_price == '-1.00':
                            # A price of '-1.00' is treated as out of stock.
                            jd_price = df['price']
                            price.append(jd_price)
                            result.append("无货,请按要求提供在销渠道证明")
                        else:
                            print('价格为:', jd_price)
                            # Fetch the remaining product information
                            main_url_ = "https://item.jd.com/" + sku + ".html"
                            r = session.get(main_url_, headers=headers)
                            html = etree.HTML(r.text)
                            ziying = html.xpath(
                                "//div[@class='name goodshop EDropdown']/em/text()")
                            if "自营" in str(ziying):
                                name = html.xpath("//div[@class='sku-name']/text()")
                                # (A former hard-coded keyword check was replaced by the stop-word table below.)
                                # Stop-word check: the loop either appends a rejection and
                                # breaks, or leaves the flag at '暂通过'.
                                # NOTE(review): with an empty stop-word table the flag stays
                                # '即将判断' and no verdict is appended for this row.
                                pass_word_jd = '即将判断'
                                for sw, ww in zip(list(word_list['stopword']), list(word_list['white_word'])):
                                    if ww is None:
                                        if str(sw) in str(name):
                                            pass_word_jd = '不通过'
                                            print("定制/专用/含禁止上架关键词,产品暂不通过")
                                            result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                                            break
                                        else:
                                            pass_word_jd = '暂通过'
                                    else:
                                        if str(sw) in str(name):
                                            if str(ww) in str(name):
                                                pass_word_jd = '暂通过'
                                            else:
                                                pass_word_jd = '不通过'
                                                print("定制/专用/含禁止上架关键词,产品暂不通过")
                                                result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                                                break
                                        else:
                                            pass_word_jd = '暂通过'
                                if pass_word_jd == '暂通过':
                                    # Fetch stock information
                                    url = "https://c0.3.cn/stock?skuId=" + str(
                                        sku) + "&area=1_2901_2906_0&cat=9987,653,655"
                                    r = get_response(session, url, headers)
                                    if r == -1:
                                        # Stock service unavailable: the row is let through.
                                        print("通过")
                                        result.append("通过")
                                    else:
                                        r.encoding = 'gbk'
                                        is_purchase = json.loads(r.text)
                                        try:
                                            if "无货" in is_purchase['stock']['stockDesc'] or "无货" in \
                                                    is_purchase['stock']['StockStateName']:
                                                print("无货,请按要求提供在销渠道证明")
                                                result.append("无货,请按要求提供在销渠道证明")
                                            else:
                                                print("通过")
                                                result.append("通过")
                                        except:
                                            # Fallback for a payload with the state at the top level.
                                            if "无货" in is_purchase['StockStateName']:
                                                print("无货,请按要求提供在销渠道证明")
                                                result.append("无货,请按要求提供在销渠道证明")
                                            else:
                                                print("通过")
                                                result.append("通过")
                                else:
                                    pass
                            else:
                                print("非自营,请按要求提供在销渠道证明")
                                result.append("非自营,请按要求提供在销渠道证明")
                            # Every path of this else-branch appended one verdict above; this is the matching price.
                            price.append(jd_price)
                    except:
                        # NOTE(review): bare except; if a verdict was already appended
                        # before the failure, result gets a second entry for this row.
                        print("链接有误,请按要求提供在销渠道证明")
                        result.append("链接有误,请按要求提供在销渠道证明")
                        price.append(df['price'])
            elif "gome" in str(main_url):
                # Fetch the price
                try:
                    try_ = session.get(main_url, headers=headers)
                    main_url_1 = re.findall(".cn/(.*?).html", main_url)[0]
                    main_url_ = 'https://item.gome.com.cn/' + main_url_1 + '.html'
                    r = session.get(main_url_, headers=headers)
                    html = etree.HTML(r.text)
                    content = html.xpath("//script[contains(text(),'gomePrice')]/text()")[0]
                    gm_price = content.split('gomePrice:"')[1:][0].split('"')[0]
                    ziying = html.xpath(
                        "//span[@class='identify']/text()")
                    if len(ziying) == 1:
                        name = html.xpath(
                            "//*[@id='gm-prd-main']/div[1]/h1/text()")
                        # (A former hard-coded keyword check was replaced by the stop-word table below.)
                        pass_word_gm = '即将判断'
                        for sw, ww in zip(list(word_list['stopword']), list(word_list['white_word'])):
                            if ww is None:
                                if str(sw) in str(name):
                                    pass_word_gm = '不通过'
                                    print("定制/专用/含禁止上架关键词,产品暂不通过")
                                    result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                                    break
                                else:
                                    pass_word_gm = '暂通过'
                            else:
                                if str(sw) in str(name):
                                    if str(ww) in str(name):
                                        pass_word_gm = '暂通过'
                                    else:
                                        pass_word_gm = '不通过'
                                        print("定制/专用/含禁止上架关键词,产品暂不通过")
                                        result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                                        break
                                else:
                                    pass_word_gm = '暂通过'
                        if pass_word_gm == '暂通过':
                            # Fetch stock information
                            sku = main_url_.split('.html')[0].split('/')[-1].replace('-', '/')
                            url = "https://ss.gome.com.cn/item/v1/d/m/store/unite/" + str(
                                sku) + "/N/11010200/110102002/1/null/flag/item/allStores?callback=allStores"
                            r = session.get(url, headers=headers)
                            # Strip the JSONP wrapper before parsing.
                            content = r.text.replace('allStores(', '')
                            content = content.replace(')', '')
                            content = json.loads(content)
                            wuhuo = content['result']['stock']['status']
                            if wuhuo == False:
                                print("无货,请按要求提供在销渠道证明")
                                result.append("无货,请按要求提供在销渠道证明")
                            else:
                                print("通过")
                                result.append("通过")
                        else:
                            pass
                    else:
                        print("非自营,请按要求提供在销渠道证明")
                        result.append("非自营,请按要求提供在销渠道证明")
                    # Matching price for the verdict appended above.
                    price.append(gm_price)
                except:
                    print("链接有误,请按要求提供在销渠道证明")
                    result.append("链接有误,请按要求提供在销渠道证明")
                    price.append(df['price'])
            elif "suning" in str(main_url):
                # sku = main_url.split('.html')[0].split('/')[-1].replace('-','/')
                print(f'苏宁:{main_url}')
                try:
                    try_ = session.get(main_url, headers=headers)
                    sku = re.findall(".com/(.*?).html", main_url)[0]
                    main_url_ = 'https://product.suning.com/' + sku + '.html'
                    r = session.get(main_url_, headers=headers)
                    html = etree.HTML(r.text)
                    daaa = r.text
                    # Default to the submitted price until a live price is found.
                    sn_price = df.price
                    # sn_price = '58.00-558.00'
                    str2 = html.xpath("//input[@id='curPartNumber']/@value")[0]
                    ziying1 = html.xpath("//div[@class='proinfo-title']/h1/span/i/text()")
                    ziying2 = html.xpath("//h1[@id='itemDisplayName']/span/text()")
                    youhuo_ = re.findall("id=\"ie7_onsale\" >(.*?)<i", daaa)
                    if "自营" in ziying1 or "自营" in ziying2:
                        daohuo = html.xpath("//a[@id='tellMe']/span/text()")
                        url_json = f'https://product.suning.com/pds-web/ajax/itemUniqueInfo_{str(str2)}_0000000000.html'
                        response_json = session.get(url_json, headers=headers)
                        json_data = json.loads(response_json.text)
                        itemDetail = json_data["itemDetail"]
                        try:
                            isPublished = itemDetail["isPublished"]
                        except:
                            isPublished = '0'
                        name = itemDetail["cmmdtyTitle"]
                        # Both '1' and the fallback '0' are accepted here.
                        if isPublished == '1' or isPublished == '0':
                            if '此款有货' in str(youhuo_):
                                state = '有货'
                                # (A former hard-coded keyword check was replaced by the stop-word table below.)
                                # Unlike the JD/GOME loops, this loop only sets the flag;
                                # the rejection verdict is appended after the loop.
                                pass_word_sn = '即将判断'
                                for sw, ww in zip(list(word_list['stopword']), list(word_list['white_word'])):
                                    if ww is None:
                                        if str(sw) in str(name):
                                            pass_word_sn = '不通过'
                                            print("定制/专用/含禁止上架关键词,产品暂不通过")
                                            #result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                                            break
                                        else:
                                            pass_word_sn = '暂通过'
                                    else:
                                        if str(sw) in str(name):
                                            if str(ww) in str(name):
                                                pass_word_sn = '暂通过'
                                            else:
                                                pass_word_sn = '不通过'
                                                print("定制/专用/含禁止上架关键词,产品暂不通过")
                                                #result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                                                break
                                        else:
                                            pass_word_sn = '暂通过'
                                if pass_word_sn == '暂通过':
                                    # Fields needed to build the price URL.
                                    str11 = html.xpath("//input[@id='curPartNumber']/@value")[0]
                                    str22 = html.xpath("//input[@id='shop_code']/@value")[0]
                                    str33 = html.xpath("//input[@name='procateCode']/@value")[0]
                                    if len(str22) == 0:
                                        # No shop code on the page: use the all-zero shop code.
                                        str22 = '0000000000'
                                        real_url = f'https://pas.suning.com/nspcsale_0_{str11}_{str11}_0000000000_10_010_0100100_501126_1000000_9017_10106_Z001___{str33}_1.0_0___000321NJB____0__.html?callback=pcData'
                                        # real_url = f'https://pas.suning.com/nspcsale_0_{str11}_{str11}_{str22}_10_010_0100100_157122_1000000_9017_10106_Z001___{str33}.html?callback=pcData'
                                        price_response = requests.get(real_url)
                                        sn_price = re.findall('"refPrice":"(.*?)",', price_response.text)[0]
                                        if len(sn_price) == 0:
                                            sn_price = re.findall('"promotionPrice":"(.*?)",', price_response.text)[0]
                                        else:
                                            pass
                                    else:
                                        real_url = f'https://pas.suning.com/nspcsale_0_{str11}_{str11}_{str22}_10_010_0100100_157122_1000000_9017_10106_Z001___{str33}.html?callback=pcData'
                                        price_response = requests.get(real_url)
                                        sn_price = re.findall('"promotionPrice":"(.*?)",', price_response.text)[0]
                                    if len(sn_price) != 0:
                                        try:
                                            # A price that is not a single number (e.g. a range) fails here.
                                            sn_price = float(sn_price)
                                            price.append(sn_price)
                                            result.append('通过')
                                        except:
                                            print(f'该链接获取的价格有问题:{sn_price},{main_url_}')
                                            sn_price = df.price
                                            result.append('该链接无法定位到唯一商品')
                                            if len(sn_price) != 0:
                                                price.append(sn_price)
                                            else:
                                                price.append('0')
                                    else:
                                        sn_price = df.price
                                        result.append('该链接无法定位到唯一商品')
                                        if len(sn_price) != 0:
                                            price.append(sn_price)
                                        else:
                                            price.append('0')
                                else:
                                    result.append('定制/专用/含禁止上架关键词,产品暂不通过')
                                    price.append(sn_price)
                            else:
                                state = '无货,请按要求提供在销渠道证明'
                                sn_price = df.price
                                result.append('无货,请按要求提供在销渠道证明')
                                price.append(sn_price)
                        else:
                            state = "无货,请按要求提供在销渠道证明"
                            # sn_price = df.price
                            result.append('无货,请按要求提供在销渠道证明')
                            price.append(sn_price)
                    else:
                        result.append('非自营,请按要求提供在销渠道证明')
                        price.append(df['price'])
                except:
                    print("链接有误,请按要求提供在销渠道证明")
                    result.append("链接有误,请按要求提供在销渠道证明")
                    price.append(df['price'])
            else:
                # NOTE(review): unreachable - the enclosing elif already guarantees
                # that one of "jd" / "gome" / "suning" is in the link.
                print("非自营,请按要求提供在销渠道证明")
                result.append("非自营,请按要求提供在销渠道证明")
                price.append(df['price'])
        else:
            # Applied through the system, not one of the three big sites, has a supplier: check the stop words
            if 'otherurl' in df.index.tolist():
                #main_url = "http://shanxi.gpmart.cn/productInfo/3300947.html"
                # Look up the product name and price for this id in reverse_data
                print(main_url)
                try:
                    reverse_cursor.execute(f"select a.product_name,b.price from product a left join product_price b on a.product_id=b.product_id where a.id='{df['id']}' and CHARINDEX(a.channel_product_id,'{main_url}')>0")
                    name_price = reverse_cursor.fetchone()
                    name = name_price[0]
                    price_ = name_price[1]
                except:
                    # Retry without the link condition (also reached when no row matched).
                    # NOTE(review): if this query returns no row either, the
                    # subscript below raises and is not handled.
                    reverse_cursor.execute(f"select a.product_name,b.price from product a left join product_price b on a.product_id=b.product_id where a.id='{df['id']}'")
                    name_price = reverse_cursor.fetchone()
                    name = name_price[0]
                    price_ = name_price[1]
                # Here a matching white word passes the row immediately (verdict appended inside the loop).
                pass_word_zc = '即将判断'
                for sw, ww in zip(list(word_list['stopword']), list(word_list['white_word'])):
                    if ww is None:
                        if str(sw) in str(name):
                            pass_word_zc = '不通过'
                            print("定制/专用/含禁止上架关键词,产品暂不通过")
                            result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                            break
                        else:
                            pass_word_zc = '暂通过'
                    else:
                        if str(sw) in str(name):
                            if str(ww) in str(name):
                                pass_word_zc = '通过'
                                result.append("通过")
                                break
                            else:
                                print("定制/专用/含禁止上架关键词,产品暂不通过")
                                result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                                pass_word_zc = '不通过'
                                break
                        else:
                            pass_word_zc = '暂通过'
                if pass_word_zc == '暂通过':
                    result.append("通过")
                price.append(price_)
            else:
                # Not applied through the system and not one of the three big sites
                # Look up the product name and price in reverse_data (keyed by the row's 'sku' here)
                reverse_cursor.execute(f"select a.product_name,b.price from product a left join product_price b on a.product_id=b.product_id where a.id='{df['sku']}' and CHARINDEX(a.channel_product_id,'{main_url}')>0")
                name_price = reverse_cursor.fetchone()
                if name_price:
                    print(f'name_price:{name_price},价格:{str(name_price[1])}')
                    name = name_price[0]
                    price_ = str(name_price[1])
                    pass_word_zc = '即将判断'
                    for sw, ww in zip(list(word_list['stopword']), list(word_list['white_word'])):
                        if ww is None:
                            if str(sw) in str(name):
                                pass_word_zc = '不通过'
                                print("定制/专用/含禁止上架关键词,产品暂不通过")
                                result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                                break
                            else:
                                pass_word_zc = '暂通过'
                        else:
                            if str(sw) in str(name):
                                if str(ww) in str(name):
                                    pass_word_zc = '通过'
                                    result.append("通过")
                                    break
                                else:
                                    print("定制/专用/含禁止上架关键词,产品暂不通过")
                                    result.append("定制/专用/含禁止上架关键词,产品暂不通过")
                                    pass_word_zc = '不通过'
                                    break
                            else:
                                pass_word_zc = '暂通过'
                    if pass_word_zc == '暂通过':
                        result.append("通过")
                    price.append(price_)
                else:
                    # No matching product in reverse_data.
                    result.append("非自营,请按要求提供在销渠道证明")
                    price.append(df['price'])
        print(f"已经处理{i + 1}条数据\n")
        # Sanity check: verdicts and prices must stay in lockstep.
        # NOTE(review): after this break both lists are shorter than the table,
        # so the column assignments below presumably fail - confirm intended.
        if len(result)!=len(price):
            print("问题链接:",main_url)
            break
    print(f'审核意见:{len(result)}')
    print(f'price:{len(price)}')
    print(f'总条数:{len(fileName)}')
    fileName['审核意见'] = result
    fileName['price'] = price
    # Flag links that repeat an earlier row's product (compared by the key extracted from the URL).
    # NOTE(review): uses the raw 'url' value, not the eval-unwrapped link audited above.
    urlss_all = []
    for i in range(len(fileName)):
        dw = fileName.loc[i]
        if dw.url:
            url_u = dw.url.strip()
            if len(url_u) == 0:
                pass
            elif "jd" in str(url_u) or "gome" in str(url_u) or "suning" in str(url_u):
                print(url_u)
                try:
                    if "jd" in str(url_u):
                        skuu = url_u.split('/')[-1].split('.')[0]
                    elif "suning" in str(url_u):
                        skuu = re.findall(".com/(.*?).html", url_u)[0]
                    elif "gome" in str(url_u):
                        skuu = re.findall(".cn/(.*?).html", url_u)[0]
                    # print(skuu)
                    if str(skuu) in urlss_all:
                        fileName.loc[i, '审核意见'] = '该链接与其他供应商提供链接重复,暂定不通过,正在处理,请稍等'
                    else:
                        urlss_all.append(skuu)
                except:
                    fileName.loc[i, '审核意见'] = '链接有误,请按要求提供在销渠道证明'
            else:
                pass
    print('更新状态中。。。')
    # Row ids grouped by verdict.
    # NOTE(review): id_sku and id_suning are never filled, and no code path in
    # this function produces the verdict collected in id_buxiaoshou.
    id_pass = []
    id_nopass = []
    id_nojd = []
    id_noname = []
    id_qita = []
    id_sku = []
    id_buxiaoshou = []
    id_ljcw = []
    id_suning = []
    id_cfcf = []
    id_noding = []
    # elif suggestion == '苏宁产品链接略过,人工审核':
    # id_suning.append(id_name)
    # elif sugestion == '已在库中':
    # id_sku.append(id_name)
    if source == 'zi_zh':
        # NOTE(review): 'today' is assigned but not used in this function.
        today = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
        for id_name, suggestion in zip(fileName['product_zh_id'], fileName['审核意见']):
            if suggestion == '通过':
                id_pass.append(id_name)
            elif suggestion == '无货,请按要求提供在销渠道证明':
                id_nopass.append(id_name)
            elif suggestion == '非自营,请按要求提供在销渠道证明':
                id_nojd.append(id_name)
            elif suggestion == '定制/专用/含禁止上架关键词,产品暂不通过':
                id_noname.append(id_name)
            elif suggestion == '该链接无法定位到最终产品,存在多个价格':
                id_buxiaoshou.append(id_name)
            elif suggestion == '链接有误,请按要求提供在销渠道证明':
                id_ljcw.append(id_name)
            elif suggestion == '其他销售渠道证明':
                id_qita.append(id_name)
            elif suggestion == '该链接与其他供应商提供链接重复,暂定不通过,正在处理,请稍等':
                id_cfcf.append(id_name)
            elif suggestion == '该链接无法定位到唯一商品':
                id_noding.append(id_name)
        # (Disabled) write the audited prices back:
        #for id_, price_ in zip(fileName['product_zh_id'], fileName['price']):
        #cursor.execute(f"update product_zh set price='{price_}' where id='{id_}'")
        #cursor.execute(f"update product_all set price='{price_}' where product_zh_id='{id_}'")
        print('更新数据库~')
        # Each verdict updates product_zh and product_all; note that several
        # rejections set state='1' in product_zh but state='2' in product_all.
        # The connection is opened with autocommit=True, so the commit() calls
        # are not what persists the updates.
        for id_num in id_pass:
            cursor.execute(f"update product_zh set state='1' where id= '{id_num}'")
            cursor.execute(f"update product_all set state='1' where product_zh_id= '{id_num}'")
        conn.commit()
        for i in id_nopass:
            cursor.execute(f"update product_zh set state='2',remark='无货,请按要求提供在销渠道证明',isdo='1' where id='{i}'")
            cursor.execute(f"update product_all set state='2',remark='无货,请按要求提供在销渠道证明' where product_zh_id='{i}'")
        conn.commit()
        for i in id_nojd:
            cursor.execute(f"update product_zh set state='2',remark='非自营,请按要求提供在销渠道证明',isdo='1' where id='{i}'")
            cursor.execute(f"update product_all set state='2',remark='非自营,请按要求提供在销渠道证明' where product_zh_id='{i}'")
        conn.commit()
        for i in id_noname:
            cursor.execute(f"update product_zh set state='1',remark='定制/专用/含禁止上架关键词,产品暂不通过' where id='{i}'")
            cursor.execute(f"update product_all set state='2',remark='定制/专用/含禁止上架关键词,产品暂不通过' where product_zh_id='{i}'")
        conn.commit()
        #for i_price, i_id in zip(price, fileName['product_zh_id']):
        #cursor.execute(f"update product_zh set price='{i_price}' where id='{i_id}'")
        #cursor.execute(f"update product_all set price='{i_price}' where product_zh_id='{i_id}'")
        #conn.commit()
        for i in id_buxiaoshou:
            cursor.execute(f"update product_zh set state='1',remark='该链接无法定位到最终产品,存在多个价格' where id='{i}'")
            cursor.execute(f"update product_all set state='2',remark='该链接无法定位到最终产品,存在多个价格' where product_zh_id='{i}'")
        conn.commit()
        for i in id_ljcw:
            cursor.execute(f"update product_zh set state='1',remark='链接有误,请按要求提供在销渠道证明' where id='{i}'")
            cursor.execute(f"update product_all set state='2',remark='链接有误,请按要求提供在销渠道证明' where product_zh_id='{i}'")
        conn.commit()
        for i in id_qita:
            cursor.execute(f"update product_zh set state='1',remark='其他销售渠道证明,需人工审核' where id='{i}'")
            cursor.execute(f"update product_all set state='2',remark='其他销售渠道证明,需人工审核' where product_zh_id='{i}'")
        conn.commit()
        for i in id_cfcf:
            cursor.execute(f"update product_zh set state='2',remark='该链接与其他供应商提供链接重复,暂定不通过,正在处理,请稍等' where id='{i}'")
            cursor.execute(f"update product_all set state='2',remark='该链接与其他供应商提供链接重复,暂定不通过,正在处理,请稍等' where product_zh_id='{i}'")
        conn.commit()
        for i in id_noding:
            cursor.execute(f"update product_zh set state='1',remark='该链接无法定位到唯一商品' where id='{i}'")
            cursor.execute(f"update product_all set state='2',remark='该链接无法定位到唯一商品' where product_zh_id='{i}'")
        conn.commit()
    else:
        # Any other source: rows are keyed by 'id' and only product_all_api is updated.
        today = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
        for id_name, suggestion in zip(fileName['id'], fileName['审核意见']):
            if suggestion == '通过':
                id_pass.append(id_name)
            elif suggestion == '无货,请按要求提供在销渠道证明':
                id_nopass.append(id_name)
            elif suggestion == '非自营,请按要求提供在销渠道证明':
                id_nojd.append(id_name)
            elif suggestion == '定制/专用/含禁止上架关键词,产品暂不通过':
                id_noname.append(id_name)
            elif suggestion == '该链接无法定位到最终产品,存在多个价格':
                id_buxiaoshou.append(id_name)
            elif suggestion == '链接有误,请按要求提供在销渠道证明':
                id_ljcw.append(id_name)
            elif suggestion == '其他销售渠道证明':
                id_qita.append(id_name)
            elif suggestion == '该链接与其他供应商提供链接重复,暂定不通过,正在处理,请稍等':
                id_cfcf.append(id_name)
            elif suggestion == '该链接无法定位到唯一商品':
                id_noding.append(id_name)
        #for id_, price_ in zip(fileName['product_zh_id'], fileName['price']):
        #cursor.execute(f"update product_zh set price='{price_}' where id='{id_}'")
        #cursor.execute(f"update product_all set price='{price_}' where product_zh_id='{id_}'")
        print('更新数据库~')
        for id_num in id_pass:
            cursor.execute(f"update product_all_api set state='1' where id= '{id_num}'")
        conn.commit()
        for i in id_nopass:
            cursor.execute(f"update product_all_api set state='2',remark='无货,请按要求提供在销渠道证明' where id='{i}'")
        conn.commit()
        for i in id_nojd:
            cursor.execute(f"update product_all_api set state='2',remark='非自营,请按要求提供在销渠道证明' where id='{i}'")
        conn.commit()
        for i in id_noname:
            cursor.execute(f"update product_all_api set state='2',remark='定制/专用/含禁止上架关键词,产品暂不通过' where id='{i}'")
        conn.commit()
        #for i_price, i_id in zip(price, fileName['product_zh_id']):
        #cursor.execute(f"update product_zh set price='{i_price}' where id='{i_id}'")
        #cursor.execute(f"update product_all set price='{i_price}' where product_zh_id='{i_id}'")
        #conn.commit()
        for i in id_buxiaoshou:
            cursor.execute(f"update product_all_api set state='2',remark='该链接无法定位到最终产品,存在多个价格' where id='{i}'")
        conn.commit()
        for i in id_ljcw:
            cursor.execute(f"update product_all_api set state='2',remark='链接有误,请按要求提供在销渠道证明' where id='{i}'")
        conn.commit()
        for i in id_qita:
            cursor.execute(f"update product_all_api set state='2',remark='其他销售渠道证明,需人工审核' where id='{i}'")
        conn.commit()
        for i in id_cfcf:
            cursor.execute(f"update product_all_api set state='2',remark='该链接与其他供应商提供链接重复,暂定不通过,正在处理,请稍等' where id='{i}'")
        conn.commit()
        for i in id_noding:
            cursor.execute(f"update product_all_api set state='2',remark='该链接无法定位到唯一商品' where id='{i}'")
        conn.commit()
    print('更新完成~')
def get_zgc_brand_info(url_brand_list,brand_list):
    """
    Map raw brand strings onto rows of the ``p_brand`` table.

    The two input lists are walked in lockstep; for every position both the
    URL-derived brand and the source brand are looked up and all hits are
    collected into one result list for that position.

    :param url_brand_list: brand strings, one per product
    :param brand_list: second list of brand strings, same length/order
    :return: ``(zgc_brand_list, zgc_brand_code_list)`` - two lists with one
             entry per input position; each entry is a list of matched brand
             names / a list of matched brand ids (as ``str``). The literal
             '无对应' is stored when a brand string matches nothing.
    """
    # NOTE(review): conn.close() at the bottom is skipped if anything above it raises.
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',database='ZI_NEW',autocommit=True)
    cursor = conn.cursor()
    cursor.execute('select id,name,cname,ename from p_brand where id not in (select distinct(pid) from p_brand where pid <> 0)')  # leave out brands that are the parent (main brand) of another brand
    data = (cursor.fetchall())
    brand_table = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor.description]).drop_duplicates().fillna('EMPTY')
    # Normalise the three name columns so the comparisons below are
    # whitespace- and case-insensitive on the table side.
    brand_table['name'] = brand_table['name'].apply(lambda x : str(x).strip().upper())
    brand_table['cname'] = brand_table['cname'].apply(lambda x : str(x).strip().upper())
    brand_table['ename'] = brand_table['ename'].apply(lambda x : str(x).strip().upper())
    zgc_brand_list = []
    zgc_brand_code_list = []
    for brand,source_brand in zip(url_brand_list,brand_list):
        combine_list = [brand,source_brand]
        temp_name = []
        temp_code = []
        # The inner loop variable deliberately reuses (shadows) the name `brand`.
        for brand in combine_list:
            if brand:
                # Any string mentioning 联想 is collapsed to exactly "联想".
                if "联想" in str(brand):
                    brand = "联想"
                #if "国产" in str(brand):
                #    brand = "错误信息"
            else:
                brand = '无信息'
            brand = str(brand)
            try:
                # Remove the text found inside the first [...] pair from the
                # brand string (the variable name suggests it is a country tag
                # - presumably; verify against real data).
                country = brand.split('[')[1].split(']')[-2]
                brand = brand.replace(country, '')
            except IndexError:
                # No '[' ... ']' pair present - keep the brand unchanged.
                pass
            # Chinese part: digits + CJK characters; a digits-only result is discarded.
            cn_res = ''.join(re.findall(r'[0-9\u4E00-\u9FA5]', brand))
            if cn_res.isdigit():
                cn_res = ''
            cnbrand = cn_res
            # English part: ASCII letters, digits and '-', upper-cased; digits-only is discarded.
            en_res = (''.join(re.findall(r'[a-zA-Z0-9-]', brand))).upper()
            if en_res.isdigit():
                en_res = ''
            enbrand = en_res
            if cnbrand == '':
                cnbrand = '无对应'
            # A single remaining character is treated as "no English brand".
            if enbrand == '' or len(enbrand) == 1:
                enbrand = '无对应'
            # 1) exact match on the English name
            findres = [[brandcode, name] for brandcode, enname,name in
                       zip(brand_table['id'].tolist(), brand_table['ename'].tolist(),brand_table['name'].tolist()) if enbrand == str(enname)]
            if findres:
                for find in findres:
                    temp_name.append(find[1])
                    temp_code.append(str(find[0]))
            else:
                # 2) exact match on the Chinese name; for 联想 every brand whose
                #    English name contains THINKPAD is accepted as well
                if brand == "联想":
                    findres = [[brandcode, name] for brandcode, cnname,enname,name in
                               zip(brand_table['id'].tolist(), brand_table['cname'].tolist(),brand_table['ename'].tolist(),brand_table['name'].tolist()) if cnbrand == str(cnname) or "THINKPAD" in str(enname)]
                else:
                    findres = [[brandcode, name] for brandcode, cnname,name in
                               zip(brand_table['id'].tolist(), brand_table['cname'].tolist(),brand_table['name'].tolist()) if cnbrand == str(cnname)]
                if findres:
                    for find in findres:
                        temp_name.append(find[1])
                        temp_code.append(str(find[0]))
                    continue
                else:
                    # 3) exact match of the whole (not upper-cased) brand string
                    #    against the upper-cased `name` column
                    findres = [[brandcode, name] for brandcode,name in
                               zip(brand_table['id'].tolist(), brand_table['name'].tolist()) if brand == str(name)]
                    if findres:
                        for find in findres:
                            temp_name.append(find[1])
                            temp_code.append(str(find[0]))
                        continue
                    else:
                        temp_name.append('无对应')
                        temp_code.append('无对应')
                        continue
            # NOTE(review): every fallback path above ends in `continue`, so only
            # the English-name match reaches this block - and those hits were
            # already appended once above, i.e. they end up in the result twice.
            # Confirm whether the duplication is intended before changing it.
            if findres:
                for find in findres:
                    temp_name.append(find[1])
                    temp_code.append(str(find[0]))
            else:
                temp_name.append('无对应')
                temp_code.append('无对应')
        zgc_brand_list.append(temp_name)
        zgc_brand_code_list.append(temp_code)
    conn.close()
    return zgc_brand_list, zgc_brand_code_list
def get_zgc_classify_info(classify_list,source):
    """
    Look up the ZI sub-category name(s) and code(s) for source category names.

    :param classify_list: iterable of category names as used by ``source``
    :param source: ``'zi_zh'`` selects the ``op_zh_zd_type`` mapping; any other
                   value is normalised (``'_'`` and ``'ZH'`` removed) and used
                   as the ``source`` column of
                   ``Product_Relation_Attribute_SubTitle``
    :return: ``(zd_category, zd_category_code)`` - two lists with one entry
             per input name; each entry is a list of sub-category names /
             codes. ``['无总后类别对应']`` marks a name without any mapping row.
    """
    no_match = '无总后类别对应'
    subcategory_names = list(classify_list)
    is_zi_zh = source == 'zi_zh'
    if is_zi_zh:
        sql = ("select a.subcategoryname,b.SubCategoryCode from op_zh_zd_type a "
               "left join VW_Relation_Property b "
               "on a.subcategoryname = b.SubCategoryName "
               "where a.second = %s")
    else:
        source = source.replace('_', '').replace('ZH', '')
        sql = ("select b.name 'subcategoryname',a.ZI_SubCategoryCode 'SubCategoryCode' "
               "from Product_Relation_Attribute_SubTitle a LEFT JOIN ZI_SubCategory b "
               "on a.zi_subcategorycode=b.subcategorycode "
               "where a.source=%s and a.SourceSubCategory=%s")

    zd_category = []
    zd_category_code = []
    if not subcategory_names:
        # Nothing to look up - do not open a connection at all.
        return zd_category, zd_category_code

    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',
                           database='ZI_DataBase')
    try:
        cursor = conn.cursor()
        for subcategory_name in subcategory_names:
            # Bound parameters instead of string interpolation: category names
            # come from scraped data and may contain quotes.
            if is_zi_zh:
                cursor.execute(sql, (str(subcategory_name),))
            else:
                cursor.execute(sql, (source, str(subcategory_name)))
            # Order-preserving de-duplication of the (name, code) rows.
            rows = list(dict.fromkeys(cursor.fetchall()))

            names = []
            codes = []
            for name, code in rows:
                if code is None:
                    continue
                # Only the zi_zh mapping filters out categories flagged as "错误".
                if is_zi_zh and '错误' in (name or ''):
                    continue
                names.append(name)
                codes.append(code)

            # The zi_zh branch reports the placeholder whenever nothing usable
            # is left; the other branch only does so when the query returned no
            # row at all (and may therefore yield an empty list). This mirrors
            # the previous behaviour of both branches.
            if not rows or (is_zi_zh and not names):
                zd_category.append([no_match])
                zd_category_code.append([no_match])
            else:
                zd_category.append(names)
                zd_category_code.append(codes)
    finally:
        # Release the connection even when a query fails.
        conn.close()
    return zd_category, zd_category_code
def remove_error_productcode(productcodes):
    '''
    Find the product codes whose brand name or sub-category name contains "错误".

    :param productcodes: de-duplicated product codes to check
    :return: list of the product codes that are flagged as erroneous
             (the codes to remove, not the cleaned list)
    '''
    codes = tuple(productcodes)
    if not codes:
        # "in ()" is not valid SQL, and there is nothing to check anyway.
        return []
    # One %s placeholder per code; the values themselves are bound by the driver.
    sql = '''
    select distinct c.productcode,c.zi_brandname,d.name 'zi_subcategoryname' from
    (select a.productcode,a.subcategorycode,b.name 'zi_brandname' from info_product a left join zi_brand b on a.brandcode=b.brandcode) c
    left join zi_subcategory d on c.subcategorycode=d.subcategorycode
    where c.productcode in (%s)
    ''' % ','.join(['%s'] * len(codes))
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',database='ZI_DataBase', autocommit=True)
    try:
        cursor = conn.cursor()
        cursor.execute(sql, codes)
        rows = cursor.fetchall()
    finally:
        # Release the connection even when the query fails.
        conn.close()
    # Both names come from left joins and can be NULL; treat NULL as "no flag".
    return [
        productcode
        for productcode, brand_name, subcategory_name in rows
        if '错误' in (brand_name or '') or '错误' in (subcategory_name or '')
    ]
def remove_old_productcode(productcodes):
    """
    Find the product codes that no longer exist as a ``sku`` in ``p_sku``.

    :param productcodes: iterable of product codes to check
    :return: list of the codes (in input order) for which ``p_sku`` has no row
    """
    codes = list(productcodes)
    if not codes:
        # Nothing to check - do not open a connection at all.
        return []
    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',database='ZI_NEW', autocommit=True)
    error_code = []
    try:
        cursor = conn.cursor()
        for code in codes:
            # Bound parameter instead of string interpolation; str() keeps the
            # comparison textual, exactly as the interpolated literal was.
            cursor.execute("select sku from p_sku where sku = %s", (str(code),))
            if not cursor.fetchall():
                error_code.append(code)
    finally:
        # Release the connection even when a query fails.
        conn.close()
    return error_code
def get_cn_number(char):
    """
    Count the Chinese characters in a string.

    :param char: string to inspect
    :return: number of characters whose code point lies in U+4E00..U+9FA5
             (both ends included)
    """
    return sum(1 for ch in char if 0x4E00 <= ord(ch) <= 0x9FA5)
def judge_unit(string):
    """
    Tell whether a token is acceptable, i.e. neither noise nor "number + unit".

    Returns ``False`` when the token contains one of the blacklisted
    substrings, holds three or more Chinese characters, or starts with a
    number that is directly followed by a known unit (e.g. ``80G``).
    Every other token yields ``True``.

    :param string: non-empty token to classify
    :return: ``True`` if the token passes all checks, ``False`` otherwise
    """
    unit_list = ['MM','CM', 'DM', 'ML', 'W', 'KW','KG','G','M','L','KBPS','MMM','P','V','KM']
    error_unit_list = ['公斤','NULL','PVC','MONITOR','QIANG','I3','I5','I7','I9','LED','NO','寸','USB','OPTIX','OSN','ZXMP','BASE','ZFSD']

    if any(blocked in string for blocked in error_unit_list):
        return False
    if get_cn_number(string) >= 3:
        return False
    if not string[0].isdigit():
        # Tokens that do not start with a digit cannot be "number + unit".
        return True

    number_len = 0   # characters belonging to the leading number
    letter_len = 0   # letters seen so far
    for ch in string:
        if ch.isdigit() or ch in ('.', '-', 'X'):
            if letter_len != 0:
                # A number character after the letters ends the unit part.
                break
            number_len += 1
        elif ch.isalpha():
            letter_len += 1

    unit = string[number_len:number_len + letter_len].upper()
    return unit not in unit_list
def _usable_model(candidate, allow_digits):
    """Return *candidate* if it may serve as a model string, otherwise ''."""
    if not candidate:
        return ''
    if not judge_unit(candidate):
        return ''
    # '*' / '×' are rejected outright (presumably dimension strings such as 210*297).
    if '*' in candidate or '×' in candidate:
        return ''
    # Purely numeric models are only accepted for the whitelisted brands.
    if candidate.isdigit() and not allow_digits:
        return ''
    return candidate


def get_model(name, params, brand):
    """
    Extract a product model from the product name and its parameter dict.

    The model stated in ``params`` wins; otherwise the longest usable
    ASCII token of ``name`` is taken.

    Example inputs::

        name   = '得力(deli)A4浅红色复印纸 彩色打印纸手工折纸卡纸 ZFSD 80g100张/包 7757'
        params = "{'产品型号': '得力7757', '产品详细参数': '...'}"
        brand  = '[得力]'

    :param name: product name
    :param params: ``repr`` of a dict with product parameters; anything that
                   cannot be parsed as a dict literal is ignored
    :param brand: brand string; its first ASCII part is stripped from candidates
    :return: the model string, or '无型号' when none could be determined
    """
    import ast

    # `params` comes from scraped data, so it must never be executed:
    # literal_eval only accepts Python literals (the former eval() ran any code).
    try:
        params = ast.literal_eval(params)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        params = {}
    if not isinstance(params, dict):
        params = {}

    # Drop the first "(...)" group from the name. The previous code retried the
    # very same ASCII split a second time, which could never succeed; the
    # retry now covers full-width parentheses instead.
    # NOTE(review): presumably that was the intent of the retry - confirm.
    for opening, closing in (('(', ')'), ('\uff08', '\uff09')):
        try:
            name = name.split(opening)[0] + name.split(closing)[1]
        except (IndexError, AttributeError):
            continue
        break

    brand = str(brand)
    # Brands for which a purely numeric model is legitimate.
    allow_digits = any(special in brand for special in ('得力', '世达', '华为'))
    brand_ascii = re.findall(r"[A-Za-z0-9-]+", brand)
    # Without an ASCII part use a sentinel that can never occur in a candidate.
    brand_remove = brand_ascii[0].upper() if brand_ascii else '没有英文品牌!'

    # Model taken from the parameters; the first key present wins.
    param_model = ''
    for key in ('认证型号', '产品型号', '型号'):
        if key in params:
            param_model = params[key]
            break
    if param_model is None:
        param_model = ''
    param_model = str(param_model).upper().replace(brand_remove, '')
    param_model = _usable_model(param_model, allow_digits)

    # Candidates from the name: ASCII tokens of at least two characters.
    # Unusable tokens become '?' so they still take part in the length
    # comparison below exactly as before.
    name_candidates = []
    for token in re.findall(r"[A-Za-z0-9-]+", name):
        if len(token) < 2:
            continue
        token = token.upper().replace(brand_remove, '')
        name_candidates.append(_usable_model(token, allow_digits) or '?')

    if name_candidates:
        name_model = max(name_candidates, key=len)
    else:
        name_model = param_model

    res_model = param_model or name_model
    if res_model in ('?', ''):
        res_model = '无型号'
    # '\\T' is a literal backslash followed by T (previously spelled with an
    # invalid escape sequence).
    return res_model.replace('/T', '').replace('\\T', '')
def predict_category(name_list):
    '''
    Predict a category for every raw product name.

    The model is run from inside ``./text_moudle_LXWL`` (presumably because it
    loads its files through relative paths - verify); the previous working
    directory is restored afterwards, also when the prediction fails.

    :param name_list: raw product names
    :return: list with the predicted categories
    '''
    previous_dir = os.getcwd()
    os.chdir("./text_moudle_LXWL")
    try:
        tf.reset_default_graph()
        model = LXWL_name2subcategory()
        return model.namelyst_predict(name_list)
    finally:
        # Never leave the process stranded in the model directory.
        os.chdir(previous_dir)
def fuzzy_match(zgc_brand_code_list,model_list,price_list):
    """
    Suggest up to three existing SKUs per product by brand, model and price.

    For every (brand code, model, price) triple the SKUs of that brand whose
    name contains the model (case-insensitive) are collected and ranked by
    the absolute difference between ``price`` and the SKU's index price.

    :param zgc_brand_code_list: brand ids, one per product
    :param model_list: model strings, same order
    :param price_list: prices, same order; unparsable prices count as 0
    :return: ``(res_name_list, res_code_list)`` - per product the names and
             the codes of the (at most three) closest SKUs
    """
    queries = list(zip(zgc_brand_code_list, model_list, price_list))
    if not queries:
        # Nothing to match - skip the full-table query altogether.
        return [], []

    # Load every active SKU together with its brand and index price.
    conn = pymssql.connect('123.56.115.207','zgcindex','jiayou202006','ZI_NEW')
    try:
        cursor = conn.cursor()
        cursor.execute("""select a.sku,a.skuname,b.brandid,c.index_price_wave from p_sku a
            left join p_spu b
            on a.spuid = b.id
            left join (select goods_id,index_price_wave from zdindex.dbo.zd_week_price where periods in (select top 1 periods from zdindex.dbo.zd_entry_goods_price)) c
            on a.sku = c.goods_id
            where a.state in ('1','4')""")
        data = cursor.fetchall()
        df_db = pd.DataFrame(data, columns=[column[0] for column in cursor.description])
    finally:
        # Release the connection even when the query fails.
        conn.close()

    res_name_list = []
    res_code_list = []
    for brand_code, model, price in queries:
        try:
            price = int(float(price))
        except (TypeError, ValueError, OverflowError):
            price = 0
        # Brand ids are compared numerically when possible, textually otherwise.
        try:
            brand_df = df_db[df_db['brandid'] == int(brand_code)]
        except (TypeError, ValueError):
            brand_df = df_db[df_db['brandid'] == str(brand_code)]

        wanted = str(model).strip().upper()
        matched_names = []
        matched_codes = []
        price_diffs = []
        seen_codes = set()
        for name, product_code, index_price in zip(brand_df['skuname'].tolist(),
                                                   brand_df['sku'].tolist(),
                                                   brand_df['index_price_wave'].tolist()):
            if not isinstance(name, str):
                # SKU without a name cannot be matched against the model.
                continue
            if wanted not in name.upper() or product_code in seen_codes:
                continue
            # A missing index price (NULL / NaN) counts as 0.
            if pd.isna(index_price):
                index_price = 0
            seen_codes.add(product_code)
            matched_names.append(name)
            matched_codes.append(product_code)
            price_diffs.append(abs(price - index_price))

        closest = pd.DataFrame({'name': matched_names,
                                'code': matched_codes,
                                'diff': price_diffs}).sort_values('diff', ascending=True).head(3)
        res_name_list.append(closest['name'].tolist())
        res_code_list.append(closest['code'].tolist())
    return res_name_list,res_code_list
def standard_point_sku_name(category_name,name_rules,cat_subtitle_df):
conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database= 'ZI_NEW',autocommit=True)
cursor_zi_new = conn_zi_new.cursor()
cursor_zi_new.execute(f"select * from vw_property where name in (select category_name from important_category)")
data = cursor_zi_new.fetchall()
subtitle_df = pd.DataFrame(data, columns=[tuple[0] for tuple in cursor_zi_new.description])
subtitle_df['require_param'] = subtitle_df['identy'].apply(lambda x: x[0])
subtitle_df['match_param'] = subtitle_df['identy'].apply(lambda x: x[1])
subtitle_df['standard_param'] = subtitle_df['identy'].apply(lambda x: x[2])
subtitle_df['part_param'] = subtitle_df['identy'].apply(lambda x: x[3])
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment