Commit afe6589d authored by rico.liu's avatar rico.liu

update

parent 16f76ec8
......@@ -929,9 +929,9 @@ class ES_Client:
}
}
print(self.es.update_by_query(index=self.index_name,body=body))
res = self.es.update_by_query(index=self.index_name,body=body)
print(res)
return res
def UpdateByMultipleFiveCondition(self,search_field_list,search_field_value_list,update_field,update_field_value):
inline = "ctx._source['" + str(update_field) + "']='" + str(update_field_value) + "'"
......
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 3 10:44:20 2021
@author: rico
"""
import os
import sys
# Make the parent of the current working directory importable so the
# project-local packages imported below can be resolved.
# `os` was used here without being imported, which raised NameError.
sys.path.append(os.path.dirname(os.getcwd()))
from ESCore.ES import ES_Client
import datetime
import pandas as pd
import pymssql
import pymysql
from public import zgc_api,Index
from SimilarCharactor.string_similarity import similarity_cn,similarity_en
#获取指定类别的参数项
def get_category_params_list(category_name):
    """Return the parameter names (subtitles) configured for a category.

    Queries ``vw_property`` in the ZI_NEW database for rows whose ``name``
    equals *category_name* (excluding the '产品型号' subtitle) and keeps the
    rows where neither the first nor the third character of ``identy`` is '0'.

    Args:
        category_name: Category name matched against ``vw_property.name``.

    Returns:
        list: Whitespace-stripped ``subtitle`` values of the rows that pass
        the filter (empty when nothing matches).
    """
    # NOTE(review): credentials are hard-coded; consider moving them to configuration.
    conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database= 'ZI_NEW',autocommit=True)
    try:
        cursor_zi_new = conn_zi_new.cursor()
        # Bind the category as a query parameter instead of formatting it into
        # the SQL text, so quotes in the name cannot break or alter the query.
        cursor_zi_new.execute(
            "select subtitle,identy from vw_property where name = %s and subtitle != '产品型号'",
            (category_name,),
        )
        rows = cursor_zi_new.fetchall()
        columns = [column[0] for column in cursor_zi_new.description]
    finally:
        # Always release the connection, even when the query fails.
        conn_zi_new.close()

    db_params = pd.DataFrame(rows, columns=columns)
    identy = db_params['identy']
    # Character 0 of identy is stored as 'require_param' and character 2 as
    # 'standard_param' by the original code; both must differ from '0'.
    require_param = identy.apply(lambda x: x[0])
    standard_param = identy.apply(lambda x: x[2])
    keep = (standard_param != '0') & (require_param != '0')
    return [subtitle.strip() for subtitle in db_params.loc[keep, 'subtitle']]
def export_params_data(es,filter_col,filter_value,category_name):
    """Export records whose parameter extraction failed to an Excel workbook.

    Fetches records from Elasticsearch, keeps those of *category_name* that are
    in the "get params failed" node status with ``FLOW_FINISH_STATUS == 'N'``,
    expands each record's ``DEAL_STD_PARAMS_DICT`` into one column per category
    parameter and writes ``{category_name}参数补充{date}.xlsx`` to the current
    working directory.

    Args:
        es: Client exposing ``SearchByCondition`` and ``SearchAimAllByCondition``.
        filter_col: ``'DATA_BATCH'`` to search by batch; any other value is
            passed to ``SearchAimAllByCondition``.
        filter_value: List of values; for non-batch filters only the first
            element is used.
        category_name: Category whose records are exported.
    """
    # Fetch the records of the requested batch / project.
    if filter_col == 'DATA_BATCH':
        df = es.SearchByCondition(filter_col,filter_value)
    else:
        filter_value = filter_value[0]
        df = es.SearchAimAllByCondition(filter_col,filter_value)

    # These categories use the *_REDUCTIVE status and export no model column.
    if category_name in ['笔记本','台式机','一体电脑','复印纸','工作站']:
        FLOW_NODE_STATUS = "SYS_GET_PARAMS_FAILD_REDUCTIVE"
        COL_LIST = ['DOC_ID','SOURCE_P_NAME','FINISH_P_LASTCATEGORY_NAME','FINISH_P_BRAND_NAME','SOURCE_P_PARAMS','SOURCE_P_URL']
    else:
        FLOW_NODE_STATUS = "SYS_GET_PARAMS_FAILD_NONREDUCTIVE"
        COL_LIST = ['DOC_ID','SOURCE_P_NAME','FINISH_P_LASTCATEGORY_NAME','FINISH_P_BRAND_NAME','FINISH_P_MODEL','SOURCE_P_PARAMS','SOURCE_P_URL']

    cat_df = df[(df['FINISH_P_LASTCATEGORY_NAME'] == category_name) & (df['FLOW_NODE_STATUS'] == FLOW_NODE_STATUS) & (df['FLOW_FINISH_STATUS'] == 'N')]
    params_list = get_category_params_list(category_name)

    # Collect the values in a plain dict of lists instead of exec-generated
    # variables: no code is built from data any more, so parameter names and
    # values may contain quotes, punctuation or any other character.
    param_columns = {param: [] for param in params_list}
    for _, row in cat_df.iterrows():
        # SECURITY NOTE(review): eval() runs whatever expression is stored in
        # the record. Kept as-is because the stored format is not visible
        # here; consider ast.literal_eval if the value is always a dict literal.
        params_dict = eval(row['DEAL_STD_PARAMS_DICT'])
        for param in params_list:
            # Backslashes in a value are replaced by the "needs manual input" marker.
            param_columns[param].append(params_dict[param].replace("\\","需人工补充"))

    # Copy so the new columns are added to an independent frame, not a slice.
    cat_df = cat_df[COL_LIST].copy()
    for param in params_list:
        cat_df[param] = param_columns[param]

    # Write the result; '/' in the category name is not valid in a file name.
    date = datetime.date.today().isoformat()
    category_name = category_name.replace("/","_")
    cat_df.to_excel(f"{category_name}参数补充{date}.xlsx")
# Create the Elasticsearch client
host = "http://123.56.114.138:9200/"
index_name = "model_params_test"
es = ES_Client(host,index_name)
# Export the parameter-completion data of a single category
filter_col = "DATA_PROJECT_NAME" # DATA_PROJECT_NAME = project name / DATA_BATCH = batch number
filter_value = ['徽采商城数据入库'] # project name or batch number, e.g. 全渠道数据入库 / 202111171554120001 / 徽采商城数据入库
category_name = '笔记本'
# Run the export
export_params_data(es,filter_col,filter_value,category_name)
# Export the parameter-completion data of several categories
#category_name_list = ['空调','摄像机','多功能一体机','U盘','移动硬盘','硒鼓/墨粉']
# Run the export for each category
#for category_name in category_name_list:
# export_params_data(es,filter_col,filter_value,category_name)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 3 11:12:31 2021
@author: rico
"""
import os
import sys
# Make the parent of the current working directory importable so the
# project-local packages imported below can be resolved.
# `os` was used here without being imported, which raised NameError.
sys.path.append(os.path.dirname(os.getcwd()))
from ESCore.ES import ES_Client
import datetime
import pandas as pd
import pymssql
import pymysql
from public import zgc_api,Index
from SimilarCharactor.string_similarity import similarity_cn,similarity_en
#上传数据字典补充数据
def ComplicatedDataDict(path):
    """Apply data-dictionary values from an Excel sheet to ``ShuJuZiDian_Cfg``.

    Reads the workbook at *path* and, for every row, sets ``stdvalue`` and
    ``simplevalue`` of the ``ShuJuZiDian_Cfg`` record whose ``id`` equals the
    row's ``id`` column. The connection uses autocommit, so each update is
    applied immediately.

    Args:
        path: Excel file providing ``id``, ``stdvalue`` and ``simplevalue`` columns.
    """
    # Read the workbook first so a bad path does not leave a connection open.
    df = pd.read_excel(path)
    index_ = Index()
    # NOTE(review): credentials are hard-coded; consider moving them to configuration.
    conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database= 'ZI_NEW',autocommit=True)
    try:
        cursor_zi_new = conn_zi_new.cursor()
        for process_index, (_, row) in enumerate(df.iterrows()):
            # Progress output. The fallback denominator of 1 presumably guards
            # the single-row case where len(df)-1 is 0 — TODO confirm against Index.
            try:
                print(index_(process_index,len(df)-1), end='%')
            except Exception:
                print(index_(process_index,1), end='%')
            # Convert a numpy scalar to the matching native Python value so
            # the driver can bind it.
            row_id = row['id']
            if hasattr(row_id, 'item'):
                row_id = row_id.item()
            # Bound parameters instead of string formatting: values containing
            # quotes no longer break (or alter) the statement.
            cursor_zi_new.execute(
                "update ShuJuZiDian_Cfg set stdvalue = %s,simplevalue = %s where id = %s",
                (str(row['stdvalue']), str(row['simplevalue']), row_id),
            )
    finally:
        # Always release the connection, even when an update fails.
        conn_zi_new.close()
# Update the data dictionary from the workbook below
path = '/Users/rico/Downloads/台式机数据字典补充(3)(1).xlsx'
ComplicatedDataDict(path)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 3 11:14:31 2021
@author: rico
"""
import os
import sys
# Make the parent of the current working directory importable so the
# project-local packages imported below can be resolved.
# `os` was used here without being imported, which raised NameError.
sys.path.append(os.path.dirname(os.getcwd()))
from ESCore.ES import ES_Client
import datetime
import pandas as pd
import pymssql
import pymysql
from public import zgc_api,Index
from SimilarCharactor.string_similarity import similarity_cn,similarity_en
#获取指定类别的参数项
def get_category_params_list(category_name):
    """Return the parameter names (subtitles) configured for a category.

    Queries ``vw_property`` in the ZI_NEW database for rows whose ``name``
    equals *category_name* (excluding the '产品型号' subtitle) and keeps the
    rows where neither the first nor the third character of ``identy`` is '0'.

    Args:
        category_name: Category name matched against ``vw_property.name``.

    Returns:
        list: Whitespace-stripped ``subtitle`` values of the rows that pass
        the filter (empty when nothing matches).
    """
    # NOTE(review): credentials are hard-coded; consider moving them to configuration.
    conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database= 'ZI_NEW',autocommit=True)
    try:
        cursor_zi_new = conn_zi_new.cursor()
        # Bind the category as a query parameter instead of formatting it into
        # the SQL text, so quotes in the name cannot break or alter the query.
        cursor_zi_new.execute(
            "select subtitle,identy from vw_property where name = %s and subtitle != '产品型号'",
            (category_name,),
        )
        rows = cursor_zi_new.fetchall()
        columns = [column[0] for column in cursor_zi_new.description]
    finally:
        # Always release the connection, even when the query fails.
        conn_zi_new.close()

    db_params = pd.DataFrame(rows, columns=columns)
    identy = db_params['identy']
    # Character 0 of identy is stored as 'require_param' and character 2 as
    # 'standard_param' by the original code; both must differ from '0'.
    require_param = identy.apply(lambda x: x[0])
    standard_param = identy.apply(lambda x: x[2])
    keep = (standard_param != '0') & (require_param != '0')
    return [subtitle.strip() for subtitle in db_params.loc[keep, 'subtitle']]
def check_configure_price(category,path):
    """Report component values in a reviewed sheet that have no configured price.

    For every non-rejected row of the workbook at *path* (rows whose '驳回原因'
    cell is empty), each category parameter that is a priced part is looked up
    in the configure-price table. Every (part, value) pair without a price is
    reported once and a row for it is inserted into ``configure_price``
    (the insert supplies no price).

    Args:
        category: Category name; only '笔记本', '台式机', '一体电脑' and
            '工作站' are checked.
        path: Excel workbook with one column per category parameter and an
            optional '驳回原因' (rejection reason) column.

    Returns:
        A fixed message string when *category* is not one of the checked
        categories; otherwise the list of "missing price" messages (empty when
        every price exists).
    """
    if category not in ['笔记本','台式机','一体电脑','工作站']:
        return "该类别无配件,无需校验配件价格"

    params_list = get_category_params_list(category)
    # NOTE(review): get_configure_price is neither defined nor imported in the
    # visible part of this script — confirm where it is expected to come from.
    configure_price_df = get_configure_price()
    cat_configure_price_df = configure_price_df[configure_price_df['category_name'] == category]
    # Loop-invariant: build the set of priced parts once instead of per cell.
    priced_parts = set(cat_configure_price_df['part'])

    cat_params_df = pd.read_excel(path)
    try:
        cat_params_df['驳回原因'] = cat_params_df['驳回原因'].apply(str)
    except KeyError:
        # No rejection column in the sheet: treat every row as not rejected.
        cat_params_df['驳回原因'] = ['nan'] * len(cat_params_df)

    date = datetime.date.today().isoformat()
    index_ = Index()
    res_list = []

    # NOTE(review): credentials are hard-coded; consider moving them to configuration.
    conn = pymssql.connect(host='123.56.115.207', user='zgcindex', password='jiayou202006', database='ZI_NEW',autocommit=True)
    try:
        cursor = conn.cursor()
        cursor.execute("select name,id from p_category where name in ('笔记本','台式机','一体电脑','工作站')")
        # Rows are (name, id) pairs in the order selected above.
        category_dict = {name: category_id for name, category_id in cursor.fetchall()}

        for process_index, (_, row) in enumerate(cat_params_df.iterrows()):
            # Progress output. The fallback denominator of 1 presumably guards
            # the single-row case where len-1 is 0 — TODO confirm against Index.
            try:
                print(index_(process_index,len(cat_params_df)-1), end='%')
            except Exception:
                print(index_(process_index,1), end='%')

            # Rejected rows (non-empty reason) are not checked.
            if row['驳回原因'] != 'nan':
                continue

            for params in params_list:
                if params not in priced_parts:
                    continue
                value = row[params]
                price_rows = cat_configure_price_df[(cat_configure_price_df['part'] == params) & (cat_configure_price_df['detail'] == value)]
                if not price_rows.empty:
                    continue
                # str.format via f-string: a numeric or empty cell no longer
                # raises TypeError while building the message.
                mark = f"无该配件价格:{category} {params} {value}"
                if mark in res_list:
                    continue
                res_list.append(mark)
                category_code = category_dict[category]
                # Bound parameters instead of string formatting, so quotes in
                # a value cannot break or alter the statement.
                cursor.execute(
                    "insert into configure_price (category_code,part,detail,create_date,category_name) values (%s,%s,%s,%s,%s)",
                    (str(category_code), params, str(value), date, category),
                )
    finally:
        # Always release the connection, even when a statement fails.
        conn.close()

    if res_list:
        print("请补充配件价格")
        print(res_list)
    else:
        print("配件价格齐全,无需补充")
    return res_list
# Data that passed review still needs its component prices checked
# NOTE(review): the category is '笔记本' while the workbook name mentions 激光打印机 — confirm the pair matches before running.
category_name = '笔记本'
checked_path = "/Users/rico/Downloads/激光打印机参数补充回导.xlsx"
check_configure_price(category_name,checked_path)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 3 11:17:16 2021
@author: rico
"""
import os
import sys
# Make the parent of the current working directory importable so the
# project-local packages imported below can be resolved.
# `os` was used here without being imported, which raised NameError.
sys.path.append(os.path.dirname(os.getcwd()))
from ESCore.ES import ES_Client
import datetime
import pandas as pd
import pymssql
import pymysql
from public import zgc_api,Index
from SimilarCharactor.string_similarity import similarity_cn,similarity_en
#获取指定类别的参数项
def get_category_params_list(category_name):
    """Return the parameter names (subtitles) configured for a category.

    Queries ``vw_property`` in the ZI_NEW database for rows whose ``name``
    equals *category_name* (excluding the '产品型号' subtitle) and keeps the
    rows where neither the first nor the third character of ``identy`` is '0'.

    Args:
        category_name: Category name matched against ``vw_property.name``.

    Returns:
        list: Whitespace-stripped ``subtitle`` values of the rows that pass
        the filter (empty when nothing matches).
    """
    # NOTE(review): credentials are hard-coded; consider moving them to configuration.
    conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='zgcprice20200628',database= 'ZI_NEW',autocommit=True)
    try:
        cursor_zi_new = conn_zi_new.cursor()
        # Bind the category as a query parameter instead of formatting it into
        # the SQL text, so quotes in the name cannot break or alter the query.
        cursor_zi_new.execute(
            "select subtitle,identy from vw_property where name = %s and subtitle != '产品型号'",
            (category_name,),
        )
        rows = cursor_zi_new.fetchall()
        columns = [column[0] for column in cursor_zi_new.description]
    finally:
        # Always release the connection, even when the query fails.
        conn_zi_new.close()

    db_params = pd.DataFrame(rows, columns=columns)
    identy = db_params['identy']
    # Character 0 of identy is stored as 'require_param' and character 2 as
    # 'standard_param' by the original code; both must differ from '0'.
    require_param = identy.apply(lambda x: x[0])
    standard_param = identy.apply(lambda x: x[2])
    keep = (standard_param != '0') & (require_param != '0')
    return [subtitle.strip() for subtitle in db_params.loc[keep, 'subtitle']]
def import_params_data(es,category_name,path):
    """Push reviewed parameter data from an Excel workbook back to Elasticsearch.

    For each row of the workbook at *path*: when the '驳回原因' (rejection
    reason) cell is filled, the record is sent to ``es.UpdateParamsReturnData``
    with the reason and a timestamp; otherwise the row's category parameters
    are serialized and sent to ``es.UpdateParamsDealData``. Records are
    addressed by their ``DOC_ID``.

    Args:
        es: Client exposing ``UpdateParamsReturnData`` and ``UpdateParamsDealData``.
        category_name: Category whose parameter columns are read from the sheet.
        path: Excel workbook with a ``DOC_ID`` column, one column per category
            parameter and an optional '驳回原因' column.
    """
    # 'CPU型号' is forced to str so pandas does not parse it as a number.
    df = pd.read_excel(path,converters = {'CPU型号':str})
    try:
        df['驳回原因'] = df['驳回原因'].apply(str)
    except KeyError:
        # No rejection column in the sheet: treat every row as not rejected.
        df['驳回原因'] = ['nan'] * len(df)

    params_list = get_category_params_list(category_name)
    index_ = Index()
    search_field = "DOC_ID"

    for process_index, (_, row) in enumerate(df.iterrows()):
        # Progress output. The fallback denominator of 1 presumably guards the
        # single-row case where len(df)-1 is 0 — TODO confirm against Index.
        # `except Exception` (not a bare except) so Ctrl-C is not swallowed.
        try:
            print(index_(process_index,len(df)-1), end='%')
        except Exception:
            print(index_(process_index,1), end='%')

        search_field_value = str(row['DOC_ID'])
        remark = row['驳回原因']
        if remark != 'nan':
            # Rejected row: send the reason together with a timestamp.
            # NOTE(review): local wall-clock time is labelled +08:00 — confirm
            # the machine running this is in that zone.
            now_time = datetime.datetime.now().isoformat(timespec='seconds')+"+08:00"
            es.UpdateParamsReturnData(search_field,search_field_value,remark,now_time)
            continue

        # Accepted row: collect the parameter values, replacing non-breaking
        # spaces with regular spaces.
        update_dict = {params: str(row[params]).replace('\xa0',' ') for params in params_list}
        # NOTE(review): this produces JSON-looking text only while no value
        # contains a quote character; kept unchanged because the format that
        # UpdateParamsDealData expects is not visible here.
        params_info = str(update_dict).replace("'",'\"')
        es.UpdateParamsDealData(category_name,search_field,search_field_value,params_info)
# Import the reviewed data
# NOTE(review): the category is '笔记本' while the workbook name mentions 激光打印机 — confirm the pair matches before running.
host = "http://123.56.114.138:9200/"
index_name = "model_params_test"
es = ES_Client(host,index_name)
category_name = '笔记本'
checked_path = "/Users/rico/Downloads/激光打印机参数补充回导.xlsx"
import_params_data(es,category_name,checked_path)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 3 11:29:33 2021
@author: rico
"""
import os
import sys
# Make the parent of the current working directory importable so the
# project-local packages imported below can be resolved.
# `os` was used here without being imported, which raised NameError.
sys.path.append(os.path.dirname(os.getcwd()))
from ESCore.ES import ES_Client
import datetime
import pandas as pd
import pymssql
import pymysql
from public import zgc_api,Index
from SimilarCharactor.string_similarity import similarity_cn,similarity_en
# Create the Elasticsearch client for the index being checked
host = "http://123.56.114.138:9200/"
index_name = "model_params_test"
es = ES_Client(host,index_name)
# Load the result data by batch number
res = es.SearchByCondition("DATA_BATCH",["202111231900550001"])
# Alternative: load the result data by project
#res = es.SearchAimAllByCondition("DATA_PROJECT_NAME","徽采商城数据入库")
# Optional: narrow the batch / project data down to one category
#res = res[res['FINISH_P_LASTCATEGORY_NAME'] == '台式机']
'''
#状态总揽
FLOW_NODE_STATUS
INIT:初始化状态
STOP_WORD_FAILD:停用词失败
SKU_EXIST:SKU匹配成功(终止态)
URL_EXIST:URL匹配成功(终止态)
FIRST_CHECK_SUCCESS:初步审核成功
SYS_GET_BRAND_SUCCESS:品牌提取成功
SYS_GET_BRAND_FAILD:品牌提取失败
SYS_GET_CATEGORY_SUCCESS_NONPOINT:类别提取成功-非重点类
SYS_GET_CATEGORY_SUCCESS_NONREDUCTIVE:类别提取成功-非还原类
SYS_GET_CATEGORY_SUCCESS_REDUCTIVE:类别提取成功-还原类
SYS_GET_CATEGORY_FAILD:类别提取失败
SYS_GET_MODEL_SUCCESS_NONPOINT:型号提取成功-非重点类
SYS_GET_MODEL_SUCCESSS_NONREDUCTIVE:型号提取成功-非还原类
SYS_GET_MODEL_FAILD_NONPOINT:型号提取失败-非重点类
SYS_GET_MODEL_FAILD_NONREDUCTIVE:型号提取失败-非还原类
SYS_GET_PARAMS_SUCCESS_NONREDUCTIVE:匹配标准参数提取成功-非还原类
SYS_GET_PARAMS_SUCCESS_REDUCTIVE:匹配标准参数提取成功-还原类
SYS_GET_PARAMS_FAILD_NONREDUCTIVE:匹配标准参数提取失败-非还原类
SYS_GET_PARAMS_FAILD_REDUCTIVE:匹配标准参数提取失败-还原类
SYS_FINISH_MATCH:产品名称或匹配项匹配成功(终止态)
SYS_GET_PARAMS_SUCCESS_END:必填项参数提取成功(终止态)
SYS_GET_PARAMS_FAILD_END:必填项参数提取失败
'''
# Review rule: apart from rejected records, everything counted below should be
# in a terminal state. Otherwise handle it on the web page or report it to Beijing.
# Overview: record counts per node status and category
by_category = res[['DOC_ID','FLOW_NODE_STATUS','FINISH_P_LASTCATEGORY_NAME']]
print(by_category.groupby(['FLOW_NODE_STATUS','FINISH_P_LASTCATEGORY_NAME']).count())
# Rejected records per node status and remark
by_remark = res[['DOC_ID','FLOW_NODE_STATUS','FINISH_P_REMARK']]
print(by_remark.groupby(['FLOW_NODE_STATUS','FINISH_P_REMARK']).count())

# Check for products that reached a terminal status with an empty FINISH_P_NAME
category_name = '台式机'
if category_name:
    # Node status that matching records are rolled back to.
    if category_name in ['笔记本','台式机','一体电脑','复印纸','工作站']:
        FLOW_NODE_STATUS = "SYS_GET_PARAMS_SUCCESS_REDUCTIVE"
    else:
        FLOW_NODE_STATUS = "SYS_GET_PARAMS_SUCCESS_NONREDUCTIVE"

    # For each terminal status, in order: first flip finished ('Y') records
    # back to 'N', then reset the node status of the 'N' records.
    rollback_steps = (
        ('Y', "FLOW_FINISH_STATUS", "N"),
        ('N', "FLOW_NODE_STATUS", FLOW_NODE_STATUS),
    )
    updated_counts = []
    for terminal_status in ('SYS_FINISH_MATCH', 'SYS_GET_PARAMS_SUCCESS_END'):
        for finish_status, update_field, update_field_value in rollback_steps:
            search_field_list = ['FINISH_P_LASTCATEGORY_NAME','FLOW_NODE_STATUS','FINISH_P_NAME','FLOW_FINISH_STATUS']
            search_field_value_list = [category_name,terminal_status,'',finish_status]
            update_res = es.UpdateByMultipleFourCondition(search_field_list,search_field_value_list,update_field,update_field_value)
            updated_counts.append(update_res['updated'])

    # The check passes only when none of the four updates touched a record.
    check_flag = all(count == 0 for count in updated_counts)
    if check_flag:
        date = str(datetime.datetime.now()).split(".")[0]
        res.to_excel(f"(未确认){date}数据.xlsx")
......@@ -239,7 +239,10 @@ def check_data(es,type,path):
return False,path
'''
品牌、类别、型号补充完成后导入脚本
根据数据情况,修改type和path参数即可
'''
#创建ES客户端
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment