Commit 9130348c authored by rico.liu's avatar rico.liu

init

parent 3cbd2818
This diff is collapsed.
# Auto detect text files and perform LF normalization
* text=auto
# SimilarCharactor
基于音形码,EditDistance的字符串纠正相似度算法
音形码格式:【韵母,声母,结构,四角编码,笔画数】 共8位
音形码相似度算法 参考博客https://blog.csdn.net/chndata/article/details/41114771
TODO 字符串错误匹配算法 参考
结构、四角编码 抓取http://zidian.miaochaxun.com 数据
韵母、声母 使用pinyin包
笔画数抓取https://bihua.51240.com 数据
入口函数在string_similarity.py
繁简切换 Done
ongoing 相似度分值映射调整(sigmoid函数映射)
TODO 字符串包含关系
ongoing 相似度算法添加与调整(bm25)
TODO 字符串错位
# Maps every ASCII digit character ('0'-'9') to the Chinese numeral character
# that is looked up in its place when a string is encoded.
char_number_directionary = dict(zip('0123456789', '零一二三四五六七八九'))
\ No newline at end of file
def minEditDist(sm, sn):
    """Return the Levenshtein edit distance between two sequences.

    Insertions, deletions and substitutions each cost 1.

    :param sm: first sequence (for example a string)
    :param sn: second sequence
    :return: minimum number of single-element edits turning ``sm`` into ``sn``
    """
    # Only the previous row of the dynamic-programming table is needed to
    # compute the current one, so two rows replace the full matrix.
    previous = list(range(len(sn) + 1))
    for i, item_m in enumerate(sm, start=1):
        current = [i]
        for j, item_n in enumerate(sn, start=1):
            cost = 0 if item_m == item_n else 1
            current.append(min(previous[j] + 1,          # deletion
                               current[j - 1] + 1,       # insertion
                               previous[j - 1] + cost))  # substitution / match
        previous = current
    return previous[-1]
# One-character code for the leading-consonant part of a pinyin syllable.
# NOTE: despite the name, get_code() indexes this table with the consonant
# part (the value extract_initial_and_final() calls `final`).
# Consonants that share a code are treated as the same sound by the encoding:
# n/l, zh/z, ch/c and sh/s.
final_code_dictionary = {
    'b': '1',
    'p': '2',
    'm': '3',
    'f': '4',
    'd': '5',
    't': '6',
    'n': '7',
    'l': '7',
    'g': '8',
    'k': '9',
    'h': 'A',
    'j': 'B',
    'q': 'C',
    'x': 'D',
    'zh': 'E',
    'ch': 'F',
    'sh': 'G',
    'r': 'H',
    'z': 'E',
    'c': 'F',
    's': 'G',
    'y': 'I',
    'w': 'J',
    # '0' is the placeholder used when a syllable has no leading consonant.
    '0': '0',
}
\ No newline at end of file
# One-character code for the vowel part of a pinyin syllable.
# NOTE: despite the name, get_code() indexes this table with the vowel part
# (the value extract_initial_and_final() calls `initial`).
# an/ang, en/eng, in/ing and ai/ei deliberately share a code.
initial_code_dictionary = {
    'a': '1',
    'o': '2',
    'e': '3',
    'i': '4',
    'u': '5',
    'v': '6',
    'ai': '7',
    'ei': '7',
    'ui': '8',
    'ao': '9',
    'ou': 'A',
    'iu': 'B',
    'ie': 'C',
    've': 'D',
    'er': 'E',
    'an': 'F',
    'en': 'G',
    'in': 'H',
    'un': 'I',
    'ven': 'J',
    'ang': 'F',
    'eng': 'G',
    'ing': 'H',
    # NOTE(review): the only lowercase code in the table - possibly meant to
    # be 'K'; confirm before changing, generated code tables may depend on it.
    'ong': 'k',
    'uo': 'L',
    'ian': 'M',
    'iao': 'N',
    'uai': 'O',
    'uan': 'P',
    'uang': 'Q',
    'ua': 'R',
    'iong': 'S',
    'ia': 'T',
    # NOTE(review): 'iang' reuses the code of 'uai' and 'ue' reuses the code
    # of 'uan'; neither pair appears in similar_pronunciation_dictionary, so
    # these may be accidental collisions - TODO confirm.
    'iang': 'O',
    'ue': 'P',
    # The empty string (no vowel part) maps to the placeholder '0'.
    '': '0',
}
\ No newline at end of file
# Pairs of pinyin parts that are treated as sounding alike.  The mapping is
# symmetric: every key maps to its counterpart and the counterpart maps back.
similar_pronunciation_dictionary = {
    # n / l
    'n': 'l',
    'l': 'n',
    # front / back nasal vowels
    'an': 'ang',
    'ang': 'an',
    'en': 'eng',
    'eng': 'en',
    'in': 'ing',
    'ing': 'in',
    # z, c, s versus zh, ch, sh
    'z': 'zh',
    'c': 'ch',
    's': 'sh',
    'zh': 'z',
    'ch': 'c',
    'sh': 's',
}
\ No newline at end of file
from SimilarCharactor.string_util import string2code,traditional2simplified
from SimilarCharactor.edit_distance import minEditDist
import difflib
import Levenshtein
def similarity_cn(string1, string2):
    """Return a similarity score for two Chinese strings.

    Both strings are converted to simplified Chinese, encoded with
    ``string2code`` and compared with ``minEditDist``.  The score is
    ``1 - distance / length_of_the_longer_code``.

    :param string1: first string
    :param string2: second string
    :return: similarity score; 1 means the encoded strings are identical
    """
    code_string1 = string2code(traditional2simplified(string1))
    code_string2 = string2code(traditional2simplified(string2))
    longest = max(len(code_string1), len(code_string2))
    if longest == 0:
        # Neither input produced any code (empty input, or only characters
        # string2code has no entry for).  Dividing by zero would crash, so
        # fall back to plain equality of the raw inputs.
        return 1.0 if string1 == string2 else 0.0
    return 1 - minEditDist(code_string1, code_string2) / longest
def similarity_en(string1, string2):
    """Return a similarity score for two strings of Latin text.

    The score is the equally weighted mean of ``Levenshtein.ratio`` and
    ``difflib.SequenceMatcher.quick_ratio`` for the two inputs.

    :param string1: first string
    :param string2: second string
    :return: the averaged ratio
    """
    levenshtein_score = Levenshtein.ratio(string1, string2)
    matcher = difflib.SequenceMatcher(None, string1, string2)
    difflib_score = matcher.quick_ratio()
    return levenshtein_score * 0.5 + difflib_score * 0.5
from pypinyin import pinyin,Style,lazy_pinyin
from SimilarCharactor.quadrilateral_code_dictionary import quadrilateral_code_dictionary as qcd
from SimilarCharactor.structure_code_dictionary import structure_code_dictionary as scd
from SimilarCharactor.initial_code_dictionary import initial_code_dictionary as icd
from SimilarCharactor.final_code_dictionary import final_code_dictionary as fcd
from SimilarCharactor.write_number_dictionary import write_number_dictionary as wnd
from SimilarCharactor.character import symbol_lst
from SimilarCharactor.code_directionary import code_directionary as cd
from SimilarCharactor.char_number_directionary import char_number_directionary as cnd
from zhconv import convert
def extract_initial_and_final(pinyin_string):
    """Split a pinyin syllable into its vowel part and its consonant part.

    NOTE: the names are swapped relative to the usual terminology.  The first
    returned value (``initial``) is the trailing vowel part and the second
    (``final``) is the leading consonant, or ``'0'`` when the syllable has no
    leading consonant.

    :param pinyin_string: one syllable without tone marks, e.g. ``'zhang'``
    :return: tuple ``(initial, final)``, e.g. ``('ang', 'zh')``
    """
    if not pinyin_string:
        # Indexing an empty syllable would raise IndexError; report it as
        # "no vowel part, no consonant".
        return '', '0'
    if pinyin_string.startswith(('zh', 'ch', 'sh')):
        # Two-letter consonants must be tested before the one-letter ones.
        return pinyin_string[2:], pinyin_string[:2]
    if pinyin_string[0] in 'bpmfdtnlgkhjqxrzcsyw':
        return pinyin_string[1:], pinyin_string[0]
    # The syllable starts with a vowel: it has no leading consonant.
    return pinyin_string, '0'
# Code format: [final, initial, structure, four-corner code, stroke count],
# 8 characters in total.
def string2code(string):
    """Encode a string as the concatenation of its per-character codes.

    ASCII digits are first replaced through ``cnd``; each character is then
    looked up in ``cd``.  Characters without an entry contribute nothing.

    :param string: text to encode
    :return: the concatenated code string
    """
    digits = ('1', '2', '3', '4', '5', '6', '7', '8', '9', '0')
    return ''.join(
        cd.get(cnd[ch] if ch in digits else ch, '')
        for ch in string
    )
# Compute the sound-shape code of every character.
def get_code():
    """Write the sound-shape code of every character to a text file.

    For each character returned by ``symbol_lst()`` the code is built from
    five lookups (``icd``, ``fcd``, ``scd``, ``qcd``, ``wnd``).  One line of
    the form ``'<char>':'<code>',`` is written per character to
    ``./SimilarCharactor/音型码.txt``.

    The file is written as UTF-8 so the output does not depend on the
    platform's default encoding, and it is closed even when a lookup fails.
    """
    with open('./SimilarCharactor/音型码.txt', 'w', encoding='utf-8') as file:
        for char in symbol_lst():
            pinyin_char = lazy_pinyin(char)[0]
            # `initial` is the vowel part and `final` the consonant part.
            initial, final = extract_initial_and_final(pinyin_char)
            code_string = icd[initial] + fcd[final] + scd[char] + qcd[char] + wnd[char]
            file.write("'" + char + "':'" + code_string + "',\n")
def traditional2simplified(string):
    """Return ``string`` converted with zhconv's ``convert`` to the ``'zh-cn'`` locale."""
    simplified = convert(string, 'zh-cn')
    return simplified
if __name__ == '__main__':
    # Regenerate the code file only when this module is run as a script.
    # Without the guard every import of this module (string_similarity
    # imports it) rewrote the file as a side effect.
    get_code()
\ No newline at end of file
# -*-coding:utf-8-*-
# 此模块用于爬取汉字结构字典
import requests
from bs4 import BeautifulSoup
import re
from tqdm import tqdm
def get_url(output_path='C:/Users/fooww/Desktop/cv/Word_Structure_Dict.txt'):
    """Crawl the character-structure pages and write a structure dictionary.

    One page is fetched per structure keyword.  Every character linked from
    the page's ``<p class="zi">`` elements is written as a line of the form
    ``'<char>':'<tag>',``.  The tag is the keyword's position in the list:
    0-9 for the first ten keywords, then 'A', 'B', ... for the rest.

    :param output_path: file to (over)write; defaults to the original
        hard-coded location
    """
    key_word_lst = ['danyi', 'zuoyou', 'shangxia', 'zuozhongyou', 'shangzhongxia', 'youshangbaowei',
                    'zuoshangbaowei', 'zuoxiabaowei', 'shangsanbaowei', 'xiasanbaowei', 'zuosanbaowei',
                    'quanbaowei', 'xiangqian', 'pinzi']
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                            'Chrome/63.0.3239.132 Safari/537.36'}
    # Open once in write mode: this truncates any previous result and the
    # handle is closed even if a request or parse step fails.
    with open(output_path, 'w', encoding='utf-8') as file1:
        for index, key_word in enumerate(key_word_lst):
            url = 'http://zidian.miaochaxun.com/' + key_word + '.html'
            print(url)
            res1 = requests.get(url, headers=header)
            res1.encoding = 'utf-8'
            soup1 = BeautifulSoup(res1.text, 'html.parser')
            zi_list = soup1.find_all('p', class_='zi')
            # Drop the <span> children first so only the character links remain.
            for s in zi_list:
                for span in s.find_all('span'):
                    span.extract()
            # chr(55 + 10) == 'A', so indexes 10, 11, ... become 'A', 'B', ...
            tag = index if index < 10 else chr(55 + index)
            for s in zi_list:
                for word in s.find_all('a'):
                    file1.write("'{0}':'{1}',\n".format(word.get_text(), tag))


if __name__ == '__main__':
    get_url()
def pinyin_2_hanzi(pinyinList):
    """Print the Pinyin2Hanzi DAG conversion result(s) for a pinyin list.

    :param pinyinList: list of pinyin strings passed straight to ``dag``
    """
    from Pinyin2Hanzi import DefaultDagParams
    from Pinyin2Hanzi import dag

    dagParams = DefaultDagParams()
    # path_num presumably sets how many candidate paths are returned; the
    # code asks for 1 - confirm against the Pinyin2Hanzi documentation.
    result = dag(dagParams, pinyinList, path_num=1, log=True)
    for item in result:
        # item.path is the conversion result, item.score its score.
        print(item.score, item.path)


if __name__ == '__main__':
    # Demo call; guarded so importing this module has no side effects.
    pinyin_2_hanzi(['hao kai xin'])
\ No newline at end of file
# coding=utf-8
import os
import pygame
import character
# This script renders every character to a PNG image so that OpenCV can be
# used for visual-similarity comparison.
chinese_dir = 'D:/py/chinese/'
# makedirs also creates a missing parent directory and is a no-op when the
# directory already exists.
os.makedirs(chinese_dir, exist_ok=True)
pygame.init()
# The Microsoft YaHei font file must exist at this path (look in
# C:\Windows\Fonts).  A raw string keeps the backslashes literal, and the
# font is loaded once instead of once per character.
font = pygame.font.Font(r"C:\Windows\Fonts\msyh.ttf", 100)
for i, word in enumerate(character.symbol_lst()):
    # Black glyph on a white background.
    rtext = font.render(word, True, (0, 0, 0), (255, 255, 255))
    pygame.image.save(rtext, os.path.join(chinese_dir, str(i) + ".png"))
import requests
from bs4 import BeautifulSoup
import re
import character
from tqdm import tqdm
def transutf8(symbol):
    """Return the lowercase hex of ``symbol``'s UTF-8 encoding.

    For example ``'一'`` becomes ``'e4b880'``.  Using ``bytes.hex`` gives the
    full encoding for any character, not only for those that encode to
    exactly three bytes.

    :param symbol: text to encode (normally a single character)
    :return: hex digits of the UTF-8 bytes, two per byte
    """
    return symbol.encode('utf-8').hex()
def writenum(symbol):
    """Fetch the stroke count of a character from bihua.51240.com.

    The page URL is built from the hex UTF-8 encoding of ``symbol``.  The
    stroke count is read from the table row whose cell text matches '笔画数'.

    :param symbol: the character to look up
    :return: the stroke count as the text found on the page (a string)
    :raises ValueError: if the page has no stroke-count cell
    """
    url_head = 'https://bihua.51240.com/'
    url_tail = '__bihuachaxun/'
    url = url_head + transutf8(symbol) + url_tail
    res = requests.get(url)
    soup = BeautifulSoup(res.text, 'lxml')
    label_cell = soup.find('td', text=re.compile('笔画数'))
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    number_cell = None
    if label_cell is not None:
        number_cell = label_cell.parent.find('td', text=re.compile(r'\d{1,2}'))
    if number_cell is None:
        raise ValueError("no stroke count found for %r at %s" % (symbol, url))
    return number_cell.get_text()
def get_dict():
    """Return a dict mapping every character to its crawled stroke count.

    Characters come from ``character.symbol_lst()``; progress is shown with
    ``tqdm`` and each value is whatever ``writenum`` returns.
    """
    return {
        char_one: writenum(char_one)
        for char_one in tqdm(character.symbol_lst())
    }
def main():
    """Crawl all stroke counts and write them to D:/py/write_num.txt.

    One ``<char> <count>`` pair is written per line, encoded as UTF-8.
    """
    # Crawl before opening the file so a failed crawl does not truncate an
    # existing result.
    write_num_dict = get_dict()
    with open("D:/py/write_num.txt", 'w', encoding='utf-8') as f:
        for i, j in write_num_dict.items():
            # The newline keeps entries separable; without it all pairs ran
            # together on a single line.
            f.write(i + ' ' + j + '\n')


if __name__ == '__main__':
    main()
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 23 23:36:40 2021
@author: rico
"""
import requests
def zgc_api(func, data):
    """POST ``data`` plus the API key as JSON to one endpoint of the service.

    :param func: endpoint name, inserted into the URL path
    :param data: dict of request fields; it is not modified
    :return: the decoded JSON response
    """
    headers = {
        'Connection': 'Keep-Alive'
    }
    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration instead.
    key = 'eRo1#ZFHY5N&GEzV'
    api = f"http://59.110.219.171:8000/{func}/"
    print(api)
    # Build a new payload rather than calling data.update(), which leaked the
    # key into the caller's dict.
    payload = {**data, 'key': key}
    # NOTE(review): requests timeouts are in seconds, so 6000 is 100 minutes -
    # confirm this is intended.
    with requests.session() as session:
        result = session.post(api, json=payload, headers=headers, timeout=6000).json()
    return result
# Progress bar
class Index(object):
    """Callable that renders a text progress bar such as ``[###   ] 50.0``."""

    def __init__(self, number=50, decimal=2):
        """
        :param number: how many '#' characters make up a full bar
        :param decimal: number of decimal places kept in the percentage
        """
        self.decimal = decimal
        self.number = number
        # Percentage points represented by a single '#'.
        self.a = 100 / number

    def __call__(self, now, total):
        """Return ``'\\r<bar> <percentage>'`` for ``now`` out of ``total``."""
        percentage = self.percentage_number(now, total)
        filled = int(percentage / self.a)
        return "\r%s %s" % (self.progress_bar(filled), percentage)

    def percentage_number(self, now, total):
        """
        Compute the completed percentage.

        :param now: current count
        :param total: total count
        :return: ``now / total`` as a percentage rounded to ``self.decimal`` places
        """
        ratio = now / total
        return round(ratio * 100, self.decimal)

    def progress_bar(self, num):
        """
        Build the bar itself.

        :param num: number of '#' characters to draw
        :return: the bar, padded with spaces to ``self.number`` characters
        """
        hashes = "#" * num
        padding = " " * (self.number - num)
        return '[%s%s]' % (hashes, padding)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 15 18:05:40 2021
@author: rico
"""
import datetime
import os
import re
import sys

# Must run before the ESCore import below: it puts the parent of the current
# working directory on the module search path.
sys.path.append(os.path.dirname(os.getcwd()))

import pandas as pd
import pymssql

from ESCore.ES import ES_Client
from public import zgc_api,Index
# Import data
def import_data(es, type, path):
    """Push the manually confirmed values of a spreadsheet into Elasticsearch.

    Rows with a rejection reason (column '驳回原因') are sent to
    ``es.UpdateReturnData``; all other rows send the confirmed value for the
    given data type to ``es.UpdateDealData``.

    :param es: client object providing ``UpdateReturnData`` and ``UpdateDealData``
    :param type: one of 'brand', 'category', 'model_nopoint', 'model_nonreductice'
    :param path: path of the Excel file to import
    :return: ``False`` for an unsupported ``type``, otherwise ``None``
    """
    deal_data_cols = {
        'brand': '人工确认品牌',
        'category': '人工确认类别',
        'model_nopoint': '人工确认型号',
        'model_nonreductice': '人工确认型号',
    }
    deal_data_col = deal_data_cols.get(type)
    if deal_data_col is None:
        print("不支持的类型")
        return False

    df = pd.read_excel(path)
    try:
        # Empty cells become the string 'nan', which marks "not rejected".
        df['驳回原因'] = df['驳回原因'].apply(str)
    except KeyError:
        # The sheet has no rejection column at all.
        df['驳回原因'] = ['nan'] * len(df)
        print("无驳回原因数据")

    index_ = Index()
    total_rows = len(df)
    for process_index, (_, row) in enumerate(df.iterrows()):
        try:
            progress = index_(process_index, total_rows - 1)
        except ZeroDivisionError:
            # A single-row sheet makes the denominator zero.
            progress = index_(process_index, 1)
        print(progress, end='%')

        search_field_value = str(row['ID'])
        remark = row['驳回原因']
        if remark != 'nan':
            # Rejected row: record the reason and a timestamp.
            # NOTE(review): local time is labelled as +08:00 regardless of the
            # machine's time zone - confirm this is intended.
            now_time = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S') + "+08:00"
            es.UpdateReturnData(type, search_field_value, remark, now_time)
            continue
        # Normal row: import the confirmed value.
        es.UpdateDealData(type, search_field_value, row[deal_data_col])
# Validate data
def _load_check_frame(path):
    """Read the workbook at ``path`` and normalise its '驳回原因' column to strings."""
    df = pd.read_excel(path)
    try:
        # Empty cells become the string 'nan', which marks "not rejected".
        df['驳回原因'] = df['驳回原因'].apply(str)
    except KeyError:
        # The sheet has no rejection column at all.
        df['驳回原因'] = ['nan'] * len(df)
        print("无驳回原因数据")
    return df


def _print_check_progress(progress_bar, done, row_count):
    """Print the progress bar for row ``done`` of a sheet with ``row_count`` rows."""
    try:
        text = progress_bar(done, row_count - 1)
    except ZeroDivisionError:
        # A single-row sheet makes the denominator zero.
        text = progress_bar(done, 1)
    print(text, end='%')


def check_data(es, type, path):
    """Validate the manually confirmed values of a spreadsheet against the database.

    Rows with a rejection reason are skipped.  For every other row the
    confirmed brand / leaf category / model must exist in the ZI_NEW database.
    For the two model types a fully valid sheet is rewritten with the
    canonical model names to ``<name>(校验通过)<ext>``.

    :param es: unused here; kept for signature compatibility
    :param type: one of 'brand', 'category', 'model_nopoint', 'model_nonreductice'
    :param path: path of the Excel file to validate
    :return: tuple ``(passed, path_to_import)``; ``path_to_import`` is the
        rewritten file for a passing model sheet and ``path`` otherwise
    """
    import os

    is_model = type in ('model_nopoint', 'model_nonreductice')
    if not is_model and type not in ('brand', 'category'):
        print("不支持的类型")
        return False, path

    # NOTE(review): database credentials are hard-coded in source; consider
    # loading them from configuration instead.
    conn_zi_new = pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',
                                  database='ZI_NEW', autocommit=True)
    try:
        cursor_zi_new = conn_zi_new.cursor()
        try:
            df = _load_check_frame(path)
            index_ = Index()
            res_flag = True
            res_list = []  # only used for model sheets: one entry per row

            for process_index, (_, row) in enumerate(df.iterrows()):
                _print_check_progress(index_, process_index, len(df))
                DOC_ID = str(row['ID'])
                if is_model:
                    category = str(row['结果产品末级类名称'])
                    brand = str(row['结果产品品牌名称'])
                remark = row['驳回原因']
                if remark != 'nan':
                    # Rejected rows are not validated.
                    if is_model:
                        res_list.append('驳回数据')
                    continue

                # All values are passed as query parameters, never formatted
                # into the SQL text, so quotes in a cell cannot alter the query.
                if type == 'brand':
                    brand = str(row['人工确认品牌'])
                    cursor_zi_new.execute("select id from p_brand where name = %s", (brand,))
                    if not cursor_zi_new.fetchone():
                        res_flag = False
                        print(f"ID:'{DOC_ID}','{brand}'不是库内品牌,校验不通过")
                elif type == 'category':
                    category = str(row['人工确认类别'])
                    # A leaf category is one that is nobody's parent (pid).
                    cursor_zi_new.execute(
                        "select id from p_category where name = %s "
                        "and id not in (select DISTINCT pid from p_category)",
                        (category,))
                    if not cursor_zi_new.fetchone():
                        res_flag = False
                        print(f"ID:'{DOC_ID}','{category}'不是库内类别,或不是末级类别,校验不通过")
                else:
                    model = str(row['人工确认型号'])
                    # The alias keeps only CJK characters, letters, digits,
                    # '+' and '.', upper-cased.
                    model_alias = "".join(re.findall(r"[\u4e00-\u9fa5A-Za-z0-9+.]+", model)).upper()
                    cursor_zi_new.execute(
                        "select model from sku_model where model_alias = %s "
                        "and categoryname = %s and brandname = %s",
                        (model_alias, category, brand))
                    check_result = cursor_zi_new.fetchone()
                    if not check_result:
                        res_flag = False
                        print(f"ID:'{DOC_ID}','{model}'不是型号表内型号,校验不通过")
                        res_list.append('型号表外型号,请添加')
                    else:
                        res_list.append(check_result[0])

            if res_flag and is_model:
                # Replace the confirmed models with the canonical names found
                # in the model table and save the result next to the input.
                df['人工确认型号'] = res_list
                stem, extension = os.path.splitext(path)
                pass_path = stem + "(校验通过)" + extension
                df.to_excel(pass_path)
                print(f"{pass_path},该数据校验通过,可执行导入")
                return res_flag, pass_path

            if res_flag:
                print(f"{path},该数据校验通过,可执行导入")
            else:
                print(f"{path},该数据校验通不过,请修改")
            return res_flag, path
        finally:
            cursor_zi_new.close()
    finally:
        # Release the connection even when reading or querying fails.
        conn_zi_new.close()
# Create the ES client
host = "http://123.56.114.138:9200/"
index_name = "model_params_test"
es = ES_Client(host,index_name)
# Choose the data type to validate and update:
# 1. brand extraction and normalisation: brand
# 2. category extraction and normalisation: category
# 3. model extraction and normalisation, type "model_nopoint": model_nopoint
# 4. model extraction and normalisation, type "model_nonreductice": model_nonreductice
# e.g. type = 'brand'
type = 'model_nonreductice'
# Path of the completed file (if rows were rejected, add a column named
# "驳回原因" and fill in the rejection reason)
path = "/Users/rico/Downloads/扫描仪型号标化_第一批(1).xlsx"
# Validate the file contents
check_status,pass_path = check_data(es,type,path)
# Import the data (only allowed after validation has passed!)
if check_status:
    import_data(es,type,pass_path)
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 7 23:08:30 2020
@author: rico
"""
import pymssql
import pymysql
import oss2
import os
import datetime
class MSSQL:
    """Thin wrapper around a pymssql connection to one of the known servers.

    After construction ``_conn`` holds the connection (``None`` if connecting
    failed) and ``_cur`` a cursor on it (``None`` without a connection).
    """

    # host -> (user, password)
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from configuration instead.
    _CREDENTIALS = {
        '123.57.45.119': ('zgcprice', 'zgcprice20200708'),
        '123.56.115.207': ('zgcindex', 'jiayou202006'),
        '10.0.120.131': ('sa', '1qaz@WSX'),
        '10.0.120.79': ('sa', '1qaz@WSX'),
        '39.107.254.235': ('sa', '1qaz@WSX'),
    }

    def __init__(self, host, db):
        """
        :param host: server address; must be one of the configured hosts
        :param db: database name
        :raises ValueError: if no credentials are configured for ``host``
        """
        self.host = host
        self.db = db
        self.user = None
        self._cur = None
        self._conn = self.GetConnect()
        if self._conn:
            self._cur = self._conn.cursor()

    # Connect to the database
    def GetConnect(self):
        """Open and return a connection, or ``None`` if connecting fails.

        :raises ValueError: if no credentials are configured for ``self.host``
        """
        try:
            user, pwd = self._CREDENTIALS[self.host]
        except KeyError:
            raise ValueError("no credentials configured for host %r" % (self.host,)) from None
        # Remembered so GetConnectInfo can report it.
        self.user = user
        try:
            conn = pymssql.connect(
                host=self.host,
                user=user,
                password=pwd,
                database=self.db,
                autocommit=True
            )
        except Exception as err:
            print("连接数据库失败, %s" % err)
            return None
        return conn

    # Report connection details
    def GetConnectInfo(self):
        """Print the server, user name and database of this connection."""
        print( "连接信息:" )
        print( "服务器:%s , 用户名:%s , 数据库:%s " % (self.host,self.user,self.db))

    def Close(self):
        """Close the cursor and the connection if they were opened."""
        if getattr(self, '_cur', None) is not None:
            self._cur.close()
        if getattr(self, '_conn', None):
            self._conn.close()
class MYSQL:
    """Thin wrapper around a pymysql connection to one of the known servers.

    After construction ``_conn`` holds the connection (``None`` if connecting
    failed) and ``_cur`` a cursor on it (``None`` without a connection).
    """

    # host -> (user, password)
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from configuration instead.
    _CREDENTIALS = {
        '39.105.1.55': ('root', 'l*C#70CIAxgb6c%'),
    }

    def __init__(self, host, port, db):
        """
        :param host: server address; must be one of the configured hosts
        :param port: server port
        :param db: database name
        :raises ValueError: if no credentials are configured for ``host``
        """
        self.host = host
        self.port = port
        self.db = db
        self.user = None
        self._cur = None
        self._conn = self.GetConnect()
        if self._conn:
            self._cur = self._conn.cursor()

    # Connect to the database
    def GetConnect(self):
        """Open and return a connection, or ``None`` if connecting fails.

        :raises ValueError: if no credentials are configured for ``self.host``
        """
        try:
            user, pwd = self._CREDENTIALS[self.host]
        except KeyError:
            raise ValueError("no credentials configured for host %r" % (self.host,)) from None
        # Remembered so GetConnectInfo can report it.
        self.user = user
        try:
            conn = pymysql.connect(
                host=self.host,
                port=self.port,
                user=user,
                password=pwd,
                database=self.db,
                charset="utf8",
                autocommit=True
            )
        except Exception as err:
            print("连接数据库失败, %s" % err)
            return None
        return conn

    # Report connection details
    def GetConnectInfo(self):
        """Print the server, user name and database of this connection."""
        print( "连接信息:" )
        print( "服务器:%s , 用户名:%s , 数据库:%s " % (self.host,self.user,self.db))

    def Close(self):
        """Close the cursor and the connection if they were opened."""
        if getattr(self, '_cur', None) is not None:
            self._cur.close()
        if getattr(self, '_conn', None):
            self._conn.close()
'''
ms = MSSQL('123.56.115.207','zdindex')
conn = ms._conn
cursor = ms._cur
cursor.execute(f"select top 10 * from zd_week_price")
cursor.fetchall()
ms.Close()
cursor.close()
conn.close()
'''
class OSS(object):
    """Small helper around an oss2 bucket supporting file upload and download."""

    def __init__(self, accessKey_id, accessKey_secret, endpoint, bucket_name):
        self.auth = oss2.Auth(accessKey_id, accessKey_secret)
        self.bucket = oss2.Bucket(self.auth, endpoint, bucket_name)

    def download_from_oss(self, oss_folder_prefix, object_name, local_save_path):
        """Download one object, keeping its path below ``oss_folder_prefix``.

        :param oss_folder_prefix: OSS folder whose sub-path should be preserved
        :param object_name: full key of the object to download
        :param local_save_path: local directory the object is saved under
        """
        # Keep everything after the FIRST occurrence of the prefix, so a key
        # that happens to repeat the prefix text deeper down keeps its full
        # sub-path.  OSS keys use '/' as the separator.
        if oss_folder_prefix:
            _, found, remainder = object_name.partition(oss_folder_prefix)
            relative_key = remainder if found else object_name
        else:
            relative_key = object_name
        # Convert to the local separator (Windows support).
        relative_path = os.sep.join(relative_key.strip('/').split('/'))
        local_file_path = os.path.join(local_save_path, relative_path)
        # Create the target directory if needed; exist_ok avoids the
        # check-then-create race.
        local_dir = os.path.dirname(local_file_path)
        if local_dir:
            os.makedirs(local_dir, exist_ok=True)
        self.bucket.get_object_to_file(object_name, local_file_path)

    def upload_to_oss(self, prefix, suffix, local_upload_path):
        """Upload every file ending in ``suffix`` found below a local directory.

        Files are stored under ``<prefix>/<YYYYmmddHHMMSS>/`` and keep their
        path relative to ``local_upload_path``.

        :param prefix: OSS folder to upload into
        :param suffix: only files whose name ends with this are uploaded
        :param local_upload_path: local directory to walk
        """
        # The current date-time names the folder of this upload.
        folder_name = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
        oss_upload_prefix = prefix.rstrip('/') + '/' + folder_name
        for root, dirs, files in os.walk(local_upload_path):
            for file in files:
                if not file.endswith(suffix):
                    continue
                file_path = os.path.join(root, file)
                # relpath keeps the hierarchy below the upload directory even
                # if the directory name reappears further down the path.
                relative_file_path = os.path.relpath(file_path, local_upload_path)
                # OSS keys always use '/', whatever the local platform uses.
                oss_relative_path = relative_file_path.replace(os.sep, '/')
                oss_upload_path = oss_upload_prefix + '/' + oss_relative_path
                self.bucket.put_object_from_file(oss_upload_path, file_path)

    def travel_download(self, prefix, suffix, local_save_path):
        """Download the files ending in ``suffix`` from the newest folder under ``prefix``.

        :param prefix: OSS folder prefix whose direct sub-folders are listed
        :param suffix: file suffix, e.g. '.csv', selecting what is downloaded
        :param local_save_path: local directory the files are saved under
        :raises ValueError: if there is no sub-folder under ``prefix``
        :return: None
        """
        # Drop a trailing path separator the caller may have passed in.
        local_save_path = local_save_path.rstrip(os.sep)
        # With a delimiter the listing is one level deep; prefixes are folders.
        top_level_folder = [
            obj.key
            for obj in oss2.ObjectIterator(self.bucket, prefix=prefix, delimiter='/')
            if obj.is_prefix()
        ]
        if not top_level_folder:
            raise ValueError("no sub-folder found under OSS prefix %r" % (prefix,))
        # upload_to_oss names folders by timestamp, so the greatest key is
        # the most recent upload.
        target_folder = max(top_level_folder)
        for obj in oss2.ObjectIterator(self.bucket, prefix=target_folder):
            if obj.is_prefix():
                continue
            # Keys like 'xxx/xxx/' can be listed as objects too; the suffix
            # test filters them out along with unwanted file types.
            if obj.key.endswith(suffix):
                self.download_from_oss(target_folder, obj.key, local_save_path)
\ No newline at end of file
This diff is collapsed.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 22 22:38:22 2021
@author: rico
"""
from db import MYSQL,MSSQL,OSS
import pandas as pd
path = "/Users/rico/WorkSpace/1_Project/Company/中电中采/TEMP Workspace/ES处理相关/ES线下补参处理/(已确认)投影机结果202111241215280008.xlsx"
# FINISH_P_SKU is read as text.
df = pd.read_excel(path,converters = {'FINISH_P_SKU':str})
# Columns exported to the warehouse table.
export_df = df[['DOC_ID','DATA_BATCH','DATA_END_TIME','FLOW_NODE_STATUS','SOURCE_P_SKU','SOURCE_CHANNEL_NAME','SOURCE_CHANNEL_SNAME','SOURCE_P_NAME',
                'SOURCE_P_LASTCATEGORY_NAME','SOURCE_P_BRAND_NAME','SOURCE_P_PRICE','SOURCE_P_URL','FINISH_P_REMARK','FINISH_P_BRAND_NAME','FINISH_P_BRAND_CODE','FINISH_P_LASTCATEGORY_NAME',
                'FINISH_P_LASTCATEGORY_CODE','FINISH_P_PARAMS','FINISH_P_NAME',
                'FINISH_P_CHECK_STATUS']]
# Missing values are stored as the placeholder '无'.
export_df = export_df.fillna('无')
mysql = MYSQL('39.105.1.55',2883,'ZD_PUBLIC_pro')
cursor = mysql._cur
# Sync the data to the DW layer
tablename = "DW_PRODUCT_ALL_RES"
cols = ','.join(export_df.columns)
val = (tuple(i) for i in export_df.values)
sqlstr = "INSERT INTO {} ({}) VALUES ({})".format(tablename,cols,','.join(['%s']*len(export_df.columns)))
try:
    cursor.executemany(sqlstr, val)
    print('>>> 插入数据成功,表 {} 共插入 {} 行数据'.format(tablename,len(export_df)))
except Exception as e:
    print('>>> 插入数据失败', e)
# Second pass: overwrite the source category / brand names from another sheet.
df = pd.read_excel("/Users/rico/WorkSpace/1_Project/Company/中电中采/TEMP Workspace/ES处理相关/ES线下补参处理/安徽三大类结果1119(1).xlsx")
update_sql = ("update DW_PRODUCT_ALL_RES set SOURCE_P_LASTCATEGORY_NAME = %s, "
              "SOURCE_P_BRAND_NAME = %s where DOC_ID = %s")
for index, row in df.iterrows():
    doc_id = row['DOC_ID']
    name = row['SOURCE_P_LASTCATEGORY_NAME']
    brand = row['SOURCE_P_BRAND_NAME']
    # Values go in as query parameters so quotes in a cell cannot alter the
    # statement.  str() gives the same text the f-string used to insert.
    cursor.execute(update_sql, (str(name), str(brand), str(doc_id)))
# Release the cursor and connection once both passes are done.
mysql.Close()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment