Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
重
重点类信息提取
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ZGC_INDEX
重点类信息提取
Commits
5de9caf5
Commit
5de9caf5
authored
Apr 15, 2021
by
Jialin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
数据库更新
parent
4521e64e
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
308 additions
and
0 deletions
+308
-0
产品品牌更新.py
公共代码/产品品牌更新.py
+199
-0
重复产品更新.py
公共代码/重复产品更新.py
+109
-0
No files found.
公共代码/产品品牌更新.py
0 → 100644
View file @
5de9caf5
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
@author: dell
@file: 产品品牌更新.py
@time: 2021/04/08
@desc:
"""
import
pymssql
import
pandas
as
pd
#import uuid
import
time
import
os
def _retarget_skus(cursor, log, wrong_spuid, wrong_brand, correct_brand):
    """Rename, or retire as duplicates, all SKUs attached to *wrong_spuid*.

    For each SKU the wrong brand text in ``skuname`` is replaced with the
    correct brand.  If another SKU already carries that corrected name, the
    current SKU is flagged (``state=6``, name annotated with both codes) and
    its ``Productcode_Sku`` rows are pointed at the surviving SKU; otherwise
    the SKU is simply renamed.
    """
    cursor.execute("select skuname, sku from p_sku where spuid=%s", (wrong_spuid,))
    wrong_sku_list = cursor.fetchall()
    print(" 接下来是skuname更改:", file=log)
    for wrong_skuname, wrong_skucode in wrong_sku_list:
        correct_skuname = wrong_skuname.replace(wrong_brand, correct_brand)
        # Exclude the SKU itself: when its name does not contain the wrong
        # brand the "corrected" name is unchanged, and without this filter the
        # SKU would be reported (and retired) as a duplicate of itself.
        cursor.execute(
            "select skuname, sku from p_sku where skuname=%s and sku<>%s",
            (correct_skuname, wrong_skucode),
        )
        # Only the first match is used; a skuname is expected to be unique.
        surviving = cursor.fetchone()
        if surviving:
            sku_name, sku_code = surviving[0], surviving[1]
            sku_name_repeated = sku_name + f"(sku:{wrong_skucode};重复sku:{sku_code})"
            cursor.execute(
                "update p_sku set state=6, skuname=%s where sku=%s",
                (sku_name_repeated, wrong_skucode),
            )
            print(f" if successful:p_sku, '{wrong_skucode}'的 skuname updated to '{sku_name_repeated}', state updated to 6", file=log)
            cursor.execute(
                "update Productcode_Sku set sku=%s where sku=%s",
                (sku_code, wrong_skucode),
            )
            print(f" if successful:Productcode_Sku, sku updated from '{wrong_skucode}' into '{sku_code}'", file=log)
        else:
            # No clash: just rename this SKU (its spuid is moved by the caller).
            cursor.execute(
                "update p_sku set skuname=%s where skuname=%s and spuid=%s",
                (correct_skuname, wrong_skuname, wrong_spuid),
            )
            print(f" p_sku, skuname updated from '{wrong_skuname}' into '{correct_skuname}' where spuid='{wrong_spuid}'", file=log)


def _merge_spu(cursor, log, wrong_tuple, wrong_brand, correct_brand):
    """Move one SPU of the wrong brand onto its correctly named SPU.

    *wrong_tuple* is a ``(spuname, id)`` row from ``p_spu``.  Returns ``False``
    when the corrected spuname is ambiguous in ``p_spu`` (the caller must stop
    the whole run), ``True`` otherwise.
    """
    wrong_spuname, wrong_spuid = wrong_tuple[0], wrong_tuple[1]
    print(f" 正在更改的spuid:'{wrong_spuid}'", file=log)
    correct_spuname = wrong_spuname.replace(wrong_brand, correct_brand)

    cursor.execute("select id from p_spu where spuname=%s", (correct_spuname,))
    matches = cursor.fetchall()
    if len(matches) > 1:
        print(f"spuname:'{correct_spuname}'在p_spu表中不是唯一的,请检查", file=log)
        return False

    # If the corrected name already exists, its id is the merge target.
    # Otherwise this SPU keeps its own id and is renamed at the end.
    rename_spu = not matches
    spu_id = wrong_spuid if rename_spu else matches[0][0]

    _retarget_skus(cursor, log, wrong_spuid, wrong_brand, correct_brand)

    # SKU names are fixed; now re-parent the SKUs.  Only p_sku is updated;
    # whether other tables reference spuid is not handled here.
    cursor.execute("update p_sku set spuid=%s where spuid=%s", (spu_id, wrong_spuid))
    print(f" if successful:更改了p_sku中的spuid,从'{wrong_spuid}'改到了'{spu_id}'", file=log)
    if rename_spu:
        cursor.execute(
            "update p_spu set spuname=%s where spuname=%s",
            (correct_spuname, wrong_spuname),
        )
        print(f" if successful: 更改了 p_spu 中的 spuname to '{correct_spuname}' where spuname='{wrong_spuname}'", file=log)
    return True


def duplicate_product_update(filepath, category, sheet_name=0):
    """Merge mis-spelled brands into the correct brand in ``ZI_NEW_TEST``.

    Despite its name, this function performs the brand clean-up: it reads an
    Excel sheet with the columns ``正确品牌`` (correct brand), ``品牌A`` and
    ``品牌B``; for every row with a correct brand, each differing A/B brand is
    folded into the correct one (SPU names, SKU names, ``p_sku.spuid`` and
    ``p_spu.brandid`` are rewritten).  Every step is written to a timestamped
    log file under ``./数据库更新日志``.

    Args:
        filepath: Path of the Excel workbook to read.
        category: Category name; it must match exactly one row of
            ``p_category`` or the run stops before any change is made.
        sheet_name: Sheet index or name passed to ``pandas.read_excel``.

    Returns:
        None.  The run stops early (after logging the reason) when a category,
        brand or SPU name is missing or ambiguous.
    """
    timestr = time.strftime('%Y-%m-%d %H%M%S', time.localtime())
    folderpath = "./数据库更新日志"
    os.makedirs(folderpath, exist_ok=True)
    logname = f"{folderpath}/品牌更新{timestr}.txt"

    with open(logname, 'w', encoding='utf-8') as log:
        df = pd.read_excel(filepath, sheet_name=sheet_name, converters={'正确品牌': str})

        # Only ZI_NEW_TEST is touched by this function.
        # NOTE(review): credentials are hard-coded in source; consider moving
        # them to configuration or environment variables.
        conn_zi_test = pymssql.connect(
            host='123.56.115.207',
            user='zgcprice3311',
            password='zgcprice20200628',
            database='ZI_NEW_TEST',
            autocommit=True,
        )
        try:
            cursor = conn_zi_test.cursor()

            # Validate the category before touching anything.
            cursor.execute("select id from p_category where name=%s", (category,))
            category_rows = cursor.fetchall()
            if len(category_rows) > 1:
                print(f"品牌名:'{category}' 在p_category中不是唯一的,请检查", file=log)
                return
            if not category_rows:
                print('输入的category不在p_category中', file=log)
                return

            for index, row in df.iterrows():
                correct_brand = row['正确品牌']
                # A blank cell is read as float NaN: nothing to do for this row.
                if isinstance(correct_brand, float):
                    continue
                print(f"index: {index},正确品牌为:{correct_brand}", file=log)

                # Blank A/B cells (NaN) are skipped instead of being looked up
                # as a brand literally called "nan".
                brand_tobe_modified = [
                    row[column]
                    for column in ('品牌A', '品牌B')
                    if not isinstance(row[column], float) and row[column] != correct_brand
                ]
                print(f"brand to be modified:{brand_tobe_modified}", file=log)

                # Resolve the id of the correct brand, creating the brand only
                # when p_brand has no row with that name at all (otherwise an
                # insert would itself create the duplicate rejected below).
                cursor.execute("select id from p_brand where name=%s", (correct_brand,))
                brand_rows = cursor.fetchall()
                if not brand_rows:
                    # Only the name is filled in for a newly created brand.
                    cursor.execute("insert into p_brand (name) values (%s)", (correct_brand,))
                    print(f"'{correct_brand}' 被添加到了p_brand", file=log)
                    cursor.execute("select id from p_brand where name=%s", (correct_brand,))
                    brand_rows = cursor.fetchall()
                if len(brand_rows) > 1:
                    print(f"品牌名:'{correct_brand}' 在p_brand中不唯一,请检查", file=log)
                    return
                correct_brand_id = brand_rows[0][0]

                # Fold the wrong brands into the correct one, one at a time.
                for wrong_brand in brand_tobe_modified:
                    print(f"正在修改'{wrong_brand}':", file=log)
                    cursor.execute("select id from p_brand where name=%s", (wrong_brand,))
                    wrong_brand_row = cursor.fetchone()
                    if not wrong_brand_row:
                        print(f"更改前的品牌:'{wrong_brand}' 不在p_brand中,请检查", file=log)
                        return
                    wrong_brand_id = wrong_brand_row[0]

                    # SPUs of this brand are selected across *all* categories
                    # and are all renamed together.
                    cursor.execute(
                        "select distinct spuname, id from p_spu where brandid=%s",
                        (wrong_brand_id,),
                    )
                    wrong_list = cursor.fetchall()
                    if not wrong_list:
                        print(f"!!!!! 品牌id:'{wrong_brand_id}' 不在p_spu里,该品牌下spu或以被修改", file=log)
                        continue
                    print(f" 需要更改的spuid:'{wrong_list}'", file=log)

                    for wrong_tuple in wrong_list:
                        if not _merge_spu(cursor, log, wrong_tuple, wrong_brand, correct_brand):
                            return

                    # All SPUs handled: switch the brand id (all categories).
                    cursor.execute(
                        "update p_spu set brandid=%s where brandid=%s",
                        (correct_brand_id, wrong_brand_id),
                    )
                    print(f"if successful:更改了p_spu中的brandid, 从'{wrong_brand_id}'改到了'{correct_brand_id}'", file=log)
                print('', file=log)
        finally:
            conn_zi_test.close()
if __name__ == '__main__':
    # Script entry point: run the brand clean-up for the scanner category
    # against the locally stored analysis workbook.
    category = '扫描仪'
    filepath = "E:\\ZDZC\\扫描仪产品品牌分析.xlsx"
    duplicate_product_update(filepath=filepath, category=category)
公共代码/重复产品更新.py
0 → 100644
View file @
5de9caf5
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
@author: dell
@file: 重复产品更新.py
@time: 2021/04/07
@desc:
"""
import
pymssql
import
pandas
as
pd
import
time
import
os
def duplicate_product_update(filepath, sheet_name=0):
    """Retire duplicate products and repoint their data at the correct code.

    Reads an Excel sheet with the columns ``产品编码`` (duplicate product
    code), ``正确产品编码`` (correct product code) and ``正确产品名称``
    (name to store on the duplicate).  For every row:

    * ``ZI_NEW.p_sku``: the duplicate SKU gets ``state=6`` and the given name;
    * ``ZI_NEW.Productcode_Sku``: rows of the duplicate are moved to the
      correct code;
    * ``zdindex`` price tables: rows of the duplicate in the latest
      ``periods`` are moved to the correct code.

    Every step is written to a timestamped log file under
    ``./数据库更新日志``.

    Args:
        filepath: Path of the Excel workbook to read.
        sheet_name: Sheet index or name passed to ``pandas.read_excel``.

    Returns:
        None.
    """
    df = pd.read_excel(filepath, sheet_name=sheet_name, converters={'产品编码': str})

    # NOTE(review): credentials are hard-coded in source; consider moving them
    # to configuration or environment variables.
    db_login = {
        'host': '123.56.115.207',
        'user': 'zgcprice3311',
        'password': 'zgcprice20200628',
        'autocommit': True,
    }
    # Table names are fixed constants, so formatting them into SQL is safe;
    # all values coming from the workbook go through query parameters.
    price_tables = (
        'zd_electricity_price',
        'zd_purchase_price',
        'zd_entry_goods_price',
        'zd_week_price',
    )

    conn_zi_new = pymssql.connect(database='ZI_NEW', **db_login)
    try:
        conn_zdindex = pymssql.connect(database='zdindex', **db_login)
        try:
            cursor_zi_new = conn_zi_new.cursor()
            cursor_zdindex = conn_zdindex.cursor()

            timestr = time.strftime('%Y-%m-%d %H%M%S', time.localtime())
            folderpath = "./数据库更新日志"
            os.makedirs(folderpath, exist_ok=True)
            logname = f"{folderpath}/重复产品更新{timestr}.txt"

            # The latest period of each price table does not depend on the
            # row being processed, so it is looked up once up front.
            latest_periods = {}
            for table in price_tables:
                cursor_zdindex.execute(f"select top 1 max(periods) from {table}")
                latest_periods[table] = cursor_zdindex.fetchone()[0]

            with open(logname, 'w', encoding='utf-8') as log:
                for index, row in df.iterrows():
                    print(f"index:{index}", file=log)
                    # str() mirrors how the values were previously rendered
                    # into the SQL text (only 产品编码 is read as str).
                    # NOTE(review): blank cells therefore become the text
                    # 'nan'; consider skipping such rows.
                    product_id = str(row['产品编码'])
                    product_id_correct = str(row['正确产品编码'])
                    product_name_correct = str(row['正确产品名称'])

                    cursor_zi_new.execute(
                        "update p_sku set state=6, skuname=%s where sku=%s",
                        (product_name_correct, product_id),
                    )
                    print(f" if successful: p_sku表,sku编码为'{product_id}'的数据,state被改为6,skuname被改为'{product_name_correct}'", file=log)

                    cursor_zi_new.execute(
                        "update Productcode_Sku set sku=%s where sku=%s",
                        (product_id_correct, product_id),
                    )
                    print(f" if successful: Productcode_Sku表,sku编码为'{product_id}'的数据,sku被改为'{product_id_correct}'", file=log)

                    for table in price_tables:
                        cursor_zdindex.execute(
                            f"update {table} set goods_id=%s where goods_id = %s and periods = %s",
                            (product_id_correct, product_id, latest_periods[table]),
                        )
                        print(f" if successful: {table}表,goods_id为'{product_id}'且periods为最新的数据,goods_id被改为'{product_id_correct}'", file=log)
        finally:
            conn_zdindex.close()
    finally:
        conn_zi_new.close()
if __name__ == '__main__':
    # Script entry point: apply the duplicate-product clean-up using the
    # locally stored, pre-processed scanner workbook.
    # filepath="E:\\ZDZC\\激光打印机产品重复型号分析.xlsx"
    filepath = "E:\\ZDZC\\处理后数据-扫描仪数据0407.xlsx"
    sheet_name = '重复产品'
    # The sheet name was assigned but never passed, so the first sheet of the
    # workbook was read instead of the duplicate-product sheet.
    # NOTE(review): confirm that sheet carries the columns 产品编码 /
    # 正确产品编码 / 正确产品名称 expected by duplicate_product_update.
    duplicate_product_update(filepath, sheet_name=sheet_name)
# filepath="E:\\ZDZC\\处理后数据-扫描仪数据0407.xlsx"
# sheet_name='重复产品'
# df=pd.read_excel(filepath,sheet_name=sheet_name,converters={'产品编码': str})
# conn_zi_new=pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628', database='ZI_NEW', autocommit=True)
# cursor_zi_new=conn_zi_new.cursor()
# conn_zi_test=pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',
# database='ZI_NEW_TEST', autocommit=True)
# cursor_zi_test=conn_zi_test.cursor()
# conn_zdindex=pymssql.connect(host='123.56.115.207', user='zgcprice3311', password='zgcprice20200628',
# database='zdindex', autocommit=True)
# cursor_zdindex=conn_zdindex.cursor()
#
# for index,row in df.iterrows():
# product_id=row['产品编码']
# product_id_correct=row['正确编码']
# product_name_correct=row['修改状态,错误编码下报价改为正确编码,替换错误编码下商品名称'] # .split('(')[0].strip()
#
# cursor_zi_new.execute(f"select skuname, state, sku from p_sku where sku='{product_id}'")
# output=cursor_zi_new.fetchone()
# # print((output))
# # cursor_zi_test.execute(f"insert into p_sku (skuname, state, sku) values ({output[0]},{output[1]},'{product_id}')")
# # cursor_zi_test.execute(f"select * from p_sku where sku={product_id}")
# # out_put=cursor_zi_test.fetchall()
# # print(out_put)
#
# cursor_zi_new.execute(f"update p_sku set state=6, skuname='{product_name_correct}' where sku='{product_id}' ")
# cursor_zi_new.execute(f"update Productcode_Sku set sku='{product_id_correct}' where sku='{product_id}' ")
# cursor_zdindex.execute(f"update zd_electricity_price set goods_id='{product_id_correct}' where goods_id = '{product_id}' and periods = 20210405 ")
# cursor_zdindex.execute(
# f"update zd_purchase_price set goods_id='{product_id_correct}' where goods_id = '{product_id}' and periods = 20210405 ")
# cursor_zdindex.execute(
# f"update zd_entry_goods_price set goods_id='{product_id_correct}' where goods_id = '{product_id}' and periods = 20210405 ")
# cursor_zdindex.execute(
# f"update zd_week_price set goods_id='{product_id_correct}' where goods_id = '{product_id}' and periods = 20210405 ")
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment