Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
重
重点类信息提取
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ZGC_INDEX
重点类信息提取
Commits
6c787da6
Commit
6c787da6
authored
Apr 08, 2021
by
Jialin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
产品品牌分析重新修改了,然后重新生成了brand_filter
parent
ac0f7adb
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
19 additions
and
19 deletions
+19
-19
brand_filter.xlsx
公共代码/brand_filter.xlsx
+0
-0
产品品牌分析.py
公共代码/产品品牌分析.py
+19
-19
No files found.
公共代码/brand_filter.xlsx
0 → 100644
View file @
6c787da6
File added
公共代码/产品品牌分析.py
View file @
6c787da6
...
@@ -8,7 +8,7 @@ import re
...
@@ -8,7 +8,7 @@ import re
import
xlsxwriter
import
xlsxwriter
def
brand_washing
(
filepath
,
thre
=
0.
5
,
inner_thre
=
0.8
,
a
=
1
,
sheet_name
=
0
):
def
brand_washing
(
filepath
,
thre
=
0.
4
,
inner_thre
=
0.5
,
a
=
1
,
sheet_name
=
0
):
# filepath:文件路径,thre为两个品牌下型号重合率阈值,inner_thre为两个品牌下某条型号内关键词重合率阈值,a为权重调整,sheet_name为表单名
# filepath:文件路径,thre为两个品牌下型号重合率阈值,inner_thre为两个品牌下某条型号内关键词重合率阈值,a为权重调整,sheet_name为表单名
df
=
pd
.
read_excel
(
filepath
,
sheet_name
=
sheet_name
,
converters
=
{
'产品编码'
:
str
})
df
=
pd
.
read_excel
(
filepath
,
sheet_name
=
sheet_name
,
converters
=
{
'产品编码'
:
str
})
# 处理缺失值
# 处理缺失值
...
@@ -158,43 +158,43 @@ def brand_washing(filepath,thre=0.5,inner_thre=0.8,a=1,sheet_name=0):
...
@@ -158,43 +158,43 @@ def brand_washing(filepath,thre=0.5,inner_thre=0.8,a=1,sheet_name=0):
# 第一个worksheet
# 第一个worksheet
worksheet
=
workbook
.
add_worksheet
(
name
=
'Sheet1'
)
worksheet
=
workbook
.
add_worksheet
(
name
=
'Sheet1'
)
worksheet
.
write
(
'A1'
,
'品牌A
-1
'
,
bold_format
)
worksheet
.
write
(
'A1'
,
'品牌A'
,
bold_format
)
worksheet
.
write
(
'B1'
,
'品牌B
-1
'
,
bold_format
)
worksheet
.
write
(
'B1'
,
'品牌B'
,
bold_format
)
worksheet
.
write
(
'C1'
,
'
品牌-1
'
,
bold_format
)
worksheet
.
write
(
'C1'
,
'
正确品牌
'
,
bold_format
)
worksheet
.
write
(
'D1'
,
'
品牌A-2
'
,
bold_format
)
worksheet
.
write
(
'D1'
,
'
方法
'
,
bold_format
)
worksheet
.
write
(
'E1'
,
'品牌B-2'
,
bold_format
)
#
worksheet.write('E1', '品牌B-2', bold_format)
worksheet
.
write
(
'F1'
,
'品牌-2'
,
bold_format
)
#
worksheet.write('F1', '品牌-2', bold_format)
worksheet
.
write
(
'G1'
,
'品牌A-3'
,
bold_format
)
#
worksheet.write('G1', '品牌A-3', bold_format)
worksheet
.
write
(
'H1'
,
'品牌B-3'
,
bold_format
)
#
worksheet.write('H1', '品牌B-3', bold_format)
worksheet
.
write
(
'I1'
,
'品牌-3'
,
bold_format
)
#
worksheet.write('I1', '品牌-3', bold_format)
col
=
0
col
=
0
row
=
1
row
=
1
for
list_i
in
related_brand1
:
for
list_i
in
related_brand1
:
for
brand_i
in
range
(
2
):
for
brand_i
in
range
(
2
):
worksheet
.
write_string
(
row
,
col
,
list_i
[
brand_i
])
worksheet
.
write_string
(
row
,
col
,
list_i
[
brand_i
])
col
+=
1
col
+=
1
col
=
3
worksheet
.
write_string
(
row
,
col
,
'1'
)
row
+=
1
row
+=
1
col
=
0
col
=
0
col
=
3
row
=
1
for
list_i
in
related_brand2
:
for
list_i
in
related_brand2
:
for
brand_i
in
range
(
2
):
for
brand_i
in
range
(
2
):
worksheet
.
write_string
(
row
,
col
,
list_i
[
brand_i
])
worksheet
.
write_string
(
row
,
col
,
list_i
[
brand_i
])
col
+=
1
col
+=
1
row
+=
1
col
=
3
col
=
3
worksheet
.
write_string
(
row
,
col
,
'2'
)
col
=
6
row
+=
1
row
=
1
col
=
0
for
list_i
in
related_brand3
:
for
list_i
in
related_brand3
:
for
brand_i
in
range
(
2
):
for
brand_i
in
range
(
2
):
worksheet
.
write_string
(
row
,
col
,
list_i
[
brand_i
])
worksheet
.
write_string
(
row
,
col
,
list_i
[
brand_i
])
col
+=
1
col
+=
1
col
=
3
worksheet
.
write_string
(
row
,
col
,
'3'
)
row
+=
1
row
+=
1
col
=
6
col
=
0
# 第二个worksheet
# 第二个worksheet
worksheet2
=
workbook
.
add_worksheet
(
name
=
'Sheet2'
)
worksheet2
=
workbook
.
add_worksheet
(
name
=
'Sheet2'
)
...
@@ -244,6 +244,6 @@ def brand_washing(filepath,thre=0.5,inner_thre=0.8,a=1,sheet_name=0):
...
@@ -244,6 +244,6 @@ def brand_washing(filepath,thre=0.5,inner_thre=0.8,a=1,sheet_name=0):
workbook
.
close
()
workbook
.
close
()
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
filepath
=
'E:
\\
ZDZC
\\
激光打印机
参数确认.xlsx'
filepath
=
'E:
\\
ZDZC
\\
扫描仪
参数确认.xlsx'
brand_washing
(
filepath
)
brand_washing
(
filepath
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment