Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
重
重点类信息提取
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ZGC_INDEX
重点类信息提取
Commits
3b298f82
Commit
3b298f82
authored
Apr 16, 2021
by
Jialin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
真·真·代码最终修改
parent
5de9caf5
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
36 additions
and
18 deletions
+36
-18
产品品牌分析.py
公共代码/产品品牌分析.py
+1
-1
产品类别分析.py
公共代码/产品类别分析.py
+26
-11
产品重复型号分析.py
公共代码/产品重复型号分析.py
+9
-6
异常数据表格.xlsx
公共代码/异常数据表格.xlsx
+0
-0
No files found.
公共代码/产品品牌分析.py
View file @
3b298f82
...
@@ -277,6 +277,6 @@ def brand_washing(filepath,thre=0.5,inner_thre=0.5,a=1,sheet_name=0):
...
@@ -277,6 +277,6 @@ def brand_washing(filepath,thre=0.5,inner_thre=0.5,a=1,sheet_name=0):
workbook
.
close
()
workbook
.
close
()
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
filepath
=
'E:
\\
ZDZC
\\
扫描仪
参数确认.xlsx'
filepath
=
'E:
\\
ZDZC
\\
激光打印机
参数确认.xlsx'
brand_washing
(
filepath
)
brand_washing
(
filepath
)
公共代码/产品类别分析.py
View file @
3b298f82
...
@@ -229,21 +229,36 @@ def class_washing(category, filepath, b=0.01):
...
@@ -229,21 +229,36 @@ def class_washing(category, filepath, b=0.01):
for
special_name
in
name_col_list
:
for
special_name
in
name_col_list
:
if
special_name
in
df
.
loc
[
i
,
'产品名称'
]:
if
special_name
in
df
.
loc
[
i
,
'产品名称'
]:
break
break
not_in_name2
.
append
(
i
)
else
:
not_in_name2
.
append
(
i
)
# 对于标准产品参数中,如果有数据不在标准字典中的,挑出来
# 对于标准产品参数中,如果有数据不在标准字典中的,挑出来
t6
=
time
.
time
()
t6
=
time
.
time
()
print
(
t6
-
t5
)
print
(
t6
-
t5
)
print
(
'开始检测标准参数'
)
print
(
'开始检测标准参数'
)
# cursor_zi_new.execute(
# f"select col_value from data_washing_external where category_name='{category}' and col_name='标准参数异常'")
# stdparam_col_fetch = cursor_zi_new.fetchall()
# if not stdparam_col_fetch:
# print('该类别无‘标准参数异常’列名,或者类别名输入有误,请查证数据库。代码继续运行')
# stdparam_col_list = []
# for stdparam_tuple in stdparam_col_fetch:
# stdparam_col_list.append(stdparam_tuple[0])
cursor_zi_new
.
execute
(
cursor_zi_new
.
execute
(
f
"select col_value from data_washing_external where category_name='{category}' and col_name='标准参数异常'"
)
f
"select subtitle, identy from vw_property where categoryid={category_id} and identy!='{0000}'"
stdparam_col_fetch
=
cursor_zi_new
.
fetchall
()
)
if
not
stdparam_col_fetch
:
subtitle_fetch
=
cursor_zi_new
.
fetchall
()
if
not
subtitle_fetch
:
print
(
'该类别无‘标准参数异常’列名,或者类别名输入有误,请查证数据库。代码继续运行'
)
print
(
'该类别无‘标准参数异常’列名,或者类别名输入有误,请查证数据库。代码继续运行'
)
stdparam_col_list
=
[]
stdparam_col_list
=
[]
for
stdparam_tuple
in
stdparam_col_fetch
:
for
subtitle_tuple
in
subtitle_fetch
:
stdparam_col_list
.
append
(
stdparam_tuple
[
0
])
if
subtitle_tuple
[
1
][
2
]
==
'1'
:
# identy第三个值为1代表是标准项
stdparam_col_list
.
append
(
subtitle_tuple
[
0
])
if
not
stdparam_col_list
:
print
(
'该类别无标准参数项'
)
character_minor_dict
=
{}
character_minor_dict
=
{}
for
col_i
in
stdparam_col_list
:
for
col_i
in
stdparam_col_list
:
...
@@ -257,9 +272,9 @@ def class_washing(category, filepath, b=0.01):
...
@@ -257,9 +272,9 @@ def class_washing(category, filepath, b=0.01):
standard_value
=
[]
standard_value
=
[]
for
std_tuple
in
standard_value_fetchall
:
for
std_tuple
in
standard_value_fetchall
:
standard_value
.
append
(
std_tuple
[
0
])
standard_value
.
append
(
std_tuple
[
0
])
# print(f"{col_i},{len(standard_value_fetchall)}")
for
i
in
df
.
index
:
for
i
in
df
.
index
:
value_col
=
df
.
loc
[
i
,
col_i
]
value_col
=
df
.
loc
[
i
,
'*'
+
col_i
]
if
value_col
in
invalid_list
:
if
value_col
in
invalid_list
:
continue
continue
if
value_col
not
in
standard_value
:
if
value_col
not
in
standard_value
:
...
@@ -341,6 +356,6 @@ def class_washing(category, filepath, b=0.01):
...
@@ -341,6 +356,6 @@ def class_washing(category, filepath, b=0.01):
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
category
=
'
扫描仪
'
category
=
'
激光打印机
'
filepath
=
"E:
\\
ZDZC
\\
扫描仪
参数确认.xlsx"
filepath
=
"E:
\\
ZDZC
\\
打印机
参数确认.xlsx"
class_washing
(
category
,
filepath
)
class_washing
(
category
,
filepath
)
公共代码/产品重复型号分析.py
View file @
3b298f82
...
@@ -105,18 +105,21 @@ def product_washing(filepath, category, thre=1, a=0):
...
@@ -105,18 +105,21 @@ def product_washing(filepath, category, thre=1, a=0):
other_parameters_values
.
append
(
brand
[
1
]
.
loc
[
i
,
parameter
])
other_parameters_values
.
append
(
brand
[
1
]
.
loc
[
i
,
parameter
])
tempo_dict
[
i
]
.
extend
(
other_parameters_values
)
tempo_dict
[
i
]
.
extend
(
other_parameters_values
)
# 对比产品参数项,相同则放入related_product
# 对比产品参数项,相同则放入related_product
tested_product
=
[]
tested_product
=
[]
for
i
in
tempo_dict
:
for
i
in
tempo_dict
:
for
j
in
tempo_dict
:
for
j
in
tempo_dict
:
if
i
!=
j
and
set
([
i
,
j
])
not
in
tested_product
:
if
i
!=
j
and
set
([
i
,
j
])
not
in
tested_product
:
count
=
0
for
index
in
range
(
1
,
len
(
tempo_dict
[
i
])):
for
index
in
range
(
1
,
len
(
tempo_dict
[
i
])):
if
tempo_dict
[
i
][
index
]
!=
tempo_dict
[
j
][
index
]
and
\
if
tempo_dict
[
i
][
index
]
!=
tempo_dict
[
j
][
index
]
and
\
(
tempo_dict
[
i
][
index
]
not
in
invalid_list
and
tempo_dict
[
j
][
index
]
not
in
invalid_list
):
(
tempo_dict
[
i
][
index
]
not
in
invalid_list
and
tempo_dict
[
j
][
index
]
not
in
invalid_list
):
tested_product
.
append
(
set
([
i
,
j
]))
count
+=
1
break
# 如果出现了必须相等但不相等的参数,则退出到上一层循环。如果没有出现,则对比型号
break
if
count
!=
0
:
tested_product
.
append
(
set
([
i
,
j
]))
break
# 如果出现了必须相等但不相等的参数,则退出到上一层循环。如果没有出现,则对比型号
accuracy_i
=
0
accuracy_i
=
0
accuracy_j
=
0
accuracy_j
=
0
for
word_i
in
tempo_dict
[
i
][
0
]:
for
word_i
in
tempo_dict
[
i
][
0
]:
...
@@ -174,6 +177,6 @@ def product_washing(filepath, category, thre=1, a=0):
...
@@ -174,6 +177,6 @@ def product_washing(filepath, category, thre=1, a=0):
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
filepath
=
"E:
\\
ZDZC
\\
扫描仪
参数确认.xlsx"
filepath
=
"E:
\\
ZDZC
\\
激光打印机
参数确认.xlsx"
category
=
'
扫描仪
'
category
=
'
激光打印机
'
product_washing
(
filepath
,
category
)
product_washing
(
filepath
,
category
)
公共代码/异常数据表格.xlsx
View file @
3b298f82
No preview for this file type
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment