Commit ded69e78 authored by Jialin

整理后的代码

parent e6658a7a
<?xml version="1.0" encoding="UTF-8"?>
<!-- Project-level IDE settings: the JavaScript language level and the project SDK. -->
<project version="4">
  <component name="JavaScriptSettings">
    <option name="languageLevel" value="ES6" />
  </component>
  <!-- The project SDK is referenced by its configured name, "Python 3.7 (tf14)". -->
  <component name="ProjectRootManager"
             version="2"
             project-jdk-name="Python 3.7 (tf14)"
             project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!-- Lists the module descriptor files that make up this project (a single .iml here). -->
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/公共代码.iml"
              filepath="$PROJECT_DIR$/.idea/公共代码.iml" />
    </modules>
  </component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maps the parent of the project directory ($PROJECT_DIR$/..) to Git. -->
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
  </component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!--
  IDE workspace state: the active change list, window and tool-window layout,
  a temporary Python run configuration, coverage suite metadata and editor
  caret history.
  NOTE(review): several values are machine-specific absolute paths
  (E:\ZDZC\..., D:/PycharmProjects/...); confirm that this state is meant
  to be shared through version control.
-->
<project version="4">
  <!-- Pending local changes tracked by the IDE. -->
  <component name="ChangeListManager">
    <list default="true"
          id="a6309c41-68db-41c7-97b3-a479bb044273"
          name="Default Changelist"
          comment="">
      <change beforePath="$PROJECT_DIR$/错误品牌分析.py"
              beforeDir="false"
              afterPath="$PROJECT_DIR$/错误品牌分析.py"
              afterDir="false" />
    </list>
    <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
    <option name="SHOW_DIALOG" value="false" />
    <option name="HIGHLIGHT_CONFLICTS" value="true" />
    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
    <option name="LAST_RESOLUTION" value="IGNORE" />
  </component>
  <component name="FileEditorManager">
    <leaf />
  </component>
  <component name="Git.Settings">
    <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$/.." />
  </component>
  <component name="ProjectConfigurationFiles">
    <option name="files">
      <list>
        <option value="$PROJECT_DIR$/.idea/公共代码.iml" />
        <option value="$PROJECT_DIR$/.idea/vcs.xml" />
        <option value="$PROJECT_DIR$/.idea/misc.xml" />
        <option value="$PROJECT_DIR$/.idea/modules.xml" />
      </list>
    </option>
  </component>
  <component name="ProjectFrameBounds" extendedState="6">
    <option name="x" value="-10" />
    <option name="y" value="-10" />
    <option name="width" value="1750" />
    <option name="height" value="1030" />
  </component>
  <component name="ProjectLevelVcsManager" settingsEditedManually="true" />
  <!-- Expanded nodes of the project tree, per view pane. -->
  <component name="ProjectView">
    <navigator currentView="Scope"
               currentSubView="Scope 'Project Files'; set:Project Files; class com.intellij.psi.search.scope.ProjectFilesScope"
               proportions=""
               version="1">
      <foldersAlwaysOnTop value="true" />
    </navigator>
    <panes>
      <pane id="ProjectPane">
        <subPane>
          <expand>
            <path>
              <item name="公共代码" type="b2602c69:ProjectViewProjectNode" />
              <item name="公共代码" type="462c0819:PsiDirectoryNode" />
            </path>
          </expand>
          <select />
        </subPane>
      </pane>
      <pane id="Scope">
        <subPane subId="Scope 'Project Files'; set:Project Files; class com.intellij.psi.search.scope.ProjectFilesScope">
          <expand>
            <path>
              <item name="公共代码" type="3d21c010:ScopeViewTreeModel$ProjectNode" />
              <item name="E:\ZDZC\local_library\公共代码" type="442cc68d:ScopeViewTreeModel$RootNode" />
            </path>
          </expand>
          <select />
        </subPane>
      </pane>
    </panes>
  </component>
  <component name="PropertiesComponent">
    <property name="WebServerToolWindowFactoryState" value="false" />
    <property name="last_opened_file_path" value="D:/PycharmProjects/ZDZC" />
    <property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" />
    <property name="nodejs_npm_path_reset_for_default_project" value="true" />
  </component>
  <component name="RunDashboard">
    <option name="ruleStates">
      <list>
        <RuleState>
          <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
        </RuleState>
        <RuleState>
          <option name="name" value="StatusDashboardGroupingRule" />
        </RuleState>
      </list>
    </option>
  </component>
  <!-- Temporary run configuration that executes 错误产品分析.py from the project directory. -->
  <component name="RunManager">
    <configuration name="错误产品分析"
                   type="PythonConfigurationType"
                   factoryName="Python"
                   temporary="true">
      <module name="公共代码" />
      <option name="INTERPRETER_OPTIONS" value="" />
      <option name="PARENT_ENVS" value="true" />
      <envs>
        <env name="PYTHONUNBUFFERED" value="1" />
      </envs>
      <option name="SDK_HOME" value="" />
      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
      <option name="IS_MODULE_SDK" value="true" />
      <option name="ADD_CONTENT_ROOTS" value="true" />
      <option name="ADD_SOURCE_ROOTS" value="true" />
      <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/错误产品分析.py" />
      <option name="PARAMETERS" value="" />
      <option name="SHOW_COMMAND_LINE" value="false" />
      <option name="EMULATE_TERMINAL" value="false" />
      <option name="MODULE_MODE" value="false" />
      <option name="REDIRECT_INPUT" value="false" />
      <option name="INPUT_FILE" value="" />
      <method v="2" />
    </configuration>
    <recent_temporary>
      <list>
        <item itemvalue="Python.错误产品分析" />
      </list>
    </recent_temporary>
  </component>
  <component name="SvnConfiguration">
    <configuration />
  </component>
  <component name="TaskManager">
    <task active="true" id="Default" summary="Default task">
      <changelist id="a6309c41-68db-41c7-97b3-a479bb044273" name="Default Changelist" comment="" />
      <created>1617262122270</created>
      <option name="number" value="Default" />
      <option name="presentableId" value="Default" />
      <updated>1617262122270</updated>
      <workItem from="1617262134477" duration="238000" />
    </task>
    <servers />
  </component>
  <component name="TimeTrackingManager">
    <option name="totallyTimeSpent" value="238000" />
  </component>
  <!-- Main frame geometry and the placement of every tool window. -->
  <component name="ToolWindowManager">
    <frame x="-7" y="-7" width="1550" height="878" extended-state="6" />
    <layout>
      <window_info id="Favorites" side_tool="true" />
      <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.25" />
      <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
      <window_info anchor="bottom" id="Docker" show_stripe_button="false" />
      <window_info anchor="bottom" id="Database Changes" />
      <window_info anchor="bottom" id="Version Control" />
      <window_info anchor="bottom" id="Python Console" />
      <window_info anchor="bottom" id="Terminal" />
      <window_info anchor="bottom" id="Event Log" side_tool="true" />
      <window_info anchor="bottom" id="Message" order="0" />
      <window_info anchor="bottom" id="Find" order="1" />
      <window_info anchor="bottom" id="Run" order="2" weight="0.3297587" />
      <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
      <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
      <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
      <window_info anchor="bottom" id="TODO" order="6" />
      <window_info anchor="right" id="SciView" />
      <window_info anchor="right" id="Database" />
      <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
      <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
      <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
    </layout>
  </component>
  <component name="TypeScriptGeneratedFilesManager">
    <option name="version" value="1" />
  </component>
  <component name="com.intellij.coverage.CoverageDataManagerImpl">
    <SUITE FILE_PATH="coverage/$.coverage"
           NAME="错误产品分析 Coverage Results"
           MODIFIED="1617262275728"
           SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider"
           RUNNER="coverage.py"
           COVERAGE_BY_TEST_ENABLED="true"
           COVERAGE_TRACING_ENABLED="false"
           WORKING_DIRECTORY="$PROJECT_DIR$" />
  </component>
  <!-- Last caret and selection position for each recently edited file. -->
  <component name="editorHistoryManager">
    <entry file="file://$PROJECT_DIR$/错误品牌分析.py">
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="-63">
          <caret line="15"
                 column="25"
                 selection-start-line="15"
                 selection-start-column="25"
                 selection-end-line="15"
                 selection-end-column="25" />
        </state>
      </provider>
    </entry>
    <entry file="file://$PROJECT_DIR$/错误产品分析.py">
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="252">
          <caret line="24"
                 column="20"
                 lean-forward="true"
                 selection-start-line="24"
                 selection-start-column="20"
                 selection-end-line="24"
                 selection-end-column="20" />
        </state>
      </provider>
    </entry>
  </component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!-- Python module descriptor: content root, dependency order entries and the test runner. -->
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <!-- The module directory itself is the only content root. -->
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="TestRunnerService">
    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
  </component>
</module>
\ No newline at end of file
...@@ -244,6 +244,6 @@ def brand_washing(filepath,thre=0.5,inner_thre=0.5,a=1,sheet_name=0): ...@@ -244,6 +244,6 @@ def brand_washing(filepath,thre=0.5,inner_thre=0.5,a=1,sheet_name=0):
workbook.close() workbook.close()
if __name__ == '__main__': if __name__ == '__main__':
filepath = 'E:\\ZDZC\\激光打印机参数确认.xlsx' filepath = 'E:\\ZDZC\\扫描仪参数确认.xlsx'
brand_washing(filepath) brand_washing(filepath)
...@@ -19,7 +19,7 @@ def class_washing(category, filepath, c_list,a=0.02, b=0.01): ...@@ -19,7 +19,7 @@ def class_washing(category, filepath, c_list,a=0.02, b=0.01):
df = pd.read_excel(filepath,converters = {'产品编码':str}) df = pd.read_excel(filepath,converters = {'产品编码':str})
df.drop(columns='Unnamed: 0', axis=1, inplace=True) df.drop(columns='Unnamed: 0', axis=1, inplace=True)
# 检测每列数据中 同一数据类型的产品数量少于产品总数量的b 的产品index.(可优化成字典形式存储读取) # 检测每列数据中 同一数据类型的产品数量少于产品总数量的b 的产品index.(可优化成字典形式存储读取)
dtype_minor = [] dtype_minor_dict = {}
for col in df.columns: for col in df.columns:
type_list = {} type_list = {}
valid_index = df.index[(df[col] != '暂无数据') == ((df[col] != '无参数,需补充') == (df[col].notnull()))] valid_index = df.index[(df[col] != '暂无数据') == ((df[col] != '无参数,需补充') == (df[col].notnull()))]
...@@ -33,7 +33,13 @@ def class_washing(category, filepath, c_list,a=0.02, b=0.01): ...@@ -33,7 +33,13 @@ def class_washing(category, filepath, c_list,a=0.02, b=0.01):
if type_list[data_type_i] < len(valid_index) * b: if type_list[data_type_i] < len(valid_index) * b:
for i in valid_index: for i in valid_index:
if type(df.loc[i][col]) == data_type_i: if type(df.loc[i][col]) == data_type_i:
dtype_minor.append(i) if i in dtype_minor_dict.keys():
dtype_minor_dict[i] += ' '+col
elif i not in dtype_minor_dict.keys():
dtype_minor_dict[i] = col
dtype_minor=[]
dtype_minor.extend(dtype_minor_dict.keys())
# 在检测完产品数据类型后,将所有数据类型转换为string # 在检测完产品数据类型后,将所有数据类型转换为string
for col in df.columns: for col in df.columns:
...@@ -79,7 +85,7 @@ def class_washing(category, filepath, c_list,a=0.02, b=0.01): ...@@ -79,7 +85,7 @@ def class_washing(category, filepath, c_list,a=0.02, b=0.01):
brand_minor.append(i) brand_minor.append(i)
# 检测产品参数列中数据长度离该列平均数据长度2*std之外的产品,7是第一个产品参数列,-2是质保时间,-1是产品型号 # 检测产品参数列中数据长度离该列平均数据长度2*std之外的产品,7是第一个产品参数列,-2是质保时间,-1是产品型号
length_minor = [] length_minor_dict = {}
for col in df.columns[7:-2]: for col in df.columns[7:-2]:
col_length = [] col_length = []
valid_index = df.index[(df[col] != '暂无数据') == ((df[col] != '无参数,需补充') == (df[col].notnull()))] valid_index = df.index[(df[col] != '暂无数据') == ((df[col] != '无参数,需补充') == (df[col].notnull()))]
...@@ -89,10 +95,16 @@ def class_washing(category, filepath, c_list,a=0.02, b=0.01): ...@@ -89,10 +95,16 @@ def class_washing(category, filepath, c_list,a=0.02, b=0.01):
mean = np.array(col_length).mean() mean = np.array(col_length).mean()
for counter, length in enumerate(col_length): for counter, length in enumerate(col_length):
if length < mean - 2 * std or length > mean + 2 * std: if length < mean - 2 * std or length > mean + 2 * std:
length_minor.append(valid_index[counter]) # length_minor_dict[valid_index[counter]]=col
index=valid_index[counter]
if index in length_minor_dict.keys():
length_minor_dict[index] += ' ' + col
elif index not in length_minor_dict.keys():
length_minor_dict[index] = col
length_minor=[]
length_minor.extend(length_minor_dict.keys())
# 检测产品参数列数据格式小于总数量的b的产品 # 检测产品参数列数据格式小于总数量的b的产品
format_minor = [] format_minor_dict = {}
for col in df.columns[7:-2]: for col in df.columns[7:-2]:
counter_dict = {} counter_dict = {}
valid_index = df.index[(df[col] != '暂无数据') == ((df[col] != '无参数,需补充') == (df[col].notnull()))] valid_index = df.index[(df[col] != '暂无数据') == ((df[col] != '无参数,需补充') == (df[col].notnull()))]
...@@ -119,7 +131,15 @@ def class_washing(category, filepath, c_list,a=0.02, b=0.01): ...@@ -119,7 +131,15 @@ def class_washing(category, filepath, c_list,a=0.02, b=0.01):
for keys in counter_dict: for keys in counter_dict:
if len(counter_dict[keys]) < len(valid_index)*b: if len(counter_dict[keys]) < len(valid_index)*b:
format_minor.extend(counter_dict[keys]) for keys_index in counter_dict[keys]:
if keys_index in format_minor_dict.keys():
format_minor_dict[keys_index] += ' ' + col
elif keys_index not in format_minor_dict.keys():
format_minor_dict[keys_index] = col
format_minor=[]
format_minor.extend(format_minor_dict.keys())
# length_record = [] # length_record = []
# for keys in counter_dict: # for keys in counter_dict:
# if not length_record: # if not length_record:
...@@ -176,11 +196,11 @@ def class_washing(category, filepath, c_list,a=0.02, b=0.01): ...@@ -176,11 +196,11 @@ def class_washing(category, filepath, c_list,a=0.02, b=0.01):
count += 1 count += 1
final_df.loc[i, '产品类型异常'] = 1 final_df.loc[i, '产品类型异常'] = 1
if i in format_minor: if i in format_minor:
count += 1 count += len(format_minor_dict[i].split()) # 如果该行数据有多列数据格式异常,就要加多次,
final_df.loc[i, '数据格式异常'] = 1 final_df.loc[i, '数据格式异常'] = format_minor_dict[i] # 但其中数据是空格分割的字符串,所以用split
if i in length_minor: if i in length_minor:
count += 1 count += len(length_minor_dict[i].split())
final_df.loc[i, '数据长度异常'] = 1 final_df.loc[i, '数据长度异常'] = length_minor_dict[i]
if i in brand_minor: if i in brand_minor:
count += 1 count += 1
final_df.loc[i, '品牌异常'] = 1 final_df.loc[i, '品牌异常'] = 1
...@@ -191,8 +211,8 @@ def class_washing(category, filepath, c_list,a=0.02, b=0.01): ...@@ -191,8 +211,8 @@ def class_washing(category, filepath, c_list,a=0.02, b=0.01):
count += 1 count += 1
final_df.loc[i, '产品名称异常'] = 1 final_df.loc[i, '产品名称异常'] = 1
if i in dtype_minor: if i in dtype_minor:
count += 1 count += len(dtype_minor_dict[i].split())
final_df.loc[i, '数据类型异常'] = 1 final_df.loc[i, '数据类型异常'] = dtype_minor_dict[i]
for keys_i in character_minor_dict: for keys_i in character_minor_dict:
if i in character_minor_dict[keys_i]: if i in character_minor_dict[keys_i]:
final_df.loc[i, keys_i + '异常'] = 1 final_df.loc[i, keys_i + '异常'] = 1
...@@ -207,7 +227,10 @@ def class_washing(category, filepath, c_list,a=0.02, b=0.01): ...@@ -207,7 +227,10 @@ def class_washing(category, filepath, c_list,a=0.02, b=0.01):
if __name__ == '__main__': if __name__ == '__main__':
category='激光打印机' #category='激光打印机'
filepath="E:\\ZDZC\\激光打印机参数确认.xlsx" #filepath="E:\\ZDZC\\激光打印机参数确认.xlsx"
c_list=[6,7,-4,-3] #c_list=[6,7,-4,-3]
category = '扫描仪'
filepath="E:\\ZDZC\\扫描仪参数确认.xlsx"
c_list=[7,8,9]
class_washing(category, filepath, c_list) class_washing(category, filepath, c_list)
...@@ -149,5 +149,5 @@ def product_washing(filepath, thre=1, a=0): ...@@ -149,5 +149,5 @@ def product_washing(filepath, thre=1, a=0):
if __name__ == '__main__': if __name__ == '__main__':
filepath = "E:\\ZDZC\\激光打印机参数确认.xlsx" filepath = "E:\\ZDZC\\扫描仪参数确认.xlsx"
product_washing(filepath) product_washing(filepath)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment