Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tool-code
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ZGC_INDEX
tool-code
Commits
c03a06b0
Commit
c03a06b0
authored
Dec 03, 2021
by
rico.liu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
afe6589d
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
5 additions
and
168 deletions
+5
-168
zgcindex_data2DB.py
数据治理平台线下处理/线下建库/zgcindex_data2DB.py
+0
-151
zgcindex_sync_excel2ob.py
数据治理平台线下处理/线下建库/zgcindex_sync_excel2ob.py
+5
-17
No files found.
数据治理平台线下处理/线下建库/zgcindex_data2DB.py
View file @
c03a06b0
...
...
@@ -242,29 +242,6 @@ for category in category_list:
cat_sku_list
=
cat_df
[
'FINISH_P_SKU'
]
.
tolist
()
create_price_difference_single
(
category
,
cat_sku_list
)
mssql_index
=
MSSQL
(
'123.56.115.207'
,
'ZI_DataBase'
)
cursor_zdindex
=
mssql_index
.
_cur
cursor_zdindex
.
execute
(
f
"select ProductCode,MaximumPrice from ZI_Price_Quote"
)
price_df
=
pd
.
DataFrame
(
cursor_zdindex
.
fetchall
(),
columns
=
[
tuple
[
0
]
for
tuple
in
cursor_zdindex
.
description
])
for
_id
,
sku
in
zip
(
doc_id_list
,
sku_list
):
try
:
price
=
price_df
[
price_df
[
'ProductCode'
]
==
sku
][
'MaximumPrice'
]
.
tolist
()[
0
]
price
=
str
(
float
(
price
))
print
(
price
)
except
:
price
=
"0.0"
cursor
.
execute
(
f
"update DW_PRODUCT_ALL_RES set FINISH_P_PRICE = '{price}' where DOC_ID = {_id}"
)
#组织SKU对应关系数据(暂未完成)
sku_list
=
[]
cust_sku_list
=
[]
...
...
@@ -301,130 +278,3 @@ res = stock_in_sku_relationship(cust_sku_list,sku_list,channel_alias_list)
print
(
res
)
print
(
"SKU对应关系录入完成"
)
'''
import requests
pic_list = []
pic_power = 1
all_count = len(df)
split_count = 100
for_count = int(all_count/split_count)+1
start_line = 0
end_line = split_count
for i in range(for_count):
i = i+1
if i == for_count:
df_split = df.iloc[start_line:all_count]
if df_split.empty:
break
time = str(datetime.datetime.now())
print(f"第{i}批次开始处理,起始行:{start_line},终止行:{all_count},开始时间:{time}")
else:
time = str(datetime.datetime.now())
print(f"第{i}批次开始处理,起始行:{start_line},终止行:{end_line},开始时间:{time}")
df_split = df.iloc[start_line:end_line]
start_line = end_line
end_line = end_line + split_count
#请求地址
request_url = "http://59.110.219.171:8092/return_data"
#组织请求数据
data_list = str({'data':[[str(url),str(price_source_e2c[source]),pic_power] for url,source in zip(df_split['SOURCE_P_URL'].tolist(),df_split['SOURCE_CHANNEL_SNAME'].tolist())]}).replace("'","
\"
")
payload={'dataList': f'{data_list}'}
response = requests.request("POST", request_url, data=payload)
res = eval(response.text)
#处理未爬取到的数据a
for j in range(len(res)):
if res[j]:
pass
else:
res[j] = {'img_list':[],'class_list':{},'url':''}
pic_list += [str(element['img_list']) for element in res]
time = str(datetime.datetime.now())
print(f"第{i}批次处理完成,起始行:{start_line},终止行:{all_count},结束时间:{time}")
df['SOURCE_P_PIC'] = pic_list
'''
##图片上传服务器
if
not
os
.
path
.
exists
(
f
"/mythjob/pic/{batch}"
):
os
.
mkdir
(
f
"/mythjob/pic/{batch}"
)
upload_local_path
=
f
"/mythjob/pic/{batch}/"
'''
uploda_target_path = f"G:
\\
pic
\\
n
\\
{batch}
\\
"
transport = paramiko.Transport(('123.56.115.207',22))
transport.connect(username='root',password='123456')
sftp = paramiko.SFTPClient.from_transport(transport)
'''
#oss = OSS('LTAI5tHR33mAN4vqoUPsW9H4','iBoR3O23lFAtwwq5kxdlE6QGKLCrAS','https://oss.zgcindex.com/','zgcpic')
auth
=
oss2
.
Auth
(
'LTAI5tHR33mAN4vqoUPsW9H4'
,
'iBoR3O23lFAtwwq5kxdlE6QGKLCrAS'
)
bucket
=
oss2
.
Bucket
(
auth
,
'http://oss-cn-beijing.aliyuncs.com'
,
'zgcpic'
)
#bucket.put_object_from_file(f"pic/n/{batch}/test.jpg", '/Users/rico/Library/Mobile Documents/com~apple~CloudDocs/Watch Pic/381609324125_.pic.jpg')
saved_path_list
=
[]
for
index
,
row
in
df
.
iterrows
():
sku
=
str
(
row
[
'FINISH_P_SKU'
])
pic_list
=
eval
(
row
[
'SOURCE_P_PIC'
])
count
=
int
(
row
[
'THIRD_P_COUNT'
])
for
i
in
range
(
count
):
i
=
i
+
1
exec
(
"pic_list + eval(row['THIRD_P_URL_PIC_
%
s'])"
%
i
)
sort_num
=
0
for
pic_url
in
pic_list
:
sort_num
+=
1
r
=
requests
.
get
(
pic_url
)
with
open
(
f
"/mythjob/pic/{batch}/{sku}-{sort_num}.jpg"
,
'wb'
)
as
f
:
f
.
write
(
r
.
content
)
f
.
close
()
img_path_list
=
os
.
listdir
(
upload_local_path
)
for
img_path
in
img_path_list
:
bucket
.
put_object_from_file
(
f
"pic/n/{batch}/{img_path}"
,
"/mythjob/pic/{batch}/{img_path}"
)
saved_path_list
.
append
(
f
"oss.zgcindex.com/pic/n/{batch}/{img_path}"
)
print
(
"图片数据上传至服务器"
)
#记录图片地址
mssql_new
=
MSSQL
(
'123.56.115.207'
,
'ZI_NEW'
)
cursor_zi_new
=
mssql_new
.
_cur
cursor_zi_new
.
execute
(
f
"select id,sku from p_sku"
)
data
=
cursor_zi_new
.
fetchall
()
skuid_df
=
pd
.
DataFrame
(
data
,
columns
=
[
tuple
[
0
]
for
tuple
in
cursor_zi_new
.
description
])
for
path
in
saved_path_list
:
sku
=
saved_path
.
split
(
"/"
)[
-
1
]
.
split
(
"-"
)[
0
]
skuid
=
skuid_df
[
skuid_df
[
'sku'
]
==
sku
][
'id'
]
.
tolist
()[
0
]
cursor_zi_new
.
execute
(
f
"insert into p_skupic (skuid,path) values ({skuid},'{path}')"
)
mssql_new
.
Close
()
print
(
"图片数据位置信息存储至数据库"
)
\ No newline at end of file
数据治理平台线下处理/线下建库/zgcindex_sync_excel2ob.py
View file @
c03a06b0
...
...
@@ -9,15 +9,17 @@ Created on Mon Nov 22 22:38:22 2021
from
db
import
MYSQL
,
MSSQL
,
OSS
import
pandas
as
pd
path
=
"/Users/rico/WorkSpace/1_Project/Company/中电中采/TEMP Workspace/ES处理相关/ES线下补参处理/(已确认)投影机结果202111241215280008.xlsx"
df
=
pd
.
read_excel
(
path
,
converters
=
{
'FINISH_P_SKU'
:
str
})
export_df
=
df
[[
'DOC_ID'
,
'DATA_BATCH'
,
'DATA_END_TIME'
,
'FLOW_NODE_STATUS'
,
'SOURCE_P_SKU'
,
'SOURCE_CHANNEL_NAME'
,
'SOURCE_CHANNEL_SNAME'
,
'SOURCE_P_NAME'
,
try
:
export_df
=
df
[[
'DOC_ID'
,
'DATA_BATCH'
,
'DATA_END_TIME'
,
'FLOW_NODE_STATUS'
,
'SOURCE_P_SKU'
,
'SOURCE_CHANNEL_NAME'
,
'SOURCE_CHANNEL_SNAME'
,
'SOURCE_P_NAME'
,
'SOURCE_P_LASTCATEGORY_NAME'
,
'SOURCE_P_BRAND_NAME'
,
'SOURCE_P_PRICE'
,
'SOURCE_P_URL'
,
'FINISH_P_REMARK'
,
'FINISH_P_BRAND_NAME'
,
'FINISH_P_BRAND_CODE'
,
'FINISH_P_LASTCATEGORY_NAME'
,
'FINISH_P_LASTCATEGORY_CODE'
,
'FINISH_P_MODEL'
,
'FINISH_P_SKU'
,
'FINISH_P_PARAMS'
,
'FINISH_P_NAME'
,
'FINISH_P_STATUS'
,
'FINISH_P_CHECK_STATUS'
]]
except
:
export_df
=
df
[[
'DOC_ID'
,
'DATA_BATCH'
,
'DATA_END_TIME'
,
'FLOW_NODE_STATUS'
,
'SOURCE_P_SKU'
,
'SOURCE_CHANNEL_NAME'
,
'SOURCE_CHANNEL_SNAME'
,
'SOURCE_P_NAME'
,
export_df
=
df
[[
'DOC_ID'
,
'DATA_BATCH'
,
'DATA_END_TIME'
,
'FLOW_NODE_STATUS'
,
'SOURCE_P_SKU'
,
'SOURCE_CHANNEL_NAME'
,
'SOURCE_CHANNEL_SNAME'
,
'SOURCE_P_NAME'
,
'SOURCE_P_LASTCATEGORY_NAME'
,
'SOURCE_P_BRAND_NAME'
,
'SOURCE_P_PRICE'
,
'SOURCE_P_URL'
,
'FINISH_P_REMARK'
,
'FINISH_P_BRAND_NAME'
,
'FINISH_P_BRAND_CODE'
,
'FINISH_P_LASTCATEGORY_NAME'
,
'FINISH_P_LASTCATEGORY_CODE'
,
'FINISH_P_PARAMS'
,
'FINISH_P_NAME'
,
'FINISH_P_CHECK_STATUS'
]]
...
...
@@ -39,16 +41,3 @@ try:
print
(
'>>> 插入数据成功,表 {} 共插入 {} 行数据'
.
format
(
tablename
,
len
(
export_df
)))
except
Exception
as
e
:
print
(
'>>> 插入数据失败'
,
e
)
df
=
pd
.
read_excel
(
"/Users/rico/WorkSpace/1_Project/Company/中电中采/TEMP Workspace/ES处理相关/ES线下补参处理/安徽三大类结果1119(1).xlsx"
)
for
index
,
row
in
df
.
iterrows
():
doc_id
=
row
[
'DOC_ID'
]
name
=
row
[
'SOURCE_P_LASTCATEGORY_NAME'
]
brand
=
row
[
'SOURCE_P_BRAND_NAME'
]
cursor
.
execute
(
f
"update DW_PRODUCT_ALL_RES set SOURCE_P_LASTCATEGORY_NAME = '{name}', SOURCE_P_BRAND_NAME = '{brand}' where DOC_ID = '{doc_id}'"
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment