Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
京
京东搜索采集自动验证
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
黄梓誉
京东搜索采集自动验证
Commits
4a77589c
Commit
4a77589c
authored
Mar 21, 2024
by
huangiyu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
1
parent
ca02d78d
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
85 additions
and
0 deletions
+85
-0
京东搜索_请求接口版.py
京东搜索_请求接口版.py
+85
-0
No files found.
京东搜索_请求接口版.py
0 → 100644
View file @
4a77589c
import json
import os
import re

import pymysql
import requests
# Connection settings for the `zdzs_proxy` MySQL database.
#
# Each value can be overridden through an environment variable; the literal
# fallbacks keep the script working exactly as before when nothing is set.
# NOTE(review): the fallback credentials are committed in plain text. They
# should be rotated and removed from the source once the environment
# variables are provisioned wherever this script runs.
mysql_zdzs_proxy_host = os.environ.get('MYSQL_ZDZS_PROXY_HOST', '59.110.219.171')
mysql_zdzs_proxy_user = os.environ.get('MYSQL_ZDZS_PROXY_USER', 'zgcindex')
mysql_zdzs_proxy_password = os.environ.get('MYSQL_ZDZS_PROXY_PASSWORD', 'zgcprice2019')
mysql_zdzs_proxy_database = os.environ.get('MYSQL_ZDZS_PROXY_DATABASE', 'zdzs_proxy')

# One shared connection for the whole script. `DictCursor` makes every fetched
# row a dict keyed by column name, and `autocommit=True` means each statement
# is committed as soon as it executes (no explicit commit is issued anywhere).
conn = pymysql.connect(
    host=mysql_zdzs_proxy_host,
    user=mysql_zdzs_proxy_user,
    password=mysql_zdzs_proxy_password,
    database=mysql_zdzs_proxy_database,
    charset="utf8mb4",
    cursorclass=pymysql.cursors.DictCursor,
    autocommit=True,
)

# Single module-level cursor reused by every query the script runs.
cur = conn.cursor()
def mysql_zdzs_proxy(sql=None, params=None):
    """Execute one SQL statement on the shared cursor and return its rows.

    The statement is echoed to stdout before it runs. This is a best-effort
    helper: any database error is reported on stdout and turned into an empty
    result instead of propagating to the caller.

    Args:
        sql: The SQL text to execute. ``None`` or an empty string is treated
            as "nothing to do".
        params: Optional parameters forwarded to ``cur.execute`` so the
            driver performs the quoting. When omitted, ``sql`` is sent as-is.

    Returns:
        Whatever ``cur.fetchall()`` yields for the statement, or ``[]`` when
        there was no statement or the statement failed.
    """
    if not sql:
        return []

    print(sql)
    try:
        cur.execute(sql, params)
        return cur.fetchall()
    except pymysql.err.IntegrityError as exc:
        # Constraint violations (for example a duplicate key) are tolerated,
        # but reported so that skipped statements are visible in the output.
        print(f"mysql_zdzs_proxy: integrity error, statement skipped: {exc!r}")
        return []
    except Exception as exc:
        # Deliberate catch-all at the database boundary: keep the script
        # running, but never hide the reason a statement failed.
        print(f"mysql_zdzs_proxy: statement failed: {exc!r}")
        return []
from
urllib.parse
import
quote
# Compiled once at import time; matches a "<", one or more non-">" characters,
# then the closing ">".
_HTML_TAG_RE = re.compile(r'<[^>]+>')


def remove_html_tags(text):
    """Return *text* with every ``<...>`` tag removed.

    Only the tags themselves are dropped; the text between them is kept.
    A lone ``<`` with no closing ``>`` is left untouched.

    Args:
        text: The string to clean, or ``None``.

    Returns:
        The cleaned string, or ``''`` when *text* is ``None``.
    """
    if text is None:
        return ''
    # Delete HTML tags with a regular expression.
    return _HTML_TAG_RE.sub('', text)
# ---------------------------------------------------------------------------
# Crawl driver
#
# For every pending row of DMP_SEARCH_SPIDER: recover the search keyword from
# the stored search-page URL, call the JD search API for that keyword, collect
# the SKUs listed under the "291" key of the response, and store them as a
# JSON string in DMP_SEARCH_DATA.
# ---------------------------------------------------------------------------

# Pieces stripped from the stored search-page URL to recover the raw keyword.
_SEARCH_PAGE_PREFIX = 'https://search.jd.com/search?keyword='
_SEARCH_PAGE_SUFFIX = '&enc=utf-8'

# Key of the SKU list in the API response (the request asks for ad_ids "291:19").
_AD_SLOT_KEY = '291'

# Upper bound, in seconds, for one API call so a stalled connection cannot
# hang the whole run.
_REQUEST_TIMEOUT_SECONDS = 30

# Request headers are identical for every keyword, so they are built once.
# NOTE(review): the Cookie is a captured, logged-in browser session committed
# in plain text. It should be moved out of the source and the session
# invalidated.
headers = {
    'origin': 'https://search.jd.com',
    'Cookie': (
        'shshshfpa=302d4458-a337-1470-3d87-19967a3d383c-1664177595; __jdu=1664177649338438720918; pinId=m_q_aGs3ba1NmxbgWiZkb7V9-x-f3wj7; jcap_dvzw_fp=6hXnN9RzY6q93f9CIqpUUYVuWPqw8tbJ848WPVvfBOjJmE9TIGJb1Yf5gt4E-YG79w4dtC-1GyVyjEGpw5-w2w; shshshfpx=302d4458-a337-1470-3d87-19967a3d383c-1664177595; shshshfp=1f6ff101d0ef7e3e106d3b6137763832; pin=jd_54fd27b0d2b22; unick=huangziyu0726; _tp=j3ClGrQ8eyicnottNC9z45B9wgXg9sbTugOWYIqmeB0'
        '%3D; _pst=jd_54fd27b0d2b22; autoOpenApp_downCloseDate_auto=1705733792609_1800000; autoOpenApp_downCloseDate_autoOpenApp_autoPromptly=1706681658897_1; _gcl_au=1.1.1699672507.1709478014; mba_muid=1664177649338438720918; user-key=88908ac9-9cfd-4652-9b2f-f1b9ecad98ea; unpl=JF8EALJnNSttDBhXBB0KSBRDHFsDWw8MQx8Dbm8EA15dHlYDHlAdFRJ7XlVdWBRKFR9uYxRXX1NOVw4aASsSEXteU11bD00VB2xXVgQFDQ8WUUtBSUt-S1tUWFkNTRYAbGUEZG1bS2QFGjIbFRdOXVJYWg1DFQFmYgddW1tKXAUSMhoiF3ttZFpVAU0QAl9mNVVtGh8IABgEHxMXBl1TWVgITREEam8HVlRdSV0DGAMTEhl7XGRd; __jdv=229668127|baidu-search|t_262767352_baidusearch|cpc|171754702230_0_eb2079b7bf7f4c5991090f24d374c763|1709653324334; mt_xid=V2_52007VwMVVVhYVFwYTBFeB24GEFtbW1NSH0ApWFA1VBZSCV1OCUtBHUAAMAFCTlQLU1wDThBcVWYGQQBbCwVbL0oYXwB7AhBOXF9DWx5CGlkOZwMiUG1bYlkeTxFZAFcAFVJb; areaId=1; ipLoc-djd=1-2800-55811-0; RT="z; token=0331448fb40d80da734aceff7d48bafa,3,950263; __tk=uzXDYz2FXUJ0uzu0XsqFZsvTvsk0XpbTvSnoXzJ5uzX,3,950263; 3AB9D23F7A4B3CSS=jdd036KK7GVN2A5VXFOZVHAPY25WX7HXJAN62PDVL6JU6EOWVUQJMGEAWKRGEDHIE367RY5TVUPVMRDVFP3CN276H7UOWKYAAAAMOIA6VIPAAAAAACT7MJYE5DSZYVQX; _gia_d=1; jsavif=1; wlfstk_smdl=rtj4txsjw59ztpv9fsn6yxbvt6ty3wiu; TrackID=1C2yN2jHh6Wg-NhNJLzHHqcnZDWxML5PG3tsIgqmxor1OVm1c_lEWSZjqo9H4QEOUaP-l0Y7EKLOnfjHC2RU7Zdj3DG6mRzJkXqQdNCcnpYM; thor=237C3D3A4A70C62901A697588128F575FE0CEF91A4213E671F253B27AF7FB601536685D63E703D077E709159409EBEC0A3B565591643E47E6D1B0A57C5DFFA4F2A9ECC9ADFD979740CDD4D1842A5CCF28165BD1E1640E4BEA87287FF3023299B2A43DCC6FB3EAA976E79787E1A14A8622CA12CA512495BF85C7F69E30C2E85066B6369585D02B7A58F37806B5C4A07D5DD8E07D1536CF0A7BC676D403012149B; flash=2_qECj__VcDB1S4gEaqS6lrKKvZR4TCQeCSlLdk3QgDH49Q00GmtDvwTO4fhRVPGj_EQMRN_rL8wOk4cX0er-Ohvzpua_6-Cmq4DDcx0Kw5ek*; ceshi3.com=000; mba_sid=17104747515401167401247798463.1; __jd_ref_cls=LoginDisposition_Go; '
        'x-rp-evtoken=N-nAb5Oj6OS1u8hkvixIgNLm8epG1xAIhLtiT01VwscTFkwuzJmUObwZwoww3HGIwRv7KyWgxiTC2ghErjsmtxK1DyAkyLbcd5ne9cxLAiPxp1q6nvjZScJDfpNzXE1k5ihL2oqXaUM_PwNW2jZaINB6AUpZgSu1KOrAs0nlGROPmgZMyeVGQG3w1hLHt5rvi3Ylmy-qdixDl4Ds4t92oegs-EbHvqtsTNHNnJcMjG0'
        '%3D; __jda=143920055.1664177649338438720918.1664177649.1710402971.1710474746.407; __jdc=143920055; shshshfpb=BApXeUi81Q-tA2-pIUXD6xI9OcOHB-4nmByZWcrpX9xJ1MsQPq4O2; 3AB9D23F7A4B3C9B=6KK7GVN2A5VXFOZVHAPY25WX7HXJAN62PDVL6JU6EOWVUQJMGEAWKRGEDHIE367RY5TVUPVMRDVFP3CN276H7UOWKY; __jdb=143920055.6.1664177649338438720918|407.1710474746'
    ),
    'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',
    'Accept': '*/*',
    'Host': 'api.m.jd.com',
    'Connection': 'keep-alive',
}


def _build_search_url(keyword):
    """Return the ``pc_search_adv_Search`` API URL for an already URL-quoted keyword."""
    # The `body` query parameter is a percent-encoded JSON object; only the
    # keyword varies between requests.
    return (
        "https://api.m.jd.com/api?functionId=pc_search_adv_Search"
        "&appid=search-pc-java&client=pc&clientVersion=1.0.0"
        "&uuid=143920055.1664177649338438720918.1664177649.1710402971.1710474746.407"
        "&loginType=3&t=1710474777240"
        "&body=%7B%22area%22:%221%22,%22enc%22:%22utf-8%22,"
        f"%22keyword%22:%22{keyword}%22,"
        "%22adType%22:7,%22page%22:%221%22,%22ad_ids%22:%22291:19%22,"
        "%22xtest%22:%22new_search%22%7D"
        "&x-api-eid-token=jdd036KK7GVN2A5VXFOZVHAPY25WX7HXJAN62PDVL6JU6EOWVUQJMGEAWKRGEDHIE367RY5TVUPVMRDVFP3CN276H7UOWKYAAAAMOIA6VIPAAAAAACT7MJYE5DSZYVQX"
    )


def _extract_sku_rows(response_json):
    """Map every SKU entry under the "291" key to the dict shape that is stored.

    Raises ``KeyError``/``TypeError`` when the payload does not have the
    expected structure; the caller decides how to handle that.
    """
    return [
        {
            'is_self_operate_div': '',
            'item_url': 'https://item.jd.com/' + str(sku_info['sku_id']) + '.html',
            'sku_id': str(sku_info['sku_id']),
            'shop_name': sku_info['shop_link']['shop_name'],
            'sku_name': remove_html_tags(sku_info['ad_title']),
            'sku_price': sku_info['sku_price'],
        }
        for sku_info in response_json[_AD_SLOT_KEY]
    ]


# Pending work: spider rows that have no result row yet, restricted to the
# JD mall, project 110, and ids ending in 0-3.
url_data = mysql_zdzs_proxy(
    "SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '110' and (id like '%0' or id like '%1' or id like '%2' or id like '%3' ) limit 10000"
)

for row in url_data:
    keyword = (
        row['url']
        .replace(_SEARCH_PAGE_PREFIX, "")
        .replace(_SEARCH_PAGE_SUFFIX, "")
    )
    print(keyword)
    url = _build_search_url(quote(keyword))

    try:
        response = requests.request(
            "GET", url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS
        )
        response_json = json.loads(response.text)
    except (requests.RequestException, ValueError) as exc:
        # Network failure, timeout, or a non-JSON body. Nothing is inserted,
        # so the row is still selected as pending on the next run.
        print(f"search request failed for id={row['id']}: {exc!r}")
        continue
    print(response_json)

    # An empty object means "no answer"; nothing is stored for it.
    if response_json == {}:
        continue

    try:
        dataList = _extract_sku_rows(response_json)
    except (KeyError, TypeError) as exc:
        # Payload without the expected SKU structure (for example an error
        # response); skip it instead of aborting the whole run.
        print(f"unexpected response shape for id={row['id']}: {exc!r}")
        continue

    # Let the driver quote the values: hand-built quoting that only doubles
    # single quotes leaves the backslashes in the JSON text open to
    # reinterpretation by MySQL, which corrupts the stored document.
    insert_sql = cur.mogrify(
        "INSERT INTO DMP_SEARCH_DATA(id,result,mallId) VALUES (%s,%s,%s)",
        (row['id'], json.dumps(dataList, ensure_ascii=False), row['mallId']),
    )
    mysql_zdzs_proxy(insert_sql)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment