京东商品评论接口技术实现：从接口分析到数据挖掘全方案

京东商品评论数据包含丰富的用户反馈信息，对市场分析、产品改进和用户需求挖掘具有重要价值。本文将系统讲解京东商品评论接口的技术实现，重点解决接口参数构造、反爬机制应对、数据解析与分析等核心问题，提供一套合规高效的技术方案，同时严格遵守平台规则与数据采集规范。
一、京东评论接口原理与合规要点

京东商品评论数据通过 API 接口动态加载，采用 JSON 格式返回，包含评论内容、评分、用户信息等关键数据。实现接口需遵循以下合规要点：

数据用途限制：仅用于个人学习研究、市场调研，不得用于商业竞争或恶意分析
请求频率控制：单 IP 单小时请求不超过 60 次，单商品评论采集间隔不低于 15 秒
用户协议尊重：不绕过京东正常访问限制，不使用破解手段或技术获取数据
隐私保护：自动过滤评论中包含的手机号、地址等个人隐私信息

京东评论接口的核心技术流程如下：

plaintext

商品ID解析 → 评论参数生成 → 评论请求发送 → 数据解析与清洗 → 结构化存储

点击获取key和secret
二、核心技术实现：从接口分析到数据提取
1. 京东商品 ID 解析工具

京东商品 ID（skuId）是获取评论的基础，可从商品 URL 或页面元数据中提取：

python

运行

import re
import requests
from lxml import etree

class JdSkuIdParser:
    """Resolve the JD product id (skuId) for a product URL.

    The id is read straight from the URL when the URL carries it; otherwise
    the product page is downloaded and searched for it.
    """

    # URL shapes that embed the skuId, tried in this order.
    _URL_PATTERNS = (
        re.compile(r"item\.jd\.com/(\d+)\.html"),     # standard item page
        re.compile(r"sku=(\d+)"),                     # URL carrying a sku parameter
        re.compile(r"product\.jd\.com/(\d+)\.html"),  # product page
    )
    # Inline-script forms, tried in this order for every <script> body.
    # The second pattern also accepts a quoted value (skuId: "123").
    _SCRIPT_PATTERNS = (
        re.compile(r'skuId\s*=\s*"(\d+)"'),
        re.compile(r'skuId\s*:\s*["\']?(\d+)'),
    )

    def __init__(self):
        # Browser-like headers sent when the product page has to be fetched.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Referer": "https://www.jd.com/"
        }

    def parse_from_url(self, product_url):
        """Extract the skuId from the URL text alone.

        :param product_url: product URL; ``None`` or ``""`` is tolerated
        :return: the skuId as a string, or ``None`` when no pattern matches
        """
        if not product_url:
            return None
        for pattern in self._URL_PATTERNS:
            match = pattern.search(product_url)
            if match:
                return match.group(1)
        return None

    def parse_from_page(self, product_url):
        """Download the product page and extract the skuId from its markup.

        The ``<meta name="skuId">`` tag is checked first, then inline scripts.

        :param product_url: product URL to fetch
        :return: the skuId as a string, or ``None`` when the request fails,
            the server answers with an error status, or no id is found
        """
        if not product_url:
            return None

        try:
            response = requests.get(
                product_url,
                headers=self.headers,
                timeout=10,
                allow_redirects=True
            )
            # An error page cannot contain a usable skuId; treat it as a failure.
            response.raise_for_status()
            response.encoding = "utf-8"
            tree = etree.HTML(response.text)
        except (requests.RequestException, etree.LxmlError, ValueError) as e:
            print(f"页面提取skuId失败: {str(e)}")
            return None

        # Guard against a parser result of None for an empty document.
        if tree is None:
            return None

        meta_tag = tree.xpath('//meta[@name="skuId"]/@content')
        if meta_tag:
            return meta_tag[0]

        for script in tree.xpath('//script/text()'):
            for pattern in self._SCRIPT_PATTERNS:
                match = pattern.search(script)
                if match:
                    return match.group(1)

        return None

    def get_sku_id(self, product_url):
        """Return the skuId, preferring the URL and falling back to the page.

        :param product_url: product URL
        :return: the skuId as a string, or ``None`` when it cannot be resolved
        """
        sku_id = self.parse_from_url(product_url)
        if sku_id:
            return sku_id
        return self.parse_from_page(product_url)

2. 评论接口参数生成器

京东评论接口需要特定参数组合，包括商品 ID、页码、评分筛选等，部分参数需要动态生成：

python

运行

import time
import random
import hashlib

class JdCommentParamsGenerator:
    """Build the query parameters for one page of the JD comment endpoint."""

    def __init__(self):
        # Caller-facing filter name -> numeric value sent as `score`.
        # NOTE(review): codes are kept exactly as found; confirm them against
        # the live endpoint before relying on the good/poor/image filters.
        self.comment_types = dict(all=0, good=1, medium=2, poor=3, image=5)

        # Caller-facing sort name -> numeric value sent as `sortType`.
        self.sort_types = dict(default=5, latest=6)

    def generate_params(self, sku_id, page=1, comment_type="all", sort="default", page_size=10):
        """Assemble the parameter dictionary for a single comment request.

        Unknown ``comment_type`` / ``sort`` names fall back to 0 and 5.

        :param sku_id: product skuId
        :param page: page number
        :param comment_type: key of ``self.comment_types``
        :param sort: key of ``self.sort_types``
        :param page_size: comments per page
        :return: dict of request parameters, including freshly generated
            millisecond timestamps and a randomised JSONP callback name
        """
        score_code = self.comment_types.get(comment_type, 0)
        sort_code = self.sort_types.get(sort, 5)

        # A dict display evaluates its values left to right, so the clock and
        # the random generator are consulted in a fixed order.
        return {
            "productId": sku_id,
            "score": score_code,
            "sortType": sort_code,
            "page": page,
            "pageSize": page_size,
            "isShadowSku": 0,
            "fold": 1,
            "busiType": "pms",
            "isProto": 0,
            "t": str(int(time.time() * 1000)),
            "_": str(int(time.time() * 1000) + random.randint(100, 999)),
            "callback": f"fetchJSON_comment98{random.randint(100000, 999999)}",
        }

3. 评论请求发送器

处理评论请求发送与反爬机制应对，确保请求稳定性：

python

运行

import time
import random
import requests
from fake_useragent import UserAgent

class JdCommentRequester:
    """Send rate-limited GET requests to the JD comment endpoint."""

    # Phrases whose presence in a response body is treated as a block page.
    _BLOCK_KEYWORDS = ("验证码", "访问过于频繁", "请稍后再试", "系统繁忙")

    def __init__(self, proxy_pool=None):
        """
        :param proxy_pool: optional iterable of proxy URLs; it is copied, so
            proxies evicted after a block never disappear from the caller's list
        """
        self.comment_api = "https://club.jd.com/comment/productPageComments.action"
        self.proxy_pool = list(proxy_pool) if proxy_pool else []
        self.ua = UserAgent()
        self.session = requests.Session()
        self.last_request_time = 0
        self.min_interval = 15  # minimum gap between comment requests, in seconds

    def _get_headers(self):
        """Build the request headers with a randomly chosen User-Agent."""
        return {
            "User-Agent": self.ua.random,
            "Accept": "*/*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Referer": "https://item.jd.com/",
            "X-Requested-With": "XMLHttpRequest",
            "Connection": "keep-alive",
            "Host": "club.jd.com"
        }

    def _get_proxy(self):
        """Pick a random proxy from the pool, or ``None`` when the pool is empty."""
        if not self.proxy_pool:
            return None
        return random.choice(self.proxy_pool)

    def _check_request_interval(self):
        """Sleep as needed so consecutive requests are at least ``min_interval`` apart."""
        elapsed = time.time() - self.last_request_time
        if elapsed < self.min_interval:
            # The extra 1-3 s of jitter keeps the request cadence irregular.
            sleep_time = self.min_interval - elapsed + random.uniform(1, 3)
            print(f"请求间隔不足，休眠 {sleep_time:.1f} 秒")
            time.sleep(sleep_time)
        self.last_request_time = time.time()

    def fetch_comments(self, params):
        """Request one page of comments.

        :param params: query parameters for the comment endpoint
        :return: the response body as text, or ``None`` when the request
            fails, returns a non-200 status, or looks like a block page
        """
        self._check_request_interval()

        headers = self._get_headers()
        proxy = self._get_proxy()
        proxies = {"http": proxy, "https": proxy} if proxy else None

        try:
            response = self.session.get(
                self.comment_api,
                params=params,
                headers=headers,
                proxies=proxies,
                timeout=15
            )
        except requests.RequestException as e:
            print(f"评论请求异常: {str(e)}")
            return None

        if response.status_code != 200:
            print(f"评论请求失败，状态码: {response.status_code}")
            return None

        if self._is_blocked(response.text):
            print("评论请求被拦截，可能需要验证")
            # Drop the proxy that got blocked so it is not picked again.
            if proxy and proxy in self.proxy_pool:
                self.proxy_pool.remove(proxy)
            return None

        return response.text

    def _is_blocked(self, response_text):
        """Return True when the body contains one of the known block phrases."""
        if not response_text:
            return False
        return any(keyword in response_text for keyword in self._BLOCK_KEYWORDS)

4. 评论数据解析器

解析京东评论接口返回的 JSONP 数据，提取结构化评论信息：

python

运行

import re
import json
from datetime import datetime

class JdCommentParser:
    """Turn the comment endpoint's JSONP (or bare JSON) response into plain dicts."""

    _TIME_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self):
        # JSONP wrapper: callback name, "(", payload, last ")" and optional ";".
        # The payload match is greedy and spans newlines so that a ");" inside
        # a comment's text cannot cut the payload short.
        self.jsonp_pattern = re.compile(r'fetchJSON_comment98\d+\((.*)\)\s*;?', re.DOTALL)
        # Any run of six or more digits is masked (phone numbers and similar).
        # A single quantifier masks the whole run; an alternation that matches
        # an 11-digit prefix first would leave the tail of a longer run visible.
        self.privacy_pattern = re.compile(r'\d{6,}')

    def parse_jsonp(self, jsonp_text):
        """Decode a JSONP response, falling back to treating it as bare JSON.

        :param jsonp_text: raw response body
        :return: the decoded JSON value, or ``None`` for empty or undecodable input
        """
        if not jsonp_text:
            return None
        match = self.jsonp_pattern.search(jsonp_text)
        payload = match.group(1) if match else jsonp_text
        try:
            return json.loads(payload)
        except json.JSONDecodeError:
            print("JSON解析失败")
            return None

    def clean_comment_text(self, text):
        """Mask long digit runs and collapse whitespace in a comment body.

        :param text: raw comment text (may be ``None``)
        :return: cleaned text, ``""`` for empty input
        """
        if not text:
            return ""
        text = self.privacy_pattern.sub('***', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text

    def parse_comment_item(self, comment_item):
        """Convert one raw comment object into the normalised comment dict.

        Fields that arrive as JSON ``null`` are treated like missing fields.

        :param comment_item: one element of the response's ``comments`` list
        :return: normalised comment dict, or ``None`` when the item is malformed;
            ``comment_time`` is a ``datetime`` or ``None`` when absent/unparseable
        """
        try:
            raw_time = comment_item.get("creationTime")
            comment_time = None
            if raw_time:
                try:
                    comment_time = datetime.strptime(raw_time, self._TIME_FORMAT)
                except (ValueError, TypeError):
                    comment_time = None

            # Product variant: colour plus size when present.
            product_attr = comment_item.get("productColor") or ""
            product_size = comment_item.get("productSize") or ""
            if product_size:
                product_attr += f" {product_size}"

            images = comment_item.get("images") or []
            image_urls = [img.get("imgUrl") for img in images if img.get("imgUrl")]

            return {
                "comment_id": comment_item.get("id", ""),
                "user_nick": comment_item.get("nickname", ""),
                "user_level": comment_item.get("userLevelName", ""),
                "comment_text": self.clean_comment_text(comment_item.get("content", "")),
                "comment_time": comment_time,
                "score": comment_item.get("score", 0),
                "product_attr": product_attr.strip(),
                "useful_vote": comment_item.get("usefulVoteCount", 0),
                "image_count": len(images),
                "image_urls": image_urls,
                "is_vip": comment_item.get("isVip", False)
            }
        except (AttributeError, TypeError, ValueError) as e:
            print(f"解析评论失败: {str(e)}")
            return None

    def parse_comments(self, jsonp_text):
        """Parse a full comment-page response.

        :param jsonp_text: raw response body (JSONP or bare JSON)
        :return: dict with ``total_comments``, ``good_rate``, ``current_page``,
            ``page_size``, ``total_pages`` and ``comments``; ``None`` when the
            body cannot be decoded into a non-empty JSON object
        """
        json_data = self.parse_jsonp(jsonp_text)
        if not json_data or not isinstance(json_data, dict):
            return None

        summary = json_data.get("productCommentSummary") or {}
        # A missing/zero page size falls back to 10 so the page count below
        # can never divide by zero.
        page_size = json_data.get("pageSize") or 10

        result = {
            "total_comments": summary.get("commentCount") or 0,
            "good_rate": summary.get("goodRate") or 0,
            "current_page": json_data.get("page", 1),
            "page_size": page_size,
            "comments": []
        }

        # Ceiling division: a partially filled last page still counts.
        result["total_pages"] = (result["total_comments"] + page_size - 1) // page_size

        for item in json_data.get("comments") or []:
            comment = self.parse_comment_item(item)
            if comment:
                result["comments"].append(comment)

        return result

5. 评论数据分析工具

对采集的评论数据进行多维度分析，提取有价值信息：

python

运行

import jieba
import jieba.analyse
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter

# 设置中文显示
plt.rcParams["font.family"] = ["SimHei", "WenQuanYi Micro Hei", "Heiti TC"]

class JdCommentAnalyzer:
    """Summarise parsed comments: score histogram, keywords and per-variant ratings."""

    def __init__(self):
        # Load the jieba dictionary up front instead of on first use.
        jieba.initialize()

    def get_score_distribution(self, comments):
        """Count how many comments carry each score value.

        :param comments: list of parsed comment dicts
        :return: ``{score: count}``; empty dict for empty input
        """
        if comments:
            return dict(Counter(entry["score"] for entry in comments))
        return {}

    def extract_keywords(self, comments, top_k=20):
        """Extract the highest-weighted keywords across all comment texts.

        :param comments: list of parsed comment dicts
        :param top_k: number of keywords requested from jieba
        :return: jieba's ``extract_tags`` result with weights; ``[]`` for empty input
        """
        if not comments:
            return []
        non_empty_texts = (entry["comment_text"] for entry in comments if entry["comment_text"])
        corpus = " ".join(non_empty_texts)
        return jieba.analyse.extract_tags(corpus, topK=top_k, withWeight=True)

    def get_product_attr_analysis(self, comments):
        """Aggregate review count and scores per product-attribute string.

        Comments with an empty ``product_attr`` are ignored.

        :param comments: list of parsed comment dicts
        :return: ``{attr: {"count", "total_score", "avg_score"}}`` ordered by
            descending count; empty dict for empty input
        """
        if not comments:
            return {}

        stats_by_attr = {}
        for entry in comments:
            attr = entry["product_attr"]
            if attr:
                stats = stats_by_attr.setdefault(attr, {"count": 0, "total_score": 0})
                stats["count"] += 1
                stats["total_score"] += entry["score"]

        # Average score per attribute, rounded to one decimal place.
        for stats in stats_by_attr.values():
            stats["avg_score"] = round(stats["total_score"] / stats["count"], 1)

        ranked = sorted(stats_by_attr.items(), key=lambda pair: pair[1]["count"], reverse=True)
        return dict(ranked)

    def generate_analysis_report(self, comments, output_file=None):
        """Build the analysis report, show both charts and optionally save it.

        :param comments: list of parsed comment dicts
        :param output_file: path of a JSON file to write the report to, if given
        :return: the report dict, or ``None`` for empty input
        """
        if not comments:
            return None

        score_distribution = self.get_score_distribution(comments)
        top_keywords = self.extract_keywords(comments)
        attr_analysis = self.get_product_attr_analysis(comments)
        report = {
            "total_comments": len(comments),
            "score_distribution": score_distribution,
            "top_keywords": top_keywords,
            "product_attr_analysis": attr_analysis
        }

        # Charts: score histogram first, then keyword weights.
        self._plot_score_distribution(score_distribution)
        self._plot_keywords(top_keywords)

        if output_file:
            import json
            with open(output_file, "w", encoding="utf-8") as handle:
                json.dump(report, handle, ensure_ascii=False, indent=2, default=str)
            print(f"分析报告已保存至: {output_file}")

        return report

    def _plot_score_distribution(self, score_dist):
        """Show a bar chart of comment counts per score."""
        if not score_dist:
            return
        ordered_scores = sorted(score_dist.keys())
        heights = [score_dist[score] for score in ordered_scores]

        plt.figure(figsize=(10, 6))
        plt.bar(ordered_scores, heights, color='skyblue')
        plt.title('评论评分分布')
        plt.xlabel('评分')
        plt.ylabel('评论数量')
        plt.xticks(ordered_scores)
        plt.tight_layout()
        plt.show()

    def _plot_keywords(self, keywords):
        """Show a horizontal bar chart of the first 15 keyword weights."""
        if not keywords:
            return
        shown = keywords[:15]
        labels = [pair[0] for pair in shown]
        values = [pair[1] for pair in shown]

        plt.figure(figsize=(12, 8))
        plt.barh(labels, values, color='lightgreen')
        plt.title('评论关键词权重')
        plt.xlabel('权重')
        plt.tight_layout()
        plt.show()

三、完整评论采集与分析服务

整合上述组件，实现完整的评论采集与分析流程：

python

运行

class JdCommentService:
    """Facade that wires skuId resolution, request, parsing and analysis together."""

    def __init__(self, proxy_pool=None):
        """
        :param proxy_pool: optional list of proxy URLs handed to the requester
        """
        self.sku_parser = JdSkuIdParser()
        self.params_generator = JdCommentParamsGenerator()
        self.requester = JdCommentRequester(proxy_pool=proxy_pool)
        self.parser = JdCommentParser()
        self.analyzer = JdCommentAnalyzer()

    def collect_comments(self, product_url, max_pages=5, comment_type="all", sort="default"):
        """Collect comments for one product, page by page.

        A page that cannot be fetched or parsed is skipped and the next page
        is attempted, up to ``max_pages`` attempts in total.

        :param product_url: product detail page URL
        :param max_pages: maximum number of pages to request
        :param comment_type: comment filter name
        :param sort: sort order name
        :return: dict with ``sku_id``, ``product_url``, ``total_collected``,
            ``good_rate``, ``pages_collected`` (pages fetched and parsed
            successfully) and ``comments``; ``None`` when the skuId cannot
            be resolved
        """
        print("解析商品ID...")
        sku_id = self.sku_parser.get_sku_id(product_url)
        if not sku_id:
            print("无法获取商品ID，采集失败")
            return None
        print(f"商品skuId: {sku_id}")

        all_comments = []
        good_rate = 0
        pages_collected = 0
        current_page = 1
        # The real page count is unknown until a page has been parsed, so
        # allow up to max_pages attempts; starting at 1 would end the run as
        # soon as the first page failed.
        total_pages = max_pages

        while current_page <= max_pages and current_page <= total_pages:
            print(f"采集第 {current_page}/{max_pages} 页评论...")

            params = self.params_generator.generate_params(
                sku_id=sku_id,
                page=current_page,
                comment_type=comment_type,
                sort=sort
            )

            response_text = self.requester.fetch_comments(params)
            if not response_text:
                print(f"第 {current_page} 页评论获取失败，跳过")
                current_page += 1
                continue

            result = self.parser.parse_comments(response_text)
            if not result:
                print(f"第 {current_page} 页评论解析失败，跳过")
                current_page += 1
                continue

            total_pages = min(result["total_pages"], max_pages)
            good_rate = result["good_rate"]
            all_comments.extend(result["comments"])
            # Counted explicitly: deriving it from current_page would be off
            # by one after the early exit below and would include skipped pages.
            pages_collected += 1

            print(f"第 {current_page} 页解析完成，获取 {len(result['comments'])} 条评论")

            if len(all_comments) >= result["total_comments"]:
                print("已获取全部评论，停止采集")
                break

            current_page += 1

        return {
            "sku_id": sku_id,
            "product_url": product_url,
            "total_collected": len(all_comments),
            "good_rate": good_rate,
            "pages_collected": pages_collected,
            "comments": all_comments
        }

    def collect_and_analyze(self, product_url, max_pages=5, comment_type="all", sort="default"):
        """Collect comments and attach an analysis report to the result.

        The report is also written to ``jd_comment_analysis_<skuId>.json``.

        :return: the ``collect_comments`` result with an added
            ``analysis_report`` key; returned unchanged (possibly ``None``)
            when there are no comments to analyse
        """
        comment_data = self.collect_comments(
            product_url=product_url,
            max_pages=max_pages,
            comment_type=comment_type,
            sort=sort
        )

        if not comment_data or not comment_data["comments"]:
            print("没有评论数据可分析")
            return comment_data

        print("开始分析评论数据...")
        analysis_report = self.analyzer.generate_analysis_report(
            comment_data["comments"],
            output_file=f"jd_comment_analysis_{comment_data['sku_id']}.json"
        )

        comment_data["analysis_report"] = analysis_report
        return comment_data

四、使用示例与数据存储
1. 基本使用示例

python

运行

def main():
    """Demo run: collect up to three pages of the newest comments for one
    product, analyse them and print a short summary."""
    # Proxy pool (replace with working proxies for real use)
    proxy_pool = [
        # "http://123.123.123.123:8080",
        # "http://111.111.111.111:8888"
    ]
    service = JdCommentService(proxy_pool=proxy_pool)

    # Replace with a real product URL.
    product_url = "https://item.jd.com/100012345678.html"

    result = service.collect_and_analyze(
        product_url=product_url,
        max_pages=3,
        comment_type="all",
        sort="latest"
    )

    # Nothing came back: report the failure and stop.
    if not result:
        print("评论采集失败")
        return

    print(f"\n采集完成！共获取 {result['total_collected']} 条评论，好评率: {result['good_rate']*100:.1f}%")

    # Preview the first three comments, if any were collected.
    preview = result["comments"][:3]
    if preview:
        print("\n前3条评论：")
        for position, comment in enumerate(preview, start=1):
            print(f"{position}. {comment['comment_text'][:100]}...")
            print(f" 评分：{comment['score']}星 | 时间：{comment['comment_time']}")
            print(f" 商品属性：{comment['product_attr']}\n")


if __name__ == "__main__":
    main()

2. 评论数据存储工具

将评论数据存储为多种格式，方便后续分析：

python

运行

import json
import csv
import pandas as pd
from pathlib import Path
from datetime import datetime

class JdCommentStorage:
    """Persist collected comment data as JSON, CSV or Excel files."""

    _TIME_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self, storage_dir="./jd_comments"):
        """
        :param storage_dir: output directory; created (with parents) if missing
        """
        self.storage_dir = Path(storage_dir)
        self.storage_dir.mkdir(exist_ok=True, parents=True)

    def _build_path(self, sku_id, extension):
        """Return ``<storage_dir>/jd_comments_<sku_id>_<timestamp>.<extension>``."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        return self.storage_dir / f"jd_comments_{sku_id}_{timestamp}.{extension}"

    def _to_frame(self, comments):
        """Build a DataFrame whose cells are all flat, spreadsheet-safe values.

        Times and image lists are converted on copies of the comment dicts
        before pandas sees them: a ``None`` time mixed with datetimes would
        otherwise become ``NaT``, which is truthy and cannot be formatted
        with ``strftime``. The caller's dicts are left untouched.
        """
        rows = []
        for comment in comments:
            row = dict(comment)
            if "comment_time" in row:
                when = row["comment_time"]
                if isinstance(when, datetime):
                    row["comment_time"] = when.strftime(self._TIME_FORMAT)
                else:
                    row["comment_time"] = str(when) if when else ""
            if "image_urls" in row:
                row["image_urls"] = ",".join(row["image_urls"] or [])
            rows.append(row)
        return pd.DataFrame(rows)

    def save_to_json(self, comment_data):
        """Write the whole result dict to a timestamped JSON file.

        :param comment_data: dict containing at least ``sku_id``
        :return: path of the written file
        """
        file_path = self._build_path(comment_data["sku_id"], "json")

        with open(file_path, "w", encoding="utf-8") as f:
            # default=str renders datetime values in their str() form.
            json.dump(comment_data, f, ensure_ascii=False, indent=2, default=str)

        print(f"JSON文件已保存：{file_path}")
        return file_path

    def save_to_csv(self, comment_data):
        """Write the comments to a timestamped CSV file (UTF-8 with BOM).

        :param comment_data: dict containing ``sku_id`` and ``comments``
        :return: path of the written file, or ``None`` when there are no comments
        """
        if not comment_data["comments"]:
            print("无评论数据可保存为CSV")
            return None

        file_path = self._build_path(comment_data["sku_id"], "csv")
        df = self._to_frame(comment_data["comments"])
        df.to_csv(file_path, index=False, encoding="utf-8-sig")
        print(f"CSV文件已保存：{file_path}")
        return file_path

    def save_to_excel(self, comment_data):
        """Write the comments to a timestamped ``.xlsx`` file.

        Image URL lists are joined with commas, as in the CSV output.

        :param comment_data: dict containing ``sku_id`` and ``comments``
        :return: path of the written file, or ``None`` when there are no comments
        """
        if not comment_data["comments"]:
            print("无评论数据可保存为Excel")
            return None

        file_path = self._build_path(comment_data["sku_id"], "xlsx")
        df = self._to_frame(comment_data["comments"])
        df.to_excel(file_path, index=False)
        print(f"Excel文件已保存：{file_path}")
        return file_path

五、合规优化与风险提示
1. 系统优化策略

智能缓存机制：对已采集的商品评论建立缓存，设置合理过期时间

python

运行

def get_cached_comments(self, sku_id, max_age=86400):
    """Fetch comment data from a cache (placeholder; a real implementation needs a cache backend).

    Neither ``sku_id`` nor ``max_age`` is used yet; the stub always returns ``None``.
    """
    # Cache lookup logic goes here...
    return None

动态请求调整：根据响应状态动态调整请求间隔和代理使用策略

分布式任务调度：大规模采集时采用任务分片，分散请求压力

2. 合规与风险提示

商业用途必须获得京东平台书面授权，遵守《电子商务法》相关规定
不得将采集的评论数据用于生成与京东竞争的产品或服务
严格控制请求频率，避免对平台服务器造成负担
自动过滤评论中的用户隐私信息，保护用户数据安全
当检测到平台反爬机制加强时，应立即暂停采集并评估风险

通过本文提供的技术方案，可构建一套功能完善的京东商品评论接口系统。该方案遵循合规原则，实现了从评论采集、解析到分析的全流程处理，为商品研究、用户需求分析等场景提供数据支持。在实际应用中，需根据平台规则动态调整策略，确保系统的稳定性和合法性。

万邦api博客

Nice to meet you, too!

京东商品评论接口技术实现：从接口分析到数据挖掘全方案

Ace 发表于2025-08-31 15:20:55 浏览546 评论0

少长咸集

群贤毕至