代码链接
大多视频网站使用的是m3u8,m3u8其实不是一个真正的视频文件,而是一个视频播放列表(playlist)。它是一种文本文件,里面记录了一系列的视频片段(segment)的网络地址。这些视频片段通常是ts格式的,也就是传输流(transport stream)格式。ts格式的视频片段可以很快地在网络上传输和播放,而不需要等待整个文件下载完毕。这样就可以实现流媒体(streaming media)的效果,也就是边下边播。
m3u8是苹果公司提出的一种流媒体协议,叫做HTTP Live Streaming(HLS)。HLS的目的是为了解决在不同网络环境下,如何提供更好的视频观看体验的问题。
HLS的原理是把一个完整的视频切分成很多小的视频片段,并且为每个片段提供不同的码率(bitrate)和分辨率(resolution)的选项。这样,当用户观看视频时,可以根据自己的网络状况和设备性能,自动或者手动地选择合适的视频片段进行播放。这样就可以避免卡顿、缓冲、画质模糊等问题,提高用户满意度。
所以对于爬虫来说,需要获取m3u8文件,再根据m3u8里面记录的ts链接列表去下载ts文件(视频片段)。将这些ts保存到本地磁盘中。
/**
 * Contract for resolving a video page address into an m3u8 playlist and its ts segment list.
 *
 * <p>One implementation is expected per supported site (or family of sites); callers pick the
 * implementation whose {@link #support(String)} accepts the address.
 */
public interface VideoResolver {

    /**
     * Tells whether this resolver can handle the given address.
     *
     * @param url address to check
     * @return {@code true} if this resolver supports the address
     */
    boolean support(String url);

    /**
     * Fetches the m3u8 playlist that belongs to the given address.
     *
     * @param url address of the video page
     * @return the resolved m3u8 data
     */
    M3U8BO getM3U8(String url);

    /**
     * Extracts the list of ts segment addresses from a resolved playlist.
     *
     * @param m3u8BO the m3u8 data previously returned by {@link #getM3U8(String)}
     * @return the ts list together with the surrounding playlist parts
     */
    TsListBO getTsList(M3U8BO m3u8BO);

    /**
     * Supplies the function used to decrypt the raw bytes of a downloaded ts segment.
     *
     * <p>The default implementation returns {@code null}, which callers interpret as
     * "do not decrypt while downloading". Override it to return a decrypting function
     * derived from the encryption method declared in the playlist head.
     *
     * @param head   head section of the m3u8 file (the part before the first segment)
     * @param encKey the encryption key bytes
     * @return a function mapping encrypted segment bytes to decrypted bytes, or {@code null}
     *         when segments should be stored as downloaded
     */
    default UnaryOperator<byte[]> getDecodeTsFunction(String head, byte[] encKey) {
        return null;
    }
}
一般来说m3u8的链接保存在播放页面里面,可以通过查看源代码找到。只要写一个正则表达式就能取到链接,然后通过该链接获取m3u8内容和ts下载链接等内容。所以将这些公共的代码封装到一个抽象类中,并新增两个抽象方法:获取解析m3u8地址的正则模式和获取解析视频名的正则模式。
/**
 * Base resolver for sites whose play page embeds the m3u8 address in its HTML.
 *
 * <p>Subclasses only provide two regular expressions: one whose group 1 captures the m3u8
 * address and one whose group 1 captures the video name. This class downloads the page,
 * downloads the playlist, splits it into head / segment section / tail and resolves the
 * segment and key addresses.
 */
@Slf4j
public abstract class CommonVideoResolver implements VideoResolver {

    /** One segment entry: group 1 is the #EXTINF line, group 2 the address on the next line up to the first ".ts". */
    private static final Pattern TS_CONTENT_PAT = Pattern.compile("(#EXTINF.*?)\n(.*?\\.ts)");

    /** The #EXT-X-KEY line of a playlist; group 1 is the value of its URI attribute. */
    public static final Pattern encKeyPat = Pattern.compile("#EXT-X-KEY.*?URI=\"(.*?)\"[^\n]*");

    /** Splits a playlist into head (group 1), segment section (group 2) and tail (group 3). */
    private static final Pattern M3U8_CONTENT_PAT = Pattern.compile("([\\s\\S]*?)(#EXTINF[\\s\\S]*\\.ts)([\\s\\S]*)");

    /**
     * Returns the pattern used to find the m3u8 address in the play page.
     *
     * @return pattern whose group 1 is the m3u8 address
     */
    protected abstract Pattern getM3U8UrlPat();

    /**
     * Returns the pattern used to find the video name in the play page.
     *
     * @return pattern whose group 1 is the video name
     */
    protected abstract Pattern getM3U8NamePat();

    /**
     * Downloads the play page, extracts the m3u8 address from it and downloads the playlist.
     *
     * <p>The playlist text is passed through {@code HLSUtil.getMaxBandwidthM3U8} before being
     * stored. The video name is {@code null} when the name pattern does not match the page.
     * A fresh random id (UUID without dashes) is generated for every call.
     *
     * @param url address of the play page
     * @return the playlist content plus id, name, source address and m3u8 address
     */
    @Override
    public M3U8BO getM3U8(String url) {
        HttpRespBO pageResp = HttpUtil.httpGet10(url);
        // Assert is a project helper; presumably it aborts when the response is unusable — confirm.
        Assert.isTrue(pageResp, "获取M3U8文件失败", () ->
                log.error("xcVideoService getM3U8 fail url:{}", url));
        String pageHtml = pageResp.getUTF8Body();

        Matcher urlMatcher = getM3U8UrlPat().matcher(pageHtml);
        Assert.isTrue(urlMatcher.find(), "未获取到M3U8地址");
        String m3u8Url = urlMatcher.group(1);

        HttpRespBO m3u8Resp = HttpUtil.httpGet10(m3u8Url);
        Assert.isTrue(m3u8Resp, "获取M3U8列表文件失败", () ->
                log.error("xcVideoService getM3U8 list fail m3u8Url:{},m3u8Resp:{}", m3u8Url, m3u8Resp));
        String m3u8Content = HLSUtil.getMaxBandwidthM3U8(m3u8Url, m3u8Resp.getUTF8Body());

        Matcher nameMatcher = getM3U8NamePat().matcher(pageHtml);
        String title = nameMatcher.find() ? nameMatcher.group(1) : null;
        String id = UUID.randomUUID().toString().replace("-", "");
        return new M3U8BO(id, title, m3u8Content, url, m3u8Url);
    }

    /**
     * Splits the playlist into head, segments and tail and resolves every segment address.
     *
     * <p>When the head declares an encryption key, the key is downloaded and the key address in
     * the head is rewritten to the local key endpoint (see {@link #buildTsEnc}).
     *
     * @param m3u8BO playlist data returned by {@link #getM3U8(String)}
     * @return segment list, optional key information, rewritten head and original tail
     */
    @Override
    public TsListBO getTsList(M3U8BO m3u8BO) {
        String m3u8Url = m3u8BO.getM3u8Url();
        Matcher sections = M3U8_CONTENT_PAT.matcher(m3u8BO.getContent());
        Assert.isTrue(sections.find(), "m3u8内容解析失败");

        StringBuilder newHead = new StringBuilder(sections.group(1));
        TsEncBO tsEncBO = buildTsEnc(m3u8BO.getId(), newHead, m3u8Url);

        // Both bases are loop-invariant, so compute them once.
        String rootUrl = NetUtil.resolveRootUrl(m3u8Url);
        String baseDir = playlistDir(m3u8Url);
        List<TsBO> tsList = new ArrayList<>();
        Matcher tsMatcher = TS_CONTENT_PAT.matcher(sections.group(2));
        while (tsMatcher.find()) {
            String tsUrl = resolveUrl(rootUrl, baseDir, tsMatcher.group(2));
            tsList.add(new TsBO(tsUrl, tsMatcher.group(1)));
        }

        TsListBO tsListBO = new TsListBO();
        tsListBO.setTsList(tsList);
        tsListBO.setTsEncBO(tsEncBO);
        tsListBO.setHead(newHead.toString());
        tsListBO.setEnd(sections.group(3));
        return tsListBO;
    }

    /**
     * Returns the playlist address truncated after its last "/", i.e. the directory that
     * relative references inside the playlist are resolved against.
     */
    private static String playlistDir(String m3u8Url) {
        return m3u8Url.substring(0, m3u8Url.lastIndexOf("/") + 1);
    }

    /**
     * Resolves an address found inside the playlist.
     *
     * <ul>
     *   <li>addresses starting with "http" are returned unchanged;</li>
     *   <li>addresses starting with "/" are appended to the site root;</li>
     *   <li>any other address is relative and is appended to the playlist directory, as HLS
     *       requires (previously ts addresses were wrongly appended to the site root while key
     *       addresses already used the playlist directory).</li>
     * </ul>
     *
     * NOTE(review): if the stored m3u8 address is a master playlist while the content comes from
     * a variant playlist in another directory, relative references may still resolve wrongly —
     * confirm what {@code HLSUtil.getMaxBandwidthM3U8} returns.
     *
     * @param rootUrl site root without trailing slash
     * @param baseDir playlist directory including trailing slash
     * @param ref     address as written in the playlist
     * @return absolute address
     */
    private static String resolveUrl(String rootUrl, String baseDir, String ref) {
        if (ref.startsWith("http")) {
            return ref;
        }
        return ref.startsWith("/") ? rootUrl + ref : baseDir + ref;
    }

    /**
     * Downloads the encryption key declared in the playlist head, if any.
     *
     * <p>When a key line is present, {@code newHead} is rewritten in place so that the original
     * key address is replaced by {@code /video/enc/key/<videoId>}.
     *
     * @param videoId id of the video, used to build the local key address
     * @param newHead playlist head; modified in place when a key line is found
     * @param m3u8Url address of the playlist, used to resolve a relative key address
     * @return key information, or {@code null} when the head declares no key
     */
    private TsEncBO buildTsEnc(String videoId, StringBuilder newHead, String m3u8Url) {
        String head = newHead.toString();
        Matcher encKeyMatcher = encKeyPat.matcher(head);
        if (!encKeyMatcher.find()) {
            return null;
        }

        String originEncKeyUrl = encKeyMatcher.group(1);
        String encKeyUrl = resolveUrl(NetUtil.resolveRootUrl(m3u8Url), playlistDir(m3u8Url), originEncKeyUrl);
        HttpRespBO encKeyResp = HttpUtil.httpGet10(encKeyUrl);
        Assert.isTrue(encKeyResp, "获取ts文件密钥失败", () ->
                log.error("getEncKey fail encKeyUrl:{},encKeyResp:{}", encKeyUrl, encKeyResp));

        // Point the playlist at the local key endpoint instead of the remote one.
        newHead.setLength(0);
        newHead.append(head.replace(originEncKeyUrl, "/video/enc/key/" + videoId));

        TsEncBO tsEncBO = new TsEncBO();
        tsEncBO.setEncKey(encKeyResp.getBody());
        tsEncBO.setEncKeyUrl(encKeyUrl);
        tsEncBO.setOriginEncKeyUrl(originEncKeyUrl);
        return tsEncBO;
    }
}
如果想新增一个网站的解析,只需要继承这个抽象类,提供两个正则表达式即可。当然对于前后端分离的网站,就不能使用这个抽象类了,需要自己根据获取m3u8的接口新增一个公用的抽象类(等遇到了再实现),由它实现VideoResolver接口来获取m3u8和ts的内容。
/**
 * Resolver for play pages hosted on www.nxyjjt.com.
 *
 * <p>All of the work is done by {@link CommonVideoResolver}; this class only supplies the two
 * site-specific patterns.
 */
@Slf4j
@Service("ccVideoResolver")
public class CCVideoResolver extends CommonVideoResolver {

    /** Host fragment an address must contain to be handled here. */
    private static final String SUPPORTED_HOST = "www.nxyjjt.com";

    /**
     * Video-name pattern; group 1 is the text in front of the first space that can be matched.
     * NOTE(review): the expression looks truncated (a leading tag may have been lost) — confirm.
     */
    private static final Pattern NAME_PATTERN = Pattern.compile("(.*?) ");

    /** m3u8-address pattern; group 1 is the "url" value inside the page's player_data object. */
    private static final Pattern URL_PATTERN = Pattern.compile("player_data=\\{.*?\"url\":\"(.*?)\"");

    /**
     * Accepts non-null addresses that contain the supported host.
     *
     * @param url address to check
     * @return {@code true} when the address belongs to the supported site
     */
    @Override
    public boolean support(String url) {
        if (url == null) {
            return false;
        }
        return url.contains(SUPPORTED_HOST);
    }

    /** {@inheritDoc} */
    @Override
    protected Pattern getM3U8UrlPat() {
        return URL_PATTERN;
    }

    /** {@inheritDoc} */
    @Override
    protected Pattern getM3U8NamePat() {
        return NAME_PATTERN;
    }
}
有的ts文件是加密的,播放的时候需要根据m3u8上的加密方式和加密的key去解密ts文件,如m3u8文件中有一行为 #EXT-X-KEY:METHOD=AES-128,URI="key.key",IV=0x864267cc19f34ec1066e016e0da856ee。对于这种情况我们有两种处理方案:第一种是下载时就把ts解密,本地保存解密后的ts;第二种是本地保存加密的ts和密钥,播放时再由播放器获取密钥进行解密。
第一种方式下载相对慢一些但是播放很快,因为下载需要解密,播放无需解密。第二种方式则相反。注意这个慢是相对的,因为在局域网内投屏,最耗时的步骤已经解决,无论哪一种方式都不会卡
可以看到VideoResolver接口中提供了一个getDecodeTsFunction方法,返回一个解密方法UnaryOperator,默认是返回null即不解密,如果想解密的话,可以根据m3u8里的解密方式重写getDecodeTsFunction方法
/**
 * Resolver for play pages hosted on www.huidongxie.com and www.wszwz.net.
 *
 * <p>In addition to the two site-specific patterns, this resolver decrypts AES-encrypted
 * segments at download time by overriding {@link #getDecodeTsFunction(String, byte[])}.
 */
@Slf4j
@Service("xcVideoResolver")
public class XCVideoResolver extends CommonVideoResolver {

    /** m3u8-address pattern; group 1 is the value of the first "url" property in the page. */
    private static final Pattern M3U8_URL_PAT = Pattern.compile("\"url\":\"(.*?)\"");

    /**
     * Video-name pattern; group 1 is the text in front of the first space that can be matched.
     * NOTE(review): the expression looks truncated (a leading tag may have been lost) — confirm.
     */
    private static final Pattern M3U8_NAME_PAT = Pattern.compile("(.*?) ");

    /**
     * Key-line pattern: group 1 is the METHOD value, group 2 the IV value.
     * Named differently from the inherited public {@code CommonVideoResolver.encKeyPat}, which
     * the previous private field of the same name silently hid with a different expression.
     */
    private static final Pattern ENC_METHOD_PAT = Pattern.compile("#EXT-X-KEY:METHOD=(.*?),.*?IV=(.*?)\n");

    /**
     * Accepts non-null addresses that contain one of the supported hosts.
     *
     * @param url address to check
     * @return {@code true} when the address belongs to a supported site
     */
    @Override
    public boolean support(String url) {
        return url != null && (url.contains("www.huidongxie.com") || url.contains("www.wszwz.net"));
    }

    /** {@inheritDoc} */
    @Override
    protected Pattern getM3U8UrlPat() {
        return M3U8_URL_PAT;
    }

    /** {@inheritDoc} */
    @Override
    protected Pattern getM3U8NamePat() {
        return M3U8_NAME_PAT;
    }

    /**
     * Returns an AES decrypting function when the playlist head declares an AES method.
     *
     * <p>Returns {@code null} when the head has no key line matching the pattern (which requires
     * both METHOD and IV attributes) or when the method name contains neither "aes" nor "AES".
     *
     * @param head   head section of the m3u8 file
     * @param encKey the encryption key bytes
     * @return decrypting function, or {@code null} when no download-time decryption applies
     */
    @Override
    public UnaryOperator<byte[]> getDecodeTsFunction(String head, byte[] encKey) {
        Matcher encKeyMatcher = ENC_METHOD_PAT.matcher(head);
        if (!encKeyMatcher.find()) {
            return null;
        }
        String method = encKeyMatcher.group(1);
        if (!(method.contains("aes") || method.contains("AES"))) {
            return null;
        }
        // NOTE(review): the IV captured in group 2 is not used; a fixed IV string is passed
        // instead. Confirm against AESUtil.decode whether the playlist IV should be supplied.
        return encByte -> AESUtil.decode(encByte, encKey, "0000000000000000");
    }
}
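当获取完ts的下载列表后,就需要将ts下载下来并且保存到本地。整个过程如下:先为视频创建本地目录并保存原始的m3u8文件(origin.m3u8);然后把每个ts的下载任务提交到线程池并行下载(需要时在下载后解密),同时把下载进度和失败的ts记录到info.txt;最后根据下载成功的ts生成指向本地接口的m3u8文件(local.m3u8)。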
/**
 * Downloads all ts segments of a resolved video and stores them, together with the original and
 * a rewritten local playlist, under {@code <videoBasePath>/<fileId>}.
 *
 * <p>Files written per video: {@code origin.m3u8}, {@code local.m3u8}, {@code info.txt},
 * {@code ts/<index>.ts} and, when segments are kept encrypted, {@code enc.key}.
 */
@Slf4j
@Service("commonVideoActuator")
public class CommonVideoActuator implements VideoActuator {

    /** Root directory under which one folder per downloaded video is created. */
    @Value("${video.base.path}")
    private String videoBasePath;

    /** Pool that runs the per-segment download tasks. */
    @Resource(name = "downloadTSPool")
    private ExecutorService downloadTSPool;

    /** The #EXT-X-KEY line of a playlist head; group 1 is the value of its URI attribute. */
    public static final Pattern encKeyPat = Pattern.compile("#EXT-X-KEY.*?URI=\"(.*?)\"[^\n]*");

    /**
     * Resolves the playlist for {@code url}, downloads every segment and writes the local files.
     *
     * @param url           address of the play page
     * @param videoResolver resolver used to obtain the playlist, the segment list and the
     *                      optional decrypting function
     * @return a success result carrying the file id, or a failure result when an unexpected
     *         exception occurs
     * @throws ViewException rethrown unchanged when raised during processing
     */
    @Override
    public Result downloadAndSaveTS(String url, VideoResolver videoResolver) {
        try {
            M3U8BO m3u8BO = videoResolver.getM3U8(url);
            TsListBO tsListBO = videoResolver.getTsList(m3u8BO);
            List<TsBO> tsList = tsListBO.getTsList();
            Assert.isNotEmpty(tsList, "ts地址列表为空");

            String fileId = m3u8BO.getId();
            String basePath = videoBasePath + "/" + fileId;
            // The files below are written with CREATE_NEW, which fails if they already exist,
            // so any previous folder content is removed first.
            Assert.isTrue(FileUtil.deleteFolder(basePath), basePath + "删除失败");
            Files.createDirectories(Paths.get(basePath + "/ts"));

            // Explicit UTF-8: the no-arg getBytes() depends on the platform default charset.
            Files.write(Paths.get(basePath + "/origin.m3u8"),
                    m3u8BO.getContent().getBytes(StandardCharsets.UTF_8), StandardOpenOption.CREATE_NEW);

            UnaryOperator<byte[]> decodeTsFunction = getDecodeTsFunction(basePath, tsListBO, videoResolver);
            SynchronousQueue<LocalTsBO> synchronousQueue = new SynchronousQueue<>();
            batchSubmitTsTask(basePath, tsList, decodeTsFunction, synchronousQueue);
            List<LocalTsBO> successTsList = getFutureAndSaveInfo(basePath, m3u8BO, tsList.size(), synchronousQueue);

            String newM3U8Content = buildLocalM3U8Content(tsListBO, fileId, successTsList);
            Files.write(Paths.get(basePath + "/local.m3u8"),
                    newM3U8Content.getBytes(StandardCharsets.UTF_8), StandardOpenOption.CREATE_NEW);
            return Result.success(fileId);
        } catch (ViewException e) {
            throw e;
        } catch (Exception e) {
            log.error("downloadAndSaveTS fail url:{}", url, e);
            return Result.fail("下载保存视频发生错误");
        }
    }

    /**
     * Chooses how downloaded segment bytes are transformed before being stored.
     *
     * <ul>
     *   <li>No key information: bytes are stored unchanged.</li>
     *   <li>Key information and the resolver supplies a decrypting function: the key line is
     *       removed from the playlist head and the function is returned.</li>
     *   <li>Key information but no decrypting function: the key is written to {@code enc.key}
     *       and bytes are stored unchanged (still encrypted).</li>
     * </ul>
     *
     * @param basePath      folder of the current video
     * @param tsListBO      segment list; its head is modified when decrypting at download time
     * @param videoResolver resolver asked for the decrypting function
     * @return the function applied to every downloaded segment; never {@code null}
     * @throws IOException when the key file cannot be written
     */
    private UnaryOperator<byte[]> getDecodeTsFunction(String basePath, TsListBO tsListBO, VideoResolver videoResolver) throws IOException {
        TsEncBO tsEncBO = tsListBO.getTsEncBO();
        if (tsEncBO == null) {
            return UnaryOperator.identity();
        }
        String head = tsListBO.getHead();
        byte[] encKey = tsEncBO.getEncKey();
        UnaryOperator<byte[]> decodeTsFunction = videoResolver.getDecodeTsFunction(head, encKey);
        if (decodeTsFunction != null) {
            // Reuse the compiled pattern instead of recompiling it from its string form.
            tsListBO.setHead(encKeyPat.matcher(head).replaceAll(""));
            return decodeTsFunction;
        }
        Files.write(Paths.get(basePath + "/enc.key"), encKey, StandardOpenOption.CREATE_NEW);
        return UnaryOperator.identity();
    }

    /**
     * Submits one download task per segment to the pool. Segments are stored as
     * {@code <basePath>/ts/<index>.ts}, where index is the position in {@code tsList}.
     *
     * @param basePath         folder of the current video
     * @param tsList           segments to download
     * @param decodeFun        transformation applied to the downloaded bytes
     * @param synchronousQueue hand-off queue on which every task reports its outcome
     */
    private void batchSubmitTsTask(String basePath, List<TsBO> tsList, UnaryOperator<byte[]> decodeFun, SynchronousQueue<LocalTsBO> synchronousQueue) {
        for (int i = 0; i < tsList.size(); i++) {
            TsBO tsBO = tsList.get(i);
            String localTsName = i + ".ts";
            LocalTsBO localTsBO = new LocalTsBO();
            localTsBO.setIndex(i);
            localTsBO.setTsUrl(tsBO.getUrl());
            localTsBO.setExtInf(tsBO.getExtInf());
            localTsBO.setLocalTsName(localTsName);
            localTsBO.setLocalTsPath(basePath + "/ts/" + localTsName);
            downloadTSPool.submit(() -> doDownloadAndSaveTS(localTsBO, decodeFun, synchronousQueue));
        }
    }

    /**
     * Collects the outcome of every download task and maintains {@code info.txt}.
     *
     * <p>The info file holds the video name, the source address, a progress percentage that is
     * overwritten in place after each finished task, and finally the list of failed segments.
     *
     * @param basePath         folder of the current video
     * @param m3u8BO           playlist data providing name and source address
     * @param allSize          number of submitted tasks, i.e. outcomes to wait for
     * @param synchronousQueue hand-off queue the tasks report to
     * @return the successfully stored segments ordered by their index
     * @throws IOException           when the info file cannot be written
     * @throws IllegalStateException when the waiting thread is interrupted
     */
    private List<LocalTsBO> getFutureAndSaveInfo(String basePath, M3U8BO m3u8BO, int allSize, SynchronousQueue<LocalTsBO> synchronousQueue) throws IOException {
        try (RandomAccessFile rf = new RandomAccessFile(basePath + "/info.txt", "rw")) {
            String head = "文件名:" + m3u8BO.getName() + "\n";
            head += "来源:" + m3u8BO.getSourceUrl() + "\n";
            head += "进度:";
            rf.write(head.getBytes(StandardCharsets.UTF_8));

            int preRateByteNum = 0;
            StringBuilder failTsContent = new StringBuilder();
            List<LocalTsBO> successTsList = new ArrayList<>(allSize);
            for (int i = 0; i < allSize; i++) {
                LocalTsBO localTsBO = synchronousQueue.take();
                // Step back over the previously written percentage so it is overwritten.
                rf.seek(rf.getFilePointer() - preRateByteNum);
                String rate = String.format("%.2f", (i + 1) * 100.0 / allSize) + "%";
                byte[] rateByte = rate.getBytes(StandardCharsets.UTF_8);
                rf.write(rateByte);
                preRateByteNum = rateByte.length;
                HandlerUtil.branchHandler(localTsBO.isTaskSuccess(), () -> successTsList.add(localTsBO), () ->
                        failTsContent.append(localTsBO.getLocalTsName()).append("->").append(localTsBO.getTsUrl()).append("\n"));
            }
            rf.write(("\n异常ts文件:\n" + failTsContent).getBytes(StandardCharsets.UTF_8));
            // Tasks finish in arbitrary order; restore playlist order.
            successTsList.sort(Comparator.comparing(LocalTsBO::getIndex));
            return successTsList;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.error("getFutureAndSaveInfo fail m3u8BO:{}", m3u8BO, e);
            // Keep the original exception as cause so the interruption stays traceable.
            throw new IllegalStateException("线程中断,下载任务停止", e);
        }
    }

    /**
     * Builds the local playlist: the (possibly rewritten) head, one entry per successfully
     * stored segment pointing at {@code /video/ts/<fileId>/<localTsName>}, and the original tail.
     *
     * @param tsListBO       provides head and tail of the playlist
     * @param fileId         id of the video
     * @param successLocalTs successfully stored segments in playlist order
     * @return content of the local playlist
     */
    private String buildLocalM3U8Content(TsListBO tsListBO, String fileId, List<LocalTsBO> successLocalTs) {
        StringBuilder newM3U8Content = new StringBuilder(tsListBO.getHead());
        for (LocalTsBO localTsBO : successLocalTs) {
            newM3U8Content.append(localTsBO.getExtInf()).append("\n")
                    .append("/video/ts/").append(fileId).append("/").append(localTsBO.getLocalTsName()).append("\n");
        }
        newM3U8Content.append(tsListBO.getEnd());
        return newM3U8Content.toString();
    }

    /**
     * Downloads one segment, applies the transformation and stores the result.
     *
     * <p>The outcome is always reported on the queue (in {@code finally}), so the collector
     * receives exactly one element per task regardless of success or failure.
     *
     * @param localTsBO        segment to download; {@code taskSuccess} is set on success
     * @param decodeFunction   transformation applied to the downloaded bytes
     * @param synchronousQueue hand-off queue the outcome is reported to
     */
    private void doDownloadAndSaveTS(LocalTsBO localTsBO, UnaryOperator<byte[]> decodeFunction, SynchronousQueue<LocalTsBO> synchronousQueue) {
        String tsUrl = localTsBO.getTsUrl();
        String localTsPath = localTsBO.getLocalTsPath();
        try {
            HttpRespBO respBO = HttpUtil.httpGet10(tsUrl);
            if (respBO == null) {
                log.error("downloadTS fail localTsBO:{}", localTsBO);
                return;
            }
            try (RandomAccessFile ts = new RandomAccessFile(localTsPath, "rw")) {
                ts.write(decodeFunction.apply(respBO.getBody()));
                localTsBO.setTaskSuccess(true);
            }
        } catch (IOException | RuntimeException e) {
            // Runtime failures (e.g. from the decrypting function) used to vanish inside the
            // discarded Future; log them as well. The segment is reported as failed either way.
            log.error("save ts fail,localTsBO:{}", localTsBO, e);
        } finally {
            putSynchronousQueue(synchronousQueue, localTsBO);
        }
    }

    /**
     * Hands the task outcome to the collector, blocking until it is taken. On interruption the
     * interrupt flag is restored and the failure is logged.
     *
     * @param synchronousQueue hand-off queue
     * @param localTsBO        outcome to report
     */
    private void putSynchronousQueue(SynchronousQueue<LocalTsBO> synchronousQueue, LocalTsBO localTsBO) {
        try {
            synchronousQueue.put(localTsBO);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.error("doDownloadAndSaveTS put synchronousQueue fail,localTsBO:{}", localTsBO, e);
        }
    }

    /** State of one segment download task. */
    @Data
    private static class LocalTsBO {
        /**
         * Position of the segment in the playlist.
         */
        private int index;
        /**
         * Remote address of the segment.
         */
        private String tsUrl;
        /**
         * The #EXTINF line (duration) of the segment.
         */
        private String extInf;
        /**
         * Local file name of the segment.
         */
        private String localTsName;
        /**
         * Local file path of the segment.
         */
        private String localTsPath;
        /**
         * Whether the segment was downloaded and stored successfully.
         */
        private boolean taskSuccess;
    }
}
最后提供本地视频的播放能力,也就是提供三个http接口:获取本地m3u8文件、获取ts文件、获取解密ts用的密钥文件。
至此随便找一个能播放m3u8的软件就可以播放我们本地的视频了,如Safari浏览器、QuickTime Player等,直接在Safari浏览器或者播放器中输入m3u8文件的网址,就可以开始观看视频了
@Slf4j
@RestController
@RequestMapping("/video")
public class VideoController {
@Autowired
private VideoService videoService;
@GetMapping(value = "/m3u8/{videoId}", produces = "application/vnd.apple.mpegurl")
public byte[] getM3U8(@PathVariable String videoId) {
return returnFileTemplate(videoId + "/local.m3u8");
}
@GetMapping(value = "/ts/{videoId}/{tsName}", produces = "video/mp2t")
public byte[] getTs(@PathVariable String videoId, @PathVariable String tsName) {
return returnFileTemplate(videoId + "/ts/" + tsName);
}
@GetMapping(value = "/enc/key/{videoId}", produces = "application/octet-stream")
public byte[] getEncKey(@PathVariable String videoId) {
return returnFileTemplate(videoId + "/enc.key");
}
}
private byte[] returnFileTemplate(String relativePath) {
Result result = videoService.getFileByte(relativePath);
return Optional.of(result).filter(Result::isSuccess).map(Result::getData).orElseGet(() -> JSON.toJSONBytes(result));
}
/**
 * Reads a whole file below the video base directory into memory.
 *
 * <p>The relative path originates from request path variables, so it is checked to stay inside
 * the base directory after normalization before the file is opened.
 *
 * @param relativePath path of the file relative to {@code videoBasePath}
 * @return a success result carrying the file bytes, or a failure result when the path escapes
 *         the base directory or the file cannot be read
 */
@Override
public Result getFileByte(String relativePath) {
    String filePath = videoBasePath + "/" + relativePath;
    try {
        java.nio.file.Path base = java.nio.file.Paths.get(videoBasePath).toAbsolutePath().normalize();
        java.nio.file.Path target = java.nio.file.Paths.get(filePath).toAbsolutePath().normalize();
        if (!target.startsWith(base)) {
            log.error("getFileByte rejected path outside base dir,filePath:{}", filePath);
            return Result.fail("获取文件失败");
        }
        try (RandomAccessFile randomAccessFile = new RandomAccessFile(filePath, "r")) {
            byte[] buffer = new byte[(int) randomAccessFile.length()];
            // read(byte[]) may return before the buffer is full; readFully reads all of it.
            randomAccessFile.readFully(buffer);
            return Result.success(buffer);
        }
    } catch (Exception e) {
        log.error("getTs fail,filePath:{}", filePath, e);
        return Result.fail("获取文件失败");
    }
}
本质上就是先从本地服务里获取m3u8文件,根据里面定义的ts链接去本地服务获取ts数据,如果加密了,再获取密钥进行解密