#!/usr/bin/python3
# -*- coding: UTF-8 -*-
#author:Wos
#A script that removes the ad segments inserted into an m3u8 playlist
#一个用于移除m3u8广告的脚本
import os
import re
import subprocess
import sys
from urllib.parse import urljoin

import requests
# Directory that contains this script.
BASE_DIR = os.path.split(os.path.abspath(__file__))[0]

# Location of the cleaned playlist: written by removeads() and later handed
# to the player by the command-line entry point.
m3u8_path = os.path.join(BASE_DIR, "removeads.m3u8")

# HTTP headers sent with every playlist request (a browser-like User-Agent).
hdr = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:102.0) Gecko/20100101 Goanna/5.2 Firefox/102.0 PaleMoon/31.3.0.1",
}
#follow nested playlists and return the absolute URL of the final m3u8
def get_real_m3u8(url):
    """Follow nested playlist references until the final playlist is reached.

    The playlist at *url* is downloaded (at most three attempts). If it
    contains an entry ending in ``.m3u8``, the first such entry is resolved
    against *url* and followed recursively; otherwise *url* itself is the
    result.

    Args:
        url: Absolute URL of an m3u8 playlist.

    Returns:
        The URL of the first playlist in the chain that lists no further
        ``.m3u8`` entry, or ``None`` when a download fails three times.
    """
    url = str(url)

    res = None
    for _ in range(3):
        try:
            res = requests.get(url, headers=hdr, timeout=(3, 7))
            break
        except requests.RequestException:
            # Only network/HTTP-level failures are retried; anything else
            # (including Ctrl+C) propagates instead of being swallowed.
            continue
    if res is None:
        return None

    for raw in res.text.splitlines():
        entry = raw.strip()
        if entry.startswith("#") or not entry.endswith(".m3u8"):
            continue
        # urljoin handles absolute URLs, host-rooted paths ("/a/b.m3u8") and
        # plain relative names ("b.m3u8") uniformly.
        return get_real_m3u8(urljoin(url, entry))
    return url
#strip the ad segments from the playlist and save a cleaned local copy
def _find_ad_lines(lines):
    """Return the indices in *lines* of segment URIs that look like ads.

    Two heuristics are used, chosen by inspecting the first five segments:

    * Numbered mode - when the first five segment names end in ``0.<ext>``,
      ``1.<ext>`` ... ``4.<ext>``, real segments are expected to continue
      that numbering; every segment that does not carry the next expected
      number is reported as an ad.
    * Prefix mode - otherwise the directory part of the first segment found
      at line 30 or later is taken as the reference, and every segment that
      does not start with that directory is reported as an ad.

    Blank lines and tag lines (starting with ``#``) are never reported.
    """
    segments = [
        (pos, text)
        for pos, text in enumerate(lines)
        if text and not text.startswith("#")
    ]

    def numbered(text, number):
        # True when the name ends with "<number>.<ext>", e.g. "0012.ts" for 12.
        return text.endswith("%d.%s" % (number, text.split(".")[-1]))

    first_five = segments[:5]
    if len(first_five) == 5 and all(
        numbered(text, number) for number, (_, text) in enumerate(first_five)
    ):
        ads = []
        expected = 0
        for pos, text in segments:
            if numbered(text, expected):
                expected += 1
            else:
                ads.append(pos)
        return ads

    # Prefix mode: ads are assumed to sit near the head of the playlist, so a
    # segment from line 30 onwards serves as the reference for real content.
    reference = next((text for pos, text in segments if pos >= 30), None)
    if reference is None:
        return []
    prefix = "/".join(reference.split("/")[:-1])
    return [pos for pos, text in segments if not text.startswith(prefix)]


def _absolutize_uri_attr(line, base_url):
    """Rewrite the ``URI="..."`` attribute of a tag line as an absolute URL."""
    return re.sub(
        r'URI="([^"]*)"',
        lambda match: 'URI="%s"' % urljoin(base_url, match.group(1)),
        line,
    )


def removeads(url):
    """Download a playlist, drop its ad segments and save a cleaned copy.

    The final playlist URL is obtained through ``get_real_m3u8``. Segments
    flagged as ads are removed together with the line directly above each of
    them (assumed to be the segment's ``#EXTINF`` tag). ``#EXT-X-DISCONTINUITY``
    and ``#EXT-X-PLAYLIST-TYPE`` tags are dropped, segment URIs and the URI
    attribute of ``#EXT-X-KEY`` / ``#EXT-X-MAP`` tags are made absolute so the
    saved file works from local disk, and exactly one ``#EXT-X-ENDLIST`` is
    written at the end. The result is stored at ``m3u8_path``.

    Args:
        url: URL of an m3u8 playlist.

    Returns:
        ``True`` when the cleaned playlist was written, ``False`` when no
        URL ending in ``.m3u8`` could be determined.

    Raises:
        requests.RequestException: if downloading the final playlist fails.
        OSError: if the output file cannot be written.
    """
    url = get_real_m3u8(url)
    if not str(url).strip().endswith(".m3u8"):
        return False
    print(url)
    url = str(url).strip()

    res = requests.get(url, headers=hdr, timeout=(3, 7))
    # splitlines() + strip() copes with both LF and CRLF playlists.
    lines = [raw.strip() for raw in res.text.splitlines()]

    # Collect every line to discard up front instead of deleting slices from
    # the list while its indices shift.
    dropped = set()
    for pos in _find_ad_lines(lines):
        dropped.update((pos - 1, pos))

    skipped_tags = (
        "#EXT-X-DISCONTINUITY",
        "#EXT-X-PLAYLIST-TYPE",
        "#EXT-X-ENDLIST",  # re-added once, at the very end
    )
    output = []
    for pos, line in enumerate(lines):
        if pos in dropped or not line or line.startswith(skipped_tags):
            continue
        if line.startswith(("#EXT-X-KEY", "#EXT-X-MAP")):
            output.append(_absolutize_uri_attr(line, url))
        elif line.startswith("#"):
            output.append(line)
        else:
            output.append(urljoin(url, line))
    output.append("#EXT-X-ENDLIST")

    with open(m3u8_path, 'w', encoding='UTF-8', newline='') as file_obj:
        file_obj.write("\n".join(output) + "\n")
    return True
if __name__ == '__main__':
    if len(sys.argv) == 2:
        # Start the player only when a fresh playlist was actually written;
        # otherwise it would open a stale or missing file.
        if not removeads(sys.argv[1]):
            print("could not obtain an m3u8 playlist from the given link", file=sys.stderr)
            sys.exit(1)
        # An argument list (no shell) keeps paths containing spaces or quotes intact.
        player_cmd = [
            "mpv",
            "--stream-lavf-o-append=protocol_whitelist=file,http,https,tcp,tls,crypto,hls,applehttp",
            "--really-quiet",
            "--ontop",
            m3u8_path,
        ]
        try:
            subprocess.run(player_cmd)
        except FileNotFoundError:
            print("mpv was not found on PATH", file=sys.stderr)
            sys.exit(1)
    else:
        print("python removeads.py [m3u8-link]")