在生活中不可避免会出现色情视频,因此视频的鉴定就成为了我们需要解决的问题,本博客在NSFW项目(见下面参考文献)的基础上面改进了封装,用来检测视频是否是色情视频。首先,这个项目是基于Caffe 的,使用的网络结构是ResNet网络(可以查看参考文献中的论文)。
检测分为三个等级:score < 0.2 表示很安全,score > 0.8 表示有很大的可能性是色情的,介于两者之间的为中间等级。
score < 0.2:safe(很安全),统计其数量和所占比重
score >= 0.2 && score <= 0.8:medium(介于安全和危险之间),统计其数量和所占比重
score > 0.8:dangerous(有很大可能性是色情),统计其数量和所占比重
最后,我们可以根据 dangerous 所占的比重来确定视频是否是色情视频。
首先就是安装ffmpeg,由于我使用的是Ubuntu 14的系统,安装这个的时候着实费了一点功夫,所幸终于找到了一个好用的源安装成功。
sudo add-apt-repository ppa:mc3man/trusty-media
sudo apt-get update
sudo apt-get install ffmpeg gstreamer0.10-ffmpeg
安装caffe 容器 CPU版:
docker build -t caffe:cpu https://raw.githubusercontent.com/BVLC/caffe/master/docker/cpu/Dockerfile
docker run caffe:cpu caffe --version
git clone https://github.com/yahoo/open_nsfw
cd open_nsfw
docker run -ti --volume=/home/duan/open_nsfw/:/workspace caffe:cpu bash
# -*- encoding:utf-8 -*-
__date__ = "17/1/16"
__author__ = "duan"
import os
import shutil
from argparse import ArgumentParser
def video_to_frames(video_path, frames_path, step_size=20):
    """Extract one JPEG frame every ``step_size`` seconds from a video.

    ffmpeg writes the frames to ``frames_path + "/out%05d.jpg"``. The frame
    directory is created first when it does not exist yet.

    :param video_path: path of the video file passed to ``ffmpeg -i``.
    :param frames_path: directory that receives the extracted frames.
    :param step_size: number of seconds between two extracted frames.
    :returns: the exit status of ffmpeg, or 127 when ffmpeg could not be
        started at all.
    """
    # Imported locally so this function does not rely on the file header.
    import subprocess

    # The existence check in the original had lost its body; creating the
    # directory is what makes the output pattern below usable.
    if not os.path.exists(frames_path):
        os.makedirs(frames_path)

    output_file = frames_path + "/out%05d.jpg"
    # fps=1/N keeps one frame every N seconds (fps=1/10 -> every 10 seconds).
    command = ["ffmpeg", "-i", video_path, "-f", "image2",
               "-vf", "fps=fps=1/{}".format(step_size), output_file]
    # Show the command that is really executed, filter included.
    print(" ".join(command))
    try:
        # An argument list (no shell) keeps spaces or shell metacharacters in
        # the video path from being interpreted.
        return subprocess.call(command)
    except OSError as err:
        # ffmpeg is not installed or not executable: report it instead of
        # raising, as the previous os.system() call never raised either.
        print("could not run ffmpeg: {}".format(err))
        return 127
if __name__ == '__main__':
    # Imported here so this script section is self-contained.
    import subprocess
    import sys

    parser = ArgumentParser(
        description='Extract frames from a video and score them inside the '
                    'Caffe container.')
    parser.add_argument('--content', type=str,
                        dest='content', help='content image',
                        metavar='CONTENT', required=True)
    parser.add_argument('--step', type=int, default=20,
                        dest='step', help='the video step you want use',
                        metavar='STEP')
    # The workspace used to be hard-coded; the default keeps the old value.
    parser.add_argument('--workspace', type=str,
                        default='/home/duan/open_nsfw/',
                        dest='workspace',
                        help='host directory mounted as /workspace in the '
                             'container (set as your own path)',
                        metavar='WORKSPACE')
    options = parser.parse_args()

    video_name = options.content  # the video name you want to detect
    step_size = options.step      # seconds between two extracted frames
    video_path = "./"             # the video is expected in the current folder
    frames_path = "picture"

    video_to_frames(video_path + video_name, frames_path, step_size)

    # Launch the container with the workspace mounted and run the detector.
    # An argument list (no shell) keeps the workspace path from being
    # re-interpreted by a shell.
    docker_command = ["docker", "run", "-ti",
                      "--volume={}:/workspace".format(options.workspace),
                      "caffe:cpu", "bash", "-c", "python video_detect.py"]
    try:
        status = subprocess.call(docker_command)
    except OSError as err:
        print("could not run docker: {}".format(err))
        status = 127
    # Propagate the container's exit status to the caller of this script.
    sys.exit(status)
python video_detect.py
# -*- encoding:utf-8 -*-
__date__ = "17/1/16"
__author__ = "duan"
import os
import shutil
frames_path = "picture"

# Cut-offs from the accompanying write-up: below 0.2 is safe, above 0.8 is
# dangerous, everything in between (both bounds included) is the middle level.
SAFE_THRESHOLD = 0.2
DANGEROUS_THRESHOLD = 0.8


def classify_score(score):
    """Return "safe", "median" or "dangerous" for one NSFW score."""
    if score < SAFE_THRESHOLD:
        return "safe"
    if score <= DANGEROUS_THRESHOLD:
        return "median"
    return "dangerous"


def summarize_scores(scores):
    """Count the scores per level.

    :param scores: sequence of NSFW scores, one per frame.
    :returns: list of ``(label, count, percentage)`` tuples in the order
        safe, median, dangerous. Percentages are rounded to 3 decimals and
        are 0.0 when ``scores`` is empty.
    """
    # Float counters keep the division below a true division on Python 2.
    counts = {"safe": 0.0, "median": 0.0, "dangerous": 0.0}
    for score in scores:
        counts[classify_score(score)] += 1

    total = len(scores)
    summary = []
    for label in ("safe", "median", "dangerous"):
        # No frames at all must not end in a ZeroDivisionError.
        proportion = round(counts[label] / total * 100, 3) if total else 0.0
        summary.append((label, counts[label], proportion))
    return summary


def main():
    """Score every frame in ``frames_path`` and print the per-level report."""
    # Imported here so the helpers above stay importable without Caffe.
    import video_nsfw

    scores = []
    for name in sorted(os.listdir(frames_path)):
        frame = os.path.join(frames_path, name)
        # Test the path inside the frame directory, not the bare file name.
        if os.path.isdir(frame):
            continue
        scores.append(video_nsfw.detact(
            "nsfw_model/deploy.prototxt",
            "nsfw_model/resnet_50_1by2_nsfw.caffemodel",
            frame))

    for label, count, proportion in summarize_scores(scores):
        print("{} count: {}, proportion: {}%".format(label, count, proportion))


if __name__ == "__main__":
    main()
# -*- encoding:utf-8 -*-
__date__ = "17/1/16"
__author__ = "duan"
import os
import shutil
import numpy as np
import os
import sys
import argparse
import glob
import time
from PIL import Image
from StringIO import StringIO
import caffe
def resize_image(data, sz=(256, 256)):
    """
    Resize image. Please use this resize logic for best results instead of the
    caffe, since it was used to generate training dataset

    :param data: the encoded image content; it is passed through ``str()``
        before decoding.
    :param sz: the resized image dimensions, handed unchanged to
        ``Image.resize``.
    :returns: a bytearray with the resized image encoded as JPEG.
    """
    source = Image.open(StringIO(str(data)))
    if source.mode != "RGB":
        source = source.convert('RGB')
    resized = source.resize(sz, resample=Image.BILINEAR)

    buf = StringIO()
    resized.save(buf, format='JPEG')
    # getvalue() returns the whole buffer whatever the stream position is;
    # read() directly after save() starts at the end and yields nothing.
    return bytearray(buf.getvalue())
def caffe_preprocess_and_compute(pimg, caffe_transformer=None, caffe_net=None,
                                 output_layers=None):
    """
    Run a Caffe network on an input image after preprocessing it to prepare
    it for Caffe.

    :param pimg: the encoded image content, forwarded to ``resize_image``.
    :param caffe_transformer: transformer whose ``preprocess('data', ...)``
        is applied to the cropped image; required when ``caffe_net`` is given.
    :param caffe_net: the network to run; when it is None nothing is computed.
    :param output_layers: names of the blobs to fetch; defaults to the
        network's own ``outputs``.
    :returns: the first requested output for the image as a float array, or
        an empty list when ``caffe_net`` is None.
    """
    if caffe_net is None:
        return []

    # Grab the default output names if none were requested specifically.
    if output_layers is None:
        output_layers = caffe_net.outputs

    img_data_rs = resize_image(pimg, sz=(256, 256))
    image = caffe.io.load_image(StringIO(img_data_rs))

    # Cut a window of the network's input height/width out of the middle of
    # the resized image. Floor division keeps the offsets usable as indices.
    H, W, _ = image.shape
    _, _, h, w = caffe_net.blobs['data'].data.shape
    h_off = max((H - h) // 2, 0)
    w_off = max((W - w) // 2, 0)
    crop = image[h_off:h_off + h, w_off:w_off + w, :]

    transformed_image = caffe_transformer.preprocess('data', crop)
    # Add a leading axis of length 1 in front of the preprocessed image.
    transformed_image.shape = (1,) + transformed_image.shape

    input_name = caffe_net.inputs[0]
    all_outputs = caffe_net.forward_all(blobs=output_layers,
                                        **{input_name: transformed_image})
    return all_outputs[output_layers[0]][0].astype(float)
# Loaded (net, transformer) pairs keyed by the two model file paths, so that
# scoring many frames does not reload the model for every image.
_NSFW_MODELS = {}


def _load_model(model_def, pretrained_model):
    """Return the cached (net, transformer) pair for the given model files."""
    key = (model_def, pretrained_model)
    if key not in _NSFW_MODELS:
        nsfw_net = caffe.Net(model_def,  # pylint: disable=invalid-name
                             pretrained_model, caffe.TEST)
        # Note that the parameters are hard-coded for best results
        transformer = caffe.io.Transformer(
            {'data': nsfw_net.blobs['data'].data.shape})
        transformer.set_transpose('data', (2, 0, 1))  # move image channels to outermost
        transformer.set_mean('data', np.array([104, 117, 123]))  # subtract the dataset-mean value in each channel
        transformer.set_raw_scale('data', 255)  # rescale from [0, 1] to [0, 255]
        transformer.set_channel_swap('data', (2, 1, 0))  # swap channels from RGB to BGR
        _NSFW_MODELS[key] = (nsfw_net, transformer)
    return _NSFW_MODELS[key]


def detact(model_def, pretrained_model, input_file):
    """Score one image file with the NSFW network.

    :param model_def: path of the network definition (deploy prototxt).
    :param pretrained_model: path of the trained weights (caffemodel).
    :param input_file: path of the image to score.
    :returns: ``scores[1]`` of the 'prob' output, the NSFW probability.
    """
    # Binary mode and a context manager: the image bytes are read untouched
    # and the file handle is closed right away.
    with open(input_file, "rb") as image_file:
        image_data = image_file.read()

    nsfw_net, caffe_transformer = _load_model(model_def, pretrained_model)

    # Classify.
    scores = caffe_preprocess_and_compute(image_data,
                                          caffe_transformer=caffe_transformer,
                                          caffe_net=nsfw_net,
                                          output_layers=['prob'])

    # Scores is the array containing SFW / NSFW image probabilities
    # scores[1] indicates the NSFW probability
    # Formatted as one string so the output is the same with the print
    # statement and the print function.
    print("NSFW score: {}".format(scores[1]))
    return scores[1]
python launch_video_detact.py --content 1995.mp4 --step 20
