module CensorBear
  # Submits a piece of text to the configured cloud moderation service and
  # wraps the verdict in a TextResult.
  class CheckText
    # NOTE(review): presumably resolves to CensorBear::Logger and supplies
    # `censor_bear_logger` — confirm against the module definition.
    include Logger

    attr_accessor :content, :data_id, :type, :options

    # Regex rules for contact-info patterns commonly used by gray-market spam.
    # Neither constant is referenced elsewhere in this class; they are exposed
    # for callers.
    #
    # QQ_REG captures a run of six or more digits/whitespace that follows
    # QQ-related keywords.
    QQ_REG = /(?:[加茄qQ企鹅号码\s]{1,}|[群号]{1,}|[叩叩]{1,}|[抠抠]{1,}|[扣扣]{1,})(?:[\u4e00-\u9eff]*)(?:[:,:]?)([\d\s]{6,})/
    # WX_REG captures a run of six or more word characters/whitespace that
    # follows WeChat-related keywords.
    WX_REG = /(?:[加+微++➕薇?vV威卫星♥❤姓xX信]{2,}|weixin|weix)(?:[,❤️.\s]?)(?:[\u4e00-\u9eff]?)(?:[:,:]?)([\w\s]{6,})/

    # @param content [String] text to be checked
    # @param data_id [String, nil] optional identifier forwarded to the cloud
    #   service; it may be omitted because no callback is involved
    # @param type [String] stored on the instance; not read by this class
    # @param options [Hash] supports :skip_cloud_check
    # @raise [ParamsError] when data_id is present but rejected by
    #   TextDataId.valid?
    def initialize(content, data_id = nil, type = "ugc", options = {})
      @content = content
      @data_id = data_id
      @type = type

      # data_id is optional, so it is only validated when one was supplied.
      if data_id.present? && !TextDataId.valid?(data_id)
        raise ParamsError, "data_id 格式错误 #{data_id}"
      end

      @options = options
    end

    # Validates that there is something to check, then runs the cloud check.
    #
    # @return [TextResult, nil] see #cloud_check
    # @raise [ParamsEmptyError] when content is blank
    def scan
      raise ParamsEmptyError if content.blank?

      cloud_check
    end

    # Calls the configured provider's text-scan endpoint.
    #
    # @return [TextResult, nil] nil when the check is skipped or the configured
    #   service is not supported
    # @raise [CloudCheckError] when the provider response cannot be used
    def cloud_check
      # Only a literal `true` skips the check; other truthy values do not.
      return if options[:skip_cloud_check] == true

      service = CensorBear.config.service
      unless service == "aliyun"
        censor_bear_logger.error("未知的服务商 #{service}")
        # Return nil explicitly so the logger's return value never leaks out
        # and gets mistaken for a check result.
        return
      end

      censor_bear_logger.debug("开始调用阿里云文本检测接口, 参数为 content = #{content}, data_id = #{data_id}")
      resp = ::AliyunGreen::Text.scan(content, data_id)
      censor_bear_logger.debug("阿里云文本检测接口返回结果为 #{resp}")

      parsed = parse_result(resp)
      TextResult.new(
        parsed[:review_flag],
        content,
        parsed[:filtered_content],
        parsed[:data_id],
        "aliyun",
        parsed[:raw_data]
      )
    end

    # Instance-level convenience wrapper around CheckText.parse_result.
    #
    # @param resp [Hash] raw provider response
    # @return [Hash] see CheckText.parse_result
    def parse_result(resp)
      self.class.parse_result(resp)
    end

    class << self
      # Normalizes a raw provider response into a flat hash.
      #
      # Only the first entry of `data` and the first entry of its `results`
      # are inspected.
      #
      # @param resp [Hash] raw provider response (string or symbol keys)
      # @return [Hash] with keys :review_flag, :data_id, :content,
      #   :filtered_content, :suggestion and :raw_data
      # @raise [CloudCheckError] when the response code is not 200, or when
      #   `data` / `results` is missing, empty, or not shaped as expected
      def parse_result(resp)
        resp = resp.with_indifferent_access
        raise CloudCheckError if resp[:code].to_s != "200"

        entry = first_of(resp[:data])
        verdict = first_of(entry[:results]) if entry.is_a?(Hash)
        # A 200 response without a usable verdict is still a failed check;
        # surface it as CloudCheckError instead of a NoMethodError.
        raise CloudCheckError unless verdict.is_a?(Hash)

        suggestion = verdict[:suggestion]

        {
          # Anything other than "pass" (i.e. "block" or "review") needs review.
          # TODO: for better precision, review_flag could also take
          # verdict[:rate] into account (process of elimination).
          review_flag: suggestion != "pass",
          data_id: entry[:dataId],
          content: entry[:content],
          filtered_content: entry[:filteredContent],
          suggestion: suggestion,
          raw_data: resp,
        }
      end

      private

      # Returns the first element of +value+ when it is an Array, else nil.
      def first_of(value)
        value.first if value.is_a?(Array)
      end
    end
  end
end