lib/kishu/report.rb in kishu-0.0.1 vs lib/kishu/report.rb in kishu-1.0.0
- old
+ new
@@ -11,20 +11,30 @@
module Kishu
class Report
include Kishu::Base
include Kishu::Utils
+ attr_reader :uid, :total, :period
def initialize options={}
set_period
@es_client = Client.new()
@logger = Logger.new(STDOUT)
@report_id = options[:report_id] ? options[:report_id] : ""
@total = 0
@aggs_size = options[:aggs_size]
- @chunk_size = options[:chunk_size]
@after = options[:after_key] ||=""
+ @enrich = options[:enrich]
+ @schema = options[:schema]
+ @encoding = options[:encoding]
+ @created_by = options[:created_by]
+ @report_size = options[:report_size]
+ if @schema == "resolution"
+ @enrich = false
+ @encoding = "gzip"
+ # @report_size = 20000
+ end
end
def report_period options={}
es_client = Client.new()
@@ -35,14 +45,17 @@
# Fetches one page of aggregated DOI events from Elasticsearch and wraps
# each bucket in the event class selected by @schema.
#
# options:
#   :after_key - composite-aggregation pagination cursor (default "")
#
# Returns {data: [wrapped events], after: next_cursor_or_nil}; a nil
# :after means pagination is exhausted.
#
# Raises ArgumentError when @schema is not "resolution" or "usage"
# (previously this silently produced an array of nils).
def get_events options={}
  logger = Logger.new(STDOUT)
  es_client = Client.new()
  # `||` (not `||=`) so the caller's options hash is not mutated.
  response = es_client.get({aggs_size: @aggs_size || 400, after_key: options[:after_key] || ""})
  # Guard against a missing/empty aggregation so .map never hits nil.
  aggs = response.dig("aggregations", "doi", "buckets") || []
  events = aggs.map do |agg|
    case @schema
    when "resolution" then ResolutionEvent.new(agg, {period: @period, report_id: @report_id}).wrap_event
    when "usage" then UsageEvent.new(agg, {period: @period, report_id: @report_id}).wrap_event
    else raise ArgumentError, "unsupported report schema: #{@schema.inspect}"
    end
  end
  # Plain 4-key dig is nil-safe and equivalent to the old
  # dig(...).fetch("after_key", {"doi"=>nil}).dig("doi") chain.
  after = response.dig("aggregations", "doi", "after_key", "doi")
  logger.info "After_key for pagination #{after}"
  {data: events, after: after}
end
@@ -54,17 +67,28 @@
loop do
response = get_events({after_key: @after ||=""})
@datasets = @datasets.concat response[:data]
@after = response[:after]
@total += @datasets.size
- generate_chunk_report if @datasets.size > @chunk_size
+ generate_chunk_report if @datasets.size > @report_size
break if @after.nil?
end
end
# Pages through all events via get_events, accumulating them into
# @datasets and counting them in @total. Stops when pagination is
# exhausted (nil after_key) or the safety cap is exceeded.
def generate
  @datasets = []
  loop do
    response = get_events({after_key: @after ||= ""})
    @datasets = @datasets.concat response[:data]
    @after = response[:after]
    # Count only the newly fetched page. The previous
    # `@total += @datasets.size` re-counted every earlier page on each
    # iteration, because @datasets is never reset inside this loop.
    @total += response[:data].size
    break if @total > 40000 # safety cap bounding the report size
    break if @after.nil?
  end
end
+
# Serializes +report+ to JSON and returns the gzip-compressed bytes,
# suitable for an application/gzip request body.
def compress report
  Zlib.gzip(report.to_json)
end
@@ -80,42 +104,49 @@
end
send_report get_template
@datasets = []
end
- def make_report options={}
- generate_dataset_array
- @logger.info "#{LOGS_TAG} Month of #{@period.dig("begin-date")} sent to Hub in report #{@uid} with stats for #{@total} datasets"
- end
-
# Derives the reporting period from report_period's month and stores it
# in @period as {"begin-date": first-of-month, "end-date": last-of-month}
# formatted as YYYY-MM-DD.
def set_period
  date = report_period
  first_day = Date.civil(date.year, date.mon, 1)
  last_day = Date.civil(date.year, date.mon, -1)
  @period = {
    "begin-date": first_day.strftime("%Y-%m-%d"),
    "end-date": last_day.strftime("%Y-%m-%d"),
  }
end
def send_report report, options={}
uri = HUB_URL+'/reports'
puts uri
- headers = {
- content_type: "application/gzip",
- content_encoding: 'gzip',
- accept: 'gzip'
- }
-
- body = compress(report)
+ case @encoding
+ when "gzip" then
+ headers = {
+ content_type: "application/gzip",
+ content_encoding: 'gzip',
+ accept: 'gzip'
+ }
+ body = compress(report)
+ when "json" then
+ headers = {
+ content_type: "application/json",
+ accept: 'application/json'
+ }
+ body = report
+ end
+
n = 0
loop do
request = Maremma.post(uri, data: body,
bearer: ENV['HUB_TOKEN'],
headers: headers,
timeout: 100)
+ puts body
+ puts headers
+
@uid = request.body.dig("data","report","id")
@logger.info "#{LOGS_TAG} Hub response #{request.status} for Report finishing in #{@after}"
@logger.info "#{LOGS_TAG} Hub response #{@uid} for Report finishing in #{@after}"
n += 1
break if request.status == 201
@@ -130,15 +161,19 @@
"report-datasets": @datasets
}
end
# Builds the COUNTER-style report header for the configured @schema.
#
# Raises ArgumentError for an unknown schema (previously report_type was
# nil and the subsequent .dig raised an opaque NoMethodError).
def get_header
  report_type = case @schema
                when "resolution" then { release: "drl", title: "resolution report" }
                when "usage" then { release: "rd1", title: "usage report" }
                else raise ArgumentError, "unsupported report schema: #{@schema.inspect}"
                end
  {
    "report-name": report_type[:title],
    "report-id": "dsr",
    release: report_type[:release],
    created: Date.today.strftime("%Y-%m-%d"),
    "created-by": @created_by,
    "reporting-period": @period,
    "report-filters": [],
    "report-attributes": [],
    exceptions: [{code: 69,severity: "warning", message: "Report is compressed using gzip","help-url": "https://github.com/datacite/sashimi",data: "usage data needs to be uncompressed"}]
  }
end