lib/svm_helper/preprocessors/simple.rb in svm_helper-0.1.0 vs lib/svm_helper/preprocessors/simple.rb in svm_helper-0.1.1
- old
+ new
@@ -33,25 +33,30 @@
"simple"
end
#
# cleans provided jobs
# @overload process(jobs, classification)
- # @param jobs [Job] single Job
+ # @param jobs [Hash] single Job
+ # @option title
+ # @option description
+ # @option id
+ # @option label
# @param classification [Symbol] in `:industry`, `:function`, `:career_level`
# @overload process(jobs, classification)
- # @param jobs [Array<Job>] list of Jobs
+ # @param jobs [Array<Hash>] list of Jobs
# @param classification [Symbol] in `:industry`, `:function`, `:career_level`
#
# @return [Array<PreprocessedData>] list of processed job data - or single job data
- def process jobs, classification=:function
- if jobs.respond_to? :map
- process_jobs jobs, classification
+ def process jobs
+ if jobs.is_a? Array
+ process_jobs jobs
else
- process_job jobs, classification
+ process_job jobs
end
end
+
#
# converts string into a cleaner version
# @param title [String] job title
#
# @return [String] clean and lowercase version of input
@@ -82,30 +87,24 @@
.downcase
.strip
end
private
- def process_jobs jobs, classification
+ def process_jobs jobs
if @parallel && RUBY_PLATFORM == 'java'
- Parallel.map(jobs, in_threads: THREAD_COUNT ) {|job| process_job job, classification }
+ Parallel.map(jobs, in_threads: THREAD_COUNT ) {|job| process_job job }
elsif @parallel
- Parallel.map(jobs, in_processes: THREAD_COUNT ) {|job| process_job job, classification }
+ Parallel.map(jobs, in_processes: THREAD_COUNT ) {|job| process_job job }
else
- jobs.map {|job| process_job job, classification }
+ jobs.map {|job| process_job job }
end
end
- def process_job job, classification
+ def process_job job
PreprocessedData.new(
- data: [ clean_title(job.title), clean_description(job.description) ],
- ids: {
- industry: job.classification_id(:industry),
- function: job.classification_id(:function),
- career_level: job.classification_id(:career_level) },
- labels: {
- industry: job.label(:industry),
- function: job.label(:function),
- career_level: job.label(:career_level) }
- ).tap{|e| e.send("#{classification}!")}
+ data: [clean_title(job[:title]), clean_description(job[:description])],
+ id: job[:id],
+ label: job[:label]
+ )
end
end
end