Class: Longleaf::RsyncReplicationService

Inherits:
Object
  • Object
show all
Includes:
Logging
Defined in:
lib/longleaf/preservation_services/rsync_replication_service.rb

Overview

Preservation service which performs replication of a file to one or more destinations using rsync.

The service definition must contain one or more destinations, specified with the “to” property. Each destination must be either a known storage location name, a remote path, or an absolute path.

Optional service configuration properties:

  • replica_collision_policy = specifies the desired outcome if the service attempts to replicate

    a file which already exists at a destination. Default: "replace".
    
  • rsync_command = the command to invoke in order to execute rsync. Default: “rsync”

  • rsync_options = additional parameters that will be passed along to rsync. Cannot include options

    which change the target of the command or prevent its execution, such as "files-from", "dry-run",
    "help", etc. Command will always include "-R". Default: "-a".
    

Constant Summary collapse

COLLISION_PROPERTY =
"replica_collision_policy"
DEFAULT_COLLISION_POLICY =
"replace"
VALID_COLLISION_POLICIES =
["replace"]
RSYNC_COMMAND_PROPERTY =
"rsync_command"
DEFAULT_COMMAND =
"rsync"
RSYNC_OPTIONS_PROPERTY =
"rsync_options"
DEFAULT_OPTIONS =
"-a"
DISALLOWED_OPTIONS =
["files-from", "n", "dry-run", "exclude", "exclude-from", "cvs-exclude",
"h", "help", "f", "F", "filter"]

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Logging

#initialize_logger, initialize_logger, #logger, logger

Constructor Details

#initialize(service_def, app_manager) ⇒ RsyncReplicationService

Initialize a RsyncReplicationService from the given service definition

Parameters:



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/longleaf/preservation_services/rsync_replication_service.rb', line 43

# Build the service from its definition, validating the rsync options, the
# replica collision policy and the replication destinations up front.
#
# @param service_def [Object] service definition; its `name` and `properties` are read
# @param app_manager [Object] application manager; its `location_manager` is used to
#   resolve destinations given as storage location names
# @raise [ArgumentError] if a disallowed rsync option is configured, the collision
#   policy is not one of VALID_COLLISION_POLICIES, no destination is provided, or a
#   destination names an unknown storage location
def initialize(service_def, app_manager)
  @service_def = service_def
  @app_manager = app_manager
  props = @service_def.properties

  @command = props[RSYNC_COMMAND_PROPERTY] || DEFAULT_COMMAND

  # Reject options which would change what rsync operates on or stop it from running
  @options = props[RSYNC_OPTIONS_PROPERTY] || DEFAULT_OPTIONS
  if contains_disallowed_option?(@options)
    raise ArgumentError, "Service #{service_def.name} specifies a disallowed rsync paramter," \
        " rsync_options may not include the following: #{DISALLOWED_OPTIONS.join(' ')}"
  end

  # -R (--relative) is always appended so that the full relative path gets replicated
  @options = @options + " -R"

  # Only policies listed in VALID_COLLISION_POLICIES are accepted
  @collision_policy = props[COLLISION_PROPERTY] || DEFAULT_COLLISION_POLICY
  unless VALID_COLLISION_POLICIES.include?(@collision_policy)
    raise ArgumentError, "Service #{service_def.name} received invalid #{COLLISION_PROPERTY}" \
        " value #{collision_policy}"
  end

  # At least one destination is required; a lone string is treated as a one element list
  targets = props[ServiceFields::REPLICATE_TO]
  if targets.nil? || targets.empty?
    raise ArgumentError, "Service #{service_def.name} must provide one or more replication destinations."
  end
  targets = [targets] if targets.is_a?(String)

  loc_manager = app_manager.location_manager
  # Resolve each destination, swapping storage location names for the location objects
  @destinations = targets.map do |target|
    # Anything containing a : or / is taken to be a path rather than a storage location name
    next target if target =~ %r{[:/]}

    unless loc_manager.locations.key?(target)
      raise ArgumentError, "Service #{service_def.name} specifies unknown storage location '#{target}'" \
          " as a replication destination"
    end

    loc_manager.locations[target]
  end
end

Instance Attribute Details

#collision_policyObject (readonly)

Returns the value of attribute collision_policy



37
38
39
# File 'lib/longleaf/preservation_services/rsync_replication_service.rb', line 37

# @return [Object] the replica collision policy held in @collision_policy
def collision_policy
  @collision_policy
end

#commandObject (readonly)

Returns the value of attribute command



37
38
39
# File 'lib/longleaf/preservation_services/rsync_replication_service.rb', line 37

# @return [Object] the rsync command held in @command
def command
  @command
end

#optionsObject (readonly)

Returns the value of attribute options



37
38
39
# File 'lib/longleaf/preservation_services/rsync_replication_service.rb', line 37

# @return [Object] the rsync options held in @options
def options
  @options
end

Instance Method Details

#contains_disallowed_option?(options) ⇒ Boolean (private)

Returns:

  • (Boolean)


143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/longleaf/preservation_services/rsync_replication_service.rb', line 143

# Determine whether an rsync options string uses any entry of DISALLOWED_OPTIONS.
#
# Single character entries are treated as short flags and are detected anywhere
# inside a bundle of short flags (for example "n" within "-an"). Longer entries
# are treated as long options and must appear as "--name", optionally followed
# by "=". Options may be separated by any whitespace (spaces, tabs, newlines),
# since a shell splits arguments on all of them.
#
# @param options [String] the configured rsync options
# @return [Boolean] true if at least one disallowed option is present
def contains_disallowed_option?(options)
  DISALLOWED_OPTIONS.any? do |disallowed|
    # Escape the name so it is always matched literally inside the pattern
    name = Regexp.escape(disallowed)
    pattern =
      if disallowed.length == 1
        /(?:\A|\s)-[a-zA-Z0-9]*#{name}[a-zA-Z0-9]*(?:\s|=|\z)/
      else
        /(?:\A|\s)--#{name}(?:\s|=|\z)/
      end

    options =~ pattern
  end
end

#is_applicable?(event) ⇒ Boolean

Determine if this service is applicable for the provided event, given the configured service definition

Parameters:

  • event (String)

    name of the event

Returns:

  • (Boolean)

    returns true if this service is applicable for the provided event



133
134
135
136
137
138
139
140
# File 'lib/longleaf/preservation_services/rsync_replication_service.rb', line 133

# Report whether this service should run for the given event. Only the
# preserve event (EventNames::PRESERVE) is applicable.
#
# @param event [String] name of the event
# @return [Boolean] true for the preserve event, false for anything else
def is_applicable?(event)
  # Same case-equality comparison a `case`/`when` would perform
  return true if EventNames::PRESERVE === event

  false
end

#perform(file_rec, event) ⇒ Object

During a replication event, perform replication of the specified file to all configured destinations as necessary.

Parameters:

  • file_rec (FileRecord)

    record representing the file to perform the service on.

  • event (String)

    name of the event this service is being invoked by.

Raises:



97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/longleaf/preservation_services/rsync_replication_service.rb', line 97

# Replicate the given file to every configured destination using rsync, and
# register the replica for destinations which are storage locations.
#
# @param file_rec [FileRecord] record representing the file to replicate; its
#   `path` and `storage_location.path` are read
# @param event [String] name of the event this service is being invoked by
#   (not read by this method)
# @raise [PreservationServiceError] if the rsync command exits unsuccessfully
# @raise [StorageLocationUnavailableError] if a destination is found to be unavailable
def perform(file_rec, event)
  source_root = file_rec.storage_location.path
  # Path of the file relative to its storage location. The location path is escaped
  # so that regex metacharacters in it (e.g. "+" or "(") are matched literally.
  rel_path = file_rec.path.sub(/\A#{Regexp.escape(source_root)}/, "")
  # source path with . so that rsync will only create destination directories starting from that point
  source_path = File.join(source_root, "./#{rel_path}")

  @destinations.each do |destination|
    dest_is_storage_loc = destination.is_a?(Longleaf::StorageLocation)
    dest_path = dest_is_storage_loc ? destination.path : destination

    # Check that the destination is available before attempting to write to it
    verify_destination_available(destination, file_rec)

    # NOTE(review): the command is run through a shell with the paths only wrapped in
    # double quotes, so paths containing '"', '$' or backticks are not safely quoted.
    rsync_invocation = "#{@command} \"#{source_path}\" \"#{dest_path}\" #{@options}"
    logger.debug("Invoking rsync with command: #{rsync_invocation}")
    _stdout, stderr, status = Open3.capture3(rsync_invocation)
    raise PreservationServiceError.new("Failed to replicate #{file_rec.path} to #{dest_path}: #{stderr}") \
        unless status.success?

    logger.info("Replicated #{file_rec.path} to destination #{dest_path}")

    # For destinations which are storage locations, register the replica with longleaf
    register_replica(destination, rel_path, file_rec) if dest_is_storage_loc
  end
end

#register_replica(destination, rel_path, file_rec) ⇒ Object (private)



173
174
175
176
177
178
179
180
181
182
# File 'lib/longleaf/preservation_services/rsync_replication_service.rb', line 173

# Register the replicated copy of a file within its destination storage location
# by performing a forced RegisterEvent carrying over the source file's checksums.
#
# @param destination [Longleaf::StorageLocation] location the file was replicated to;
#   its `path` is read
# @param rel_path [String] path of the file relative to its storage location
# @param file_rec [FileRecord] record of the source file which was replicated
# @return [Object] the result of performing the register event
def register_replica(destination, rel_path, file_rec)
  dest_file_path = File.join(destination.path, rel_path)
  dest_file_rec = FileRecord.new(dest_file_path, destination)

  # NOTE(review): the previous expression `file_rec..checksums` built a Range from an
  # undefined `checksums` and raised NameError. This assumes the checksums are exposed
  # via the record's metadata_record - confirm against FileRecord.
  register_event = RegisterEvent.new(file_rec: dest_file_rec,
      app_manager: @app_manager,
      force: true,
      checksums: file_rec.metadata_record.checksums)
  register_event.perform
end

#verify_destination_available(destination, file_rec) ⇒ Object (private)



159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/longleaf/preservation_services/rsync_replication_service.rb', line 159

# Ensure a replication destination can be written to before rsync is invoked.
#
# Storage location destinations are asked whether they are available. Destinations
# given as absolute paths (starting with "/") must be existing directories. Any
# other destination string is not checked.
#
# @param destination [Longleaf::StorageLocation, String] destination being replicated to
# @param file_rec [FileRecord] record of the file being replicated; its `path` is
#   used in error messages
# @raise [StorageLocationUnavailableError] if the storage location reports itself
#   unavailable, or an absolute destination path does not exist
def verify_destination_available(destination, file_rec)
  unless destination.is_a?(Longleaf::StorageLocation)
    # Only absolute local paths can be checked from here
    return nil unless destination.start_with?("/")
    return nil if Dir.exist?(destination)

    raise StorageLocationUnavailableError.new("Cannot replicate #{file_rec.path} to destination" \
        + " #{destination}, path does not exist.")
  end

  begin
    destination.available?
  rescue StorageLocationUnavailableError => unavailable
    # Re-raise with the file and destination named, keeping the original reason
    raise StorageLocationUnavailableError.new("Cannot replicate #{file_rec.path} to destination #{destination.name}: " \
        + unavailable.message)
  end
end