Class: Crawline::ResourceRepository
- Inherits: Object
- Object
- Crawline::ResourceRepository
- Defined in:
- lib/crawline.rb
Instance Method Summary
- #compress_data(file_name, data) ⇒ Object
- #decompress_data(file_name, compressed_data) ⇒ Object
- #exists_s3_object?(file_name) ⇒ Boolean
- #get_s3_object(file_name) ⇒ Object
- #initialize(access_key, secret_key, region, bucket, endpoint, force_path_style, object_name_suffix) ⇒ ResourceRepository (constructor): Returns a new instance of ResourceRepository.
- #list_s3_objects ⇒ Object
- #put_s3_object(file_name, data) ⇒ Object
- #remove_s3_object(file_name) ⇒ Object
- #remove_s3_objects ⇒ Object
Constructor Details
#initialize(access_key, secret_key, region, bucket, endpoint, force_path_style, object_name_suffix) ⇒ ResourceRepository
Returns a new instance of ResourceRepository
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
# File 'lib/crawline.rb', line 71
#
# Sets up the S3-backed repository: configures the AWS SDK, resolves the
# bucket and creates it when it does not exist yet.
#
# Region and credentials are written into +Aws.config+, i.e. the SDK-wide
# configuration hash, not only into the client built here. The access key is
# included in the debug log line; the secret key is not.
#
# @param access_key first argument handed to Aws::Credentials.new
# @param secret_key second argument handed to Aws::Credentials.new
# @param region stored under :region in Aws.config
# @param bucket name used to look up (and, if missing, create) the bucket
# @param endpoint forwarded as-is to Aws::S3::Resource.new
# @param force_path_style forwarded as-is to Aws::S3::Resource.new
# @param object_name_suffix kept in @object_name_suffix; when non-nil, other
#   methods of this class prepend it (followed by "/") to object keys
def initialize(access_key, secret_key, region, bucket, endpoint, force_path_style, object_name_suffix)
  @logger = CrawlineLogger.get_logger
  @logger.debug("ResourceRepository#initialize: start: access_key=#{access_key}, region=#{region}, bucket=#{bucket}, endpoint=#{endpoint}, force_path_style=#{force_path_style}, object_name_suffix=#{object_name_suffix}")

  sdk_settings = {
    region: region,
    credentials: Aws::Credentials.new(access_key, secret_key)
  }
  Aws.config.update(sdk_settings)

  resource = Aws::S3::Resource.new(endpoint: endpoint, force_path_style: force_path_style)
  @logger.debug("ResourceRepository#initialize: init s3 client")

  @bucket = resource.bucket(bucket)
  @logger.debug("ResourceRepository#initialize: get bucket")

  # Create the bucket on first use so later put/get calls have a target.
  unless @bucket.exists?
    @logger.debug("ResourceRepository#initialize: bucket not exists")
    @bucket.create
    @logger.debug("ResourceRepository#initialize: bucket created")
  end

  @object_name_suffix = object_name_suffix
end
Instance Method Details
#compress_data(file_name, data) ⇒ Object
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
# File 'lib/crawline.rb', line 169
#
# Packs +data+ into an in-memory 7z archive (compression level 9) holding a
# single entry, then verifies the archive before returning it.
#
# @param file_name path-like name; only the part after the last "/" is used
#   as the entry name inside the archive
# @param data payload handed to SevenZipRuby::Writer#add_data
# @return the archive contents read back from the in-memory buffer
# @raise [RuntimeError] "Compress error" when the freshly written archive
#   fails SevenZipRuby::Reader.verify
def compress_data(file_name, data)
  # StringIO.open returns the block's value, so the archive bytes read at the
  # end of the block become this method's result.
  StringIO.open("") do |buffer|
    SevenZipRuby::Writer.open(buffer) do |writer|
      writer.level = 9
      writer.add_data(data, file_name.split("/").last)
    end

    buffer.rewind
    raise "Compress error" unless SevenZipRuby::Reader.verify(buffer)

    buffer.rewind
    buffer.read
  end
end
#decompress_data(file_name, compressed_data) ⇒ Object
188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
# File 'lib/crawline.rb', line 188
#
# Verifies a 7z archive held in memory and extracts its first entry.
#
# @param file_name not used by this method
# @param compressed_data archive contents, wrapped in a StringIO for reading
# @return the data extracted from the first archive entry
# @raise [RuntimeError] "Decompress error" when SevenZipRuby::Reader.verify
#   rejects the archive
def decompress_data(file_name, compressed_data)
  StringIO.open(compressed_data) do |archive|
    raise "Decompress error" unless SevenZipRuby::Reader.verify(archive)

    # verify consumed the stream; start over before opening it for reading.
    archive.rewind

    extracted = nil
    SevenZipRuby::Reader.open(archive) do |reader|
      extracted = reader.extract_data(reader.entries.first)
    end
    extracted
  end
end
#exists_s3_object?(file_name) ⇒ Boolean
151 152 153 154 155 |
# File 'lib/crawline.rb', line 151
#
# Reports whether the ".latest.7z" object for +file_name+ is present in the
# bucket.
#
# The key is built exactly once, as
# "<object_name_suffix>/<file_name>.latest.7z" (or "<file_name>.latest.7z"
# when no suffix is configured). The previous implementation prepended the
# suffix here and then delegated to a lookup that prepended it again, so with
# a suffix configured it probed "<suffix>/<suffix>/..." and never found the
# object. Existence is now checked on the object itself instead of
# downloading and decompressing it.
#
# @param file_name object name without the namespace prefix and without the
#   ".latest.7z" extension
# @return [Boolean] result of Aws::S3::Object#exists? for the computed key
def exists_s3_object?(file_name)
  @logger.debug("ResourceRepository#exists_s3_object?: file_name=#{file_name}")

  key_prefix = @object_name_suffix.nil? ? "" : @object_name_suffix + "/"
  @bucket.object(key_prefix + file_name + ".latest.7z").exists?
end
#get_s3_object(file_name) ⇒ Object
130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
# File 'lib/crawline.rb', line 130
#
# Downloads the ".latest.7z" object for +file_name+ and returns its
# decompressed contents.
#
# The key is "<object_name_suffix>/<file_name>.latest.7z", or
# "<file_name>.latest.7z" when no suffix is configured.
#
# The read length is taken from the GET response (+content_length+) rather
# than from +object.size+, so no request beyond the GET is made just to learn
# the size.
#
# @param file_name object name without the namespace prefix and without the
#   ".latest.7z" extension
# @return the value returned by #decompress_data, or nil when S3 reports
#   Aws::S3::Errors::NoSuchKey
def get_s3_object(file_name)
  @logger.debug("ResourceRepository#get_s3_object: file_name=#{file_name}")

  key_prefix = @object_name_suffix.nil? ? "" : @object_name_suffix + "/"
  object = @bucket.object(key_prefix + file_name + ".latest.7z")

  begin
    @logger.debug("ResourceRepository#get_s3_object: getting")
    response = object.get
    stored_data = response.body.read(response.content_length)

    data = decompress_data(file_name, stored_data)

    @logger.debug("ResourceRepository#get_s3_object: getted: size=#{data.size}")
    data
  rescue Aws::S3::Errors::NoSuchKey
    # A missing object is an expected outcome, reported to the caller as nil.
    @logger.debug("ResourceRepository#get_s3_object: no such key")
    nil
  end
end
#list_s3_objects ⇒ Object
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
# File 'lib/crawline.rb', line 111
#
# Walks every object in the bucket and yields the decompressed contents of
# each one whose key ends in ".latest.7z". Other keys are only logged.
#
# The listing is not restricted to the configured object name prefix.
#
# @yield [data] the value returned by #decompress_data for one stored object
# @return whatever +@bucket.objects.each+ returns
def list_s3_objects
  @logger.debug("ResourceRepository#list_s3_objects: start")

  @bucket.objects.each do |summary|
    @logger.debug("ResourceRepository#list_s3_objects: object.key=#{summary.key}")
    next unless summary.key.end_with?(".latest.7z")

    archive = summary.get.body.read(summary.size)
    yield(decompress_data(summary.key, archive))
  end
end
#put_s3_object(file_name, data) ⇒ Object
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
# File 'lib/crawline.rb', line 95
#
# Compresses +data+ and stores the archive under two keys: the current copy
# at "<base>.latest.7z" and a copy at "<base>.<unix time>.7z", where <base>
# is +file_name+ preceded by "<object_name_suffix>/" when a suffix is
# configured.
#
# @param file_name object name without prefix or extension; also handed to
#   #compress_data
# @param data payload handed to #compress_data
# @return the result of the final debug log call
def put_s3_object(file_name, data)
  @logger.debug("ResourceRepository#put_s3_object: start: file_name=#{file_name}, data.nil?=#{data.nil?}")

  archive = compress_data(file_name, data)
  key_base = (@object_name_suffix.nil? ? "" : @object_name_suffix + "/") + file_name

  # Current copy: overwritten on every put.
  @bucket.object(key_base + ".latest.7z").put(body: archive)
  @logger.debug("ResourceRepository#put_s3_object: put original object: data.size=#{archive.size}")

  # Timestamped copy: key carries the epoch second at upload time.
  @bucket.object(key_base + "." + Time.now.to_i.to_s + ".7z").put(body: archive)
  @logger.debug("ResourceRepository#put_s3_object: put backup object: data.size=#{archive.size}")
end
#remove_s3_object(file_name) ⇒ Object
163 164 165 166 167 |
# File 'lib/crawline.rb', line 163
#
# Deletes the stored objects for +file_name+ by batch-deleting every key that
# starts with "<object_name_suffix>/<file_name>" (or just "<file_name>" when
# no suffix is configured).
#
# The namespace prefix is applied here the same way the put/get methods build
# their keys; previously it was omitted, so with a suffix configured the
# delete did not target the keys this class writes.
#
# NOTE(review): this is a plain prefix match, so keys of other resources that
# begin with the same characters are deleted as well — confirm callers expect
# that.
#
# @param file_name object name without the namespace prefix or extension
# @return the result of +batch_delete!+
def remove_s3_object(file_name)
  @logger.debug("ResourceRepository#remove_s3_object: start: file_name=#{file_name}")

  key_prefix = (@object_name_suffix.nil? ? "" : @object_name_suffix + "/") + file_name
  @bucket.objects({ prefix: key_prefix }).batch_delete!
end
#remove_s3_objects ⇒ Object
157 158 159 160 161 |
# File 'lib/crawline.rb', line 157
#
# Batch-deletes every object in the bucket. No key filter is applied, so the
# configured object name prefix does not limit what is removed.
#
# @return the result of +batch_delete!+
def remove_s3_objects
  @logger.debug("ResourceRepository#remove_s3_objects")

  all_objects = @bucket.objects
  all_objects.batch_delete!
end