Class: Crawline::ResourceRepository

Inherits:
Object
  • Object
show all
Defined in:
lib/crawline.rb

Instance Method Summary collapse

Constructor Details

#initialize(access_key, secret_key, region, bucket, endpoint, force_path_style, object_name_suffix) ⇒ ResourceRepository

Returns a new instance of ResourceRepository



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/crawline.rb', line 71

# Sets up the repository: obtains the logger, configures the AWS SDK,
# resolves the bucket and creates it when it does not exist yet.
#
# Side effect: region and credentials are written into the global
# Aws.config, not just into the S3 resource created here.
#
# @param access_key AWS access key id (also written to the debug log)
# @param secret_key AWS secret access key (not logged)
# @param region region stored in Aws.config
# @param bucket name of the bucket to use (created if missing)
# @param endpoint endpoint handed to Aws::S3::Resource.new
# @param force_path_style flag handed to Aws::S3::Resource.new
# @param object_name_suffix kept in @object_name_suffix; the other methods
#   use it as a leading "<value>/" part of object keys when it is not nil
def initialize(access_key, secret_key, region, bucket, endpoint, force_path_style, object_name_suffix)
  @object_name_suffix = object_name_suffix
  @logger = CrawlineLogger.get_logger
  @logger.debug("ResourceRepository#initialize: start: access_key=#{access_key}, region=#{region}, bucket=#{bucket}, endpoint=#{endpoint}, force_path_style=#{force_path_style}, object_name_suffix=#{object_name_suffix}")

  sdk_settings = {
    region: region,
    credentials: Aws::Credentials.new(access_key, secret_key)
  }
  Aws.config.update(sdk_settings)

  resource = Aws::S3::Resource.new(endpoint: endpoint, force_path_style: force_path_style)
  @logger.debug("ResourceRepository#initialize: init s3 client")

  @bucket = resource.bucket(bucket)
  @logger.debug("ResourceRepository#initialize: get bucket")

  # Nothing more to do when the bucket is already there.
  return if @bucket.exists?

  @logger.debug("ResourceRepository#initialize: bucket not exists")
  @bucket.create
  @logger.debug("ResourceRepository#initialize: bucket created")
end

Instance Method Details

#compress_data(file_name, data) ⇒ Object



169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/crawline.rb', line 169

# Packs +data+ into an in-memory 7z archive (compression level 9) and
# returns the archive content.
#
# The single archive entry is named after the last "/"-separated segment
# of +file_name+. The archive is verified before it is returned.
#
# @param file_name name used to derive the entry name inside the archive
# @param data payload passed to SevenZipRuby::Writer#add_data
# @return the archive content read back from the in-memory buffer
# @raise [RuntimeError] "Compress error" when the verification fails
def compress_data(file_name, data)
  entry_name = file_name.split("/").last

  # StringIO.open returns the value of its block, i.e. the archive bytes.
  StringIO.open("") do |buffer|
    SevenZipRuby::Writer.open(buffer) do |writer|
      writer.level = 9
      writer.add_data(data, entry_name)
    end

    buffer.rewind
    raise "Compress error" unless SevenZipRuby::Reader.verify(buffer)

    buffer.rewind
    buffer.read
  end
end

#decompress_data(file_name, compressed_data) ⇒ Object



188
189
190
191
192
193
194
195
196
197
198
199
200
201
# File 'lib/crawline.rb', line 188

# Verifies a 7z archive held in memory and extracts its first entry.
#
# @param file_name not used by the body; kept as part of the signature
# @param compressed_data archive content, wrapped in a StringIO for reading
# @return the extracted content of the first archive entry
# @raise [RuntimeError] "Decompress error" when the verification fails
def decompress_data(file_name, compressed_data)
  extracted = nil

  StringIO.open(compressed_data) do |archive|
    raise "Decompress error" unless SevenZipRuby::Reader.verify(archive)

    # Verification consumed the stream, so go back to the start.
    archive.rewind
    SevenZipRuby::Reader.open(archive) do |reader|
      first_entry = reader.entries.first
      extracted = reader.extract_data(first_entry)
    end
  end

  extracted
end

#exists_s3_object?(file_name) ⇒ Boolean

Returns:

  • (Boolean)


151
152
153
154
155
# File 'lib/crawline.rb', line 151

# Tells whether the ".latest.7z" object for +file_name+ is present in the
# bucket.
#
# The key is built exactly once, in the same way as put_s3_object and
# get_s3_object build it: "<@object_name_suffix>/" (when set) + file_name +
# ".latest.7z". Previously the prefix was added here and then again inside
# get_s3_object, so with a suffix configured the lookup used a doubled
# prefix and could not find stored objects.
#
# The check asks the object handle whether it exists instead of
# downloading and decompressing the content.
#
# @param file_name logical name of the resource (without prefix/extension)
# @return [Boolean] result of the object's exists? check
def exists_s3_object?(file_name)
  @logger.debug("ResourceRepository#exists_s3_object?: file_name=#{file_name}")

  key_prefix = @object_name_suffix.nil? ? "" : "#{@object_name_suffix}/"
  @bucket.object("#{key_prefix}#{file_name}.latest.7z").exists?
end

#get_s3_object(file_name) ⇒ Object



130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/crawline.rb', line 130

# Downloads the ".latest.7z" object for +file_name+ and returns its
# decompressed content.
#
# The key is "<@object_name_suffix>/" (when set) + file_name + ".latest.7z".
#
# @param file_name logical name of the resource (without prefix/extension)
# @return the decompressed content, or nil when S3 reports that the key
#   does not exist
# @raise [RuntimeError] propagated from decompress_data when the stored
#   archive fails verification
def get_s3_object(file_name)
  @logger.debug("ResourceRepository#get_s3_object: file_name=#{file_name}")

  key_prefix = @object_name_suffix.nil? ? "" : @object_name_suffix + "/"
  object = @bucket.object(key_prefix + file_name + ".latest.7z")

  begin
    @logger.debug("ResourceRepository#get_s3_object: getting")
    # Read the whole body of this GET response. The length does not have to
    # be looked up separately, and the read cannot be cut short by a size
    # value that belongs to a different version of the object.
    stored_data = object.get.body.read

    data = decompress_data(file_name, stored_data)
    @logger.debug("ResourceRepository#get_s3_object: getted: size=#{data.size}")
    data
  rescue Aws::S3::Errors::NoSuchKey
    # A missing object is an expected outcome, reported as nil.
    @logger.debug("ResourceRepository#get_s3_object: no such key")
    nil
  end
end

#list_s3_objects ⇒ Object



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/crawline.rb', line 111

# Walks over every object in the bucket and yields the decompressed content
# of each one whose key ends with ".latest.7z". Other keys (for example the
# timestamped backups written by put_s3_object) are logged and skipped.
#
# NOTE(review): the listing covers the whole bucket and is not restricted
# to @object_name_suffix — confirm this is intended.
#
# @yield [data] decompressed content of one ".latest.7z" object
# @return the bucket's object collection when a block is given; an
#   Enumerator over the decompressed contents when called without a block
def list_s3_objects
  # Without a block there is nothing to yield to; hand back an Enumerator
  # instead of failing in the middle of the iteration.
  return enum_for(:list_s3_objects) unless block_given?

  @logger.debug("ResourceRepository#list_s3_objects: start")

  @bucket.objects.each do |obj|
    @logger.debug("ResourceRepository#list_s3_objects: object.key=#{obj.key}")
    next unless obj.key.end_with?(".latest.7z")

    # Read the full body of the GET response rather than limiting the read
    # to the size recorded in the listing entry.
    stored_data = obj.get.body.read

    yield(decompress_data(obj.key, stored_data))
  end
end

#put_s3_object(file_name, data) ⇒ Object



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/crawline.rb', line 95

# Compresses +data+ and uploads it twice: once under
# "<name>.latest.7z" and once as a backup under "<name>.<unix time>.7z",
# where <name> is "<@object_name_suffix>/" (when set) + file_name.
#
# @param file_name logical name of the resource (without prefix/extension)
# @param data content to compress and store
def put_s3_object(file_name, data)
  @logger.debug("ResourceRepository#put_s3_object: start: file_name=#{file_name}, data.nil?=#{data.nil?}")

  archive = compress_data(file_name, data)

  # Both keys share the same leading part.
  key_base = (@object_name_suffix.nil? ? "" : @object_name_suffix + "/") + file_name

  latest = @bucket.object(key_base + ".latest.7z")
  latest.put(body: archive)
  @logger.debug("ResourceRepository#put_s3_object: put original object: data.size=#{archive.size}")

  # The timestamp is taken after the first upload has finished.
  backup = @bucket.object(key_base + "." + Time.now.to_i.to_s + ".7z")
  backup.put(body: archive)
  @logger.debug("ResourceRepository#put_s3_object: put backup object: data.size=#{archive.size}")
end

#remove_s3_object(file_name) ⇒ Object



163
164
165
166
167
# File 'lib/crawline.rb', line 163

# Deletes every object whose key starts with the stored name of
# +file_name+ (the ".latest.7z" object and its timestamped backups).
#
# The delete prefix is built like the keys in put_s3_object and
# get_s3_object: "<@object_name_suffix>/" (when set) + file_name. Without
# that leading part, objects written through put_s3_object while a suffix
# is configured would not be matched.
#
# NOTE(review): this is a plain prefix match, so names that merely begin
# with +file_name+ are deleted too, and an empty +file_name+ matches
# everything under the prefix — confirm callers never pass one.
#
# @param file_name logical name of the resource (without prefix/extension)
def remove_s3_object(file_name)
  @logger.debug("ResourceRepository#remove_s3_object: start: file_name=#{file_name}")

  key_prefix = @object_name_suffix.nil? ? "" : @object_name_suffix + "/"
  @bucket.objects({prefix: key_prefix + file_name}).batch_delete!
end

#remove_s3_objects ⇒ Object



157
158
159
160
161
# File 'lib/crawline.rb', line 157

# Deletes all objects in the bucket with a single batch delete on the
# bucket's object collection (not limited to @object_name_suffix).
#
# @return whatever batch_delete! returns
def remove_s3_objects
  @logger.debug("ResourceRepository#remove_s3_objects")

  all_objects = @bucket.objects
  all_objects.batch_delete!
end