activitypub-academy/app/services/fetch_resource_service.rb
ThibG cb12a2cdd3
Fix some timeouts when searching URLs by limiting some database queries (#13253)
Only look up private toots from database if the request failed because of 401,
403 or 404 errors, as those may indicate a private toot, rather than something
that isn't a toot or cannot be processed.
2020-03-12 23:06:43 +01:00

71 lines
2.6 KiB
Ruby

# frozen_string_literal: true
class FetchResourceService < BaseService
include JsonLdHelper
ACCEPT_HEADER = 'application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams", text/html;q=0.1'
attr_reader :response_code
def call(url)
return if url.blank?
process(url)
rescue HTTP::Error, OpenSSL::SSL::SSLError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError => e
Rails.logger.debug "Error fetching resource #{@url}: #{e}"
nil
end
private
def process(url, terminal = false)
@url = url
perform_request { |response| process_response(response, terminal) }
end
def perform_request(&block)
Request.new(:get, @url).add_headers('Accept' => ACCEPT_HEADER).on_behalf_of(Account.representative).perform(&block)
end
def process_response(response, terminal = false)
@response_code = response.code
return nil if response.code != 200
if ['application/activity+json', 'application/ld+json'].include?(response.mime_type)
body = response.body_with_limit
json = body_to_json(body)
[json['id'], { prefetched_body: body, id: true }] if supported_context?(json) && (equals_or_includes_any?(json['type'], ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES) || expected_type?(json))
elsif !terminal
link_header = response['Link'] && parse_link_header(response)
if link_header&.find_link(%w(rel alternate))
process_link_headers(link_header)
elsif response.mime_type == 'text/html'
process_html(response)
end
end
end
def expected_type?(json)
equals_or_includes_any?(json['type'], ActivityPub::Activity::Create::SUPPORTED_TYPES + ActivityPub::Activity::Create::CONVERTED_TYPES)
end
def process_html(response)
page = Nokogiri::HTML(response.body_with_limit)
json_link = page.xpath('//link[@rel="alternate"]').find { |link| ['application/activity+json', 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'].include?(link['type']) }
process(json_link['href'], terminal: true) unless json_link.nil?
end
def process_link_headers(link_header)
json_link = link_header.find_link(%w(rel alternate), %w(type application/activity+json)) || link_header.find_link(%w(rel alternate), ['type', 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'])
process(json_link.href, terminal: true) unless json_link.nil?
end
def parse_link_header(response)
LinkHeader.parse(response['Link'].is_a?(Array) ? response['Link'].first : response['Link'])
end
end