diff --git a/app/javascript/mastodon/features/explore/components/story.jsx b/app/javascript/mastodon/features/explore/components/story.jsx index 92eb41cff8..80dd5200fc 100644 --- a/app/javascript/mastodon/features/explore/components/story.jsx +++ b/app/javascript/mastodon/features/explore/components/story.jsx @@ -22,6 +22,7 @@ export default class Story extends PureComponent { author: PropTypes.string, sharedTimes: PropTypes.number, thumbnail: PropTypes.string, + thumbnailDescription: PropTypes.string, blurhash: PropTypes.string, expanded: PropTypes.bool, }; @@ -33,7 +34,7 @@ export default class Story extends PureComponent { handleImageLoad = () => this.setState({ thumbnailLoaded: true }); render () { - const { expanded, url, title, lang, publisher, author, publishedAt, sharedTimes, thumbnail, blurhash } = this.props; + const { expanded, url, title, lang, publisher, author, publishedAt, sharedTimes, thumbnail, thumbnailDescription, blurhash } = this.props; const { thumbnailLoaded } = this.state; @@ -49,7 +50,7 @@ export default class Story extends PureComponent { {thumbnail ? ( <> <div className={classNames('story__thumbnail__preview', { 'story__thumbnail__preview--hidden': thumbnailLoaded })}><Blurhash hash={blurhash} /></div> - <img src={thumbnail} onLoad={this.handleImageLoad} alt='' role='presentation' /> + <img src={thumbnail} onLoad={this.handleImageLoad} alt={thumbnailDescription} title={thumbnailDescription} lang={lang} /> </> ) : <Skeleton />} </div> diff --git a/app/javascript/mastodon/features/explore/links.jsx b/app/javascript/mastodon/features/explore/links.jsx index 489ab6dd61..663aa6d80f 100644 --- a/app/javascript/mastodon/features/explore/links.jsx +++ b/app/javascript/mastodon/features/explore/links.jsx @@ -67,6 +67,7 @@ class Links extends PureComponent { author={link.get('author_name')} sharedTimes={link.getIn(['history', 0, 'accounts']) * 1 + link.getIn(['history', 1, 'accounts']) * 1} thumbnail={link.get('image')} + thumbnailDescription={link.get('image_description')} blurhash={link.get('blurhash')} /> ))} diff --git a/app/javascript/mastodon/features/status/components/card.jsx b/app/javascript/mastodon/features/status/components/card.jsx index 6ac3c1d0f2..07fb1db9e2 100644 --- a/app/javascript/mastodon/features/status/components/card.jsx +++ b/app/javascript/mastodon/features/status/components/card.jsx @@ -167,7 +167,8 @@ export default class Card extends PureComponent { /> ); - let thumbnail = <img src={card.get('image')} alt='' style={thumbnailStyle} onLoad={this.handleImageLoad} className='status-card__image-image' />; + const thumbnailDescription = card.get('image_description'); + const thumbnail = <img src={card.get('image')} alt={thumbnailDescription} title={thumbnailDescription} lang={language} style={thumbnailStyle} onLoad={this.handleImageLoad} className='status-card__image-image' />; let spoilerButton = ( <button type='button' onClick={this.handleReveal} className='spoiler-button__overlay'> diff --git a/app/lib/link_details_extractor.rb b/app/lib/link_details_extractor.rb index 4ee07acdc0..b95ec80519 100644 --- a/app/lib/link_details_extractor.rb +++ b/app/lib/link_details_extractor.rb @@ -113,6 +113,7 @@ class LinkDetailsExtractor title: title || '', description: description || '', image_remote_url: image, + image_description: image_alt || '', type: type, link_type: link_type, width: width || 0, @@ -168,6 +169,10 @@ class LinkDetailsExtractor valid_url_or_nil(opengraph_tag('og:image')) end + def image_alt + opengraph_tag('og:image:alt') + end + def canonical_url valid_url_or_nil(link_tag('canonical') || opengraph_tag('og:url'), same_origin_only: true) || @original_url.to_s end diff --git a/app/models/preview_card.rb b/app/models/preview_card.rb index d9ddd3ba0d..3e2b5bf992 100644 --- a/app/models/preview_card.rb +++ b/app/models/preview_card.rb @@ -31,6 +31,7 @@ # trendable :boolean # link_type :integer # published_at :datetime +# image_description :string default(""), not null # class PreviewCard < ApplicationRecord diff --git a/app/serializers/rest/preview_card_serializer.rb b/app/serializers/rest/preview_card_serializer.rb index cf700ff24e..3e1c4bde3c 100644 --- a/app/serializers/rest/preview_card_serializer.rb +++ b/app/serializers/rest/preview_card_serializer.rb @@ -6,7 +6,7 @@ class REST::PreviewCardSerializer < ActiveModel::Serializer attributes :url, :title, :description, :language, :type, :author_name, :author_url, :provider_name, :provider_url, :html, :width, :height, - :image, :embed_url, :blurhash, :published_at + :image, :image_description, :embed_url, :blurhash, :published_at def image object.image? ? full_asset_url(object.image.url(:original)) : nil diff --git a/db/migrate/20230725213448_add_image_description_to_preview_cards.rb b/db/migrate/20230725213448_add_image_description_to_preview_cards.rb new file mode 100644 index 0000000000..03020db655 --- /dev/null +++ b/db/migrate/20230725213448_add_image_description_to_preview_cards.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +require Rails.root.join('lib', 'mastodon', 'migration_helpers') + +class AddImageDescriptionToPreviewCards < ActiveRecord::Migration[7.0] + include Mastodon::MigrationHelpers + + disable_ddl_transaction! + + def up + safety_assured { add_column_with_default :preview_cards, :image_description, :string, default: '', allow_null: false } + end + + def down + remove_column :preview_cards, :image_description + end +end diff --git a/db/schema.rb b/db/schema.rb index 0c8ede3831..79bebb5ed5 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -802,6 +802,7 @@ ActiveRecord::Schema[7.0].define(version: 2023_08_03_112520) do t.boolean "trendable" t.integer "link_type" t.datetime "published_at" + t.string "image_description", default: "", null: false t.index ["url"], name: "index_preview_cards_on_url", unique: true end diff --git a/spec/lib/link_details_extractor_spec.rb b/spec/lib/link_details_extractor_spec.rb index ef501efbff..599bc4e6de 100644 --- a/spec/lib/link_details_extractor_spec.rb +++ b/spec/lib/link_details_extractor_spec.rb @@ -3,33 +3,31 @@ require 'rails_helper' RSpec.describe LinkDetailsExtractor do - subject { described_class.new(original_url, html, html_charset) } + subject { described_class.new(original_url, html, nil) } - let(:original_url) { '' } - let(:html) { '' } - let(:html_charset) { nil } + let(:original_url) { 'https://example.com/dog.html?tracking=123' } describe '#canonical_url' do - let(:original_url) { 'https://foo.com/article?bar=baz123' } + let(:html) { "<!doctype html><link rel='canonical' href='#{url}'>" } + + context 'when canonical URL points to the same host' do + let(:url) { 'https://example.com/dog.html' } + + it 'ignores the canonical URLs' do + expect(subject.canonical_url).to eq 'https://example.com/dog.html' + end + end context 'when canonical URL points to another host' do - let(:html) { '<!doctype html><link rel="canonical" href="https://bar.com/different-article" />' } + let(:url) { 'https://different.example.net/dog.html' } it 'ignores the canonical URLs' do expect(subject.canonical_url).to eq original_url end end - context 'when canonical URL points to the same host' do - let(:html) { '<!doctype html><link rel="canonical" href="https://foo.com/article" />' } - - it 'ignores the canonical URLs' do - expect(subject.canonical_url).to eq 'https://foo.com/article' - end - end - context 'when canonical URL is set to "null"' do - let(:html) { '<!doctype html><link rel="canonical" href="null" />' } + let(:url) { 'null' } it 'ignores the canonical URLs' do expect(subject.canonical_url).to eq original_url @@ -37,46 +35,103 @@ RSpec.describe LinkDetailsExtractor do end end + context 'when only basic metadata is present' do + let(:html) { <<~HTML } + <!doctype html> + <html lang="en"> + <head> + <title>Man bites dog</title> + <meta name="description" content="A dog's tale"> + </head> + </html> + HTML + + describe '#title' do + it 'returns the title from title tag' do + expect(subject.title).to eq 'Man bites dog' + end + end + + describe '#description' do + it 'returns the description from meta tag' do + expect(subject.description).to eq "A dog's tale" + end + end + + describe '#language' do + it 'returns the language from lang attribute' do + expect(subject.language).to eq 'en' + end + end + end + context 'when structured data is present' do - let(:original_url) { 'https://example.com/page.html' } - - context 'when is wrapped in CDATA tags' do - let(:html) { <<~HTML } - <!doctype html> - <html> - <head> - <script type="application/ld+json"> - //<![CDATA[ - {"@context":"http://schema.org","@type":"NewsArticle","mainEntityOfPage":"https://example.com/page.html","headline":"Foo","datePublished":"2022-01-31T19:53:00+00:00","url":"https://example.com/page.html","description":"Bar","author":{"@type":"Person","name":"Hoge"},"publisher":{"@type":"Organization","name":"Baz"}} - //]]> - </script> - </head> - </html> - HTML + let(:ld_json) do + { + '@context' => 'https://schema.org', + '@type' => 'NewsArticle', + 'headline' => 'Man bites dog', + 'description' => "A dog's tale", + 'datePublished' => '2022-01-31T19:53:00+00:00', + 'author' => { + '@type' => 'Organization', + 'name' => 'Charlie Brown', + }, + 'publisher' => { + '@type' => 'NewsMediaOrganization', + 'name' => 'Pet News', + 'url' => 'https://example.com', + }, + }.to_json + end + shared_examples 'structured data' do describe '#title' do it 'returns the title from structured data' do - expect(subject.title).to eq 'Foo' + expect(subject.title).to eq 'Man bites dog' end end describe '#description' do it 'returns the description from structured data' do - expect(subject.description).to eq 'Bar' + expect(subject.description).to eq "A dog's tale" end end - describe '#provider_name' do - it 'returns the provider name from structured data' do - expect(subject.provider_name).to eq 'Baz' + describe '#published_at' do + it 'returns the publicaton time from structured data' do + expect(subject.published_at).to eq '2022-01-31T19:53:00+00:00' end end describe '#author_name' do it 'returns the author name from structured data' do - expect(subject.author_name).to eq 'Hoge' + expect(subject.author_name).to eq 'Charlie Brown' end end + + describe '#provider_name' do + it 'returns the provider name from structured data' do + expect(subject.provider_name).to eq 'Pet News' + end + end + end + + context 'when is wrapped in CDATA tags' do + let(:html) { <<~HTML } + <!doctype html> + <html> + <head> + <script type="application/ld+json"> + //<![CDATA[ + #{ld_json} + //]]> + </script> + </head> + </html> + HTML + + include_examples 'structured data' end context 'with the first tag is invalid JSON' do @@ -85,76 +140,152 @@ RSpec.describe LinkDetailsExtractor do <html> <body> <script type="application/ld+json"> - { - "@context":"https://schema.org", - "@type":"ItemList", - "url":"https://example.com/page.html", - "name":"Foo", - "description":"Bar" - }, - { - "@context": "https://schema.org", - "@type": "BreadcrumbList", - "itemListElement":[ - { - "@type":"ListItem", - "position":1, - "item":{ - "@id":"https://www.example.com", - "name":"Baz" - } - } - ] - } + invalid LD+JSON </script> <script type="application/ld+json"> - { - "@context":"https://schema.org", - "@type":"NewsArticle", - "mainEntityOfPage": { - "@type":"WebPage", - "@id": "http://example.com/page.html" - }, - "headline": "Foo", - "description": "Bar", - "datePublished": "2022-01-31T19:46:00+00:00", - "author": { - "@type": "Organization", - "name": "Hoge" - }, - "publisher": { - "@type": "NewsMediaOrganization", - "name":"Baz", - "url":"https://example.com/" - } - } + #{ld_json} </script> </body> </html> HTML - describe '#title' do - it 'returns the title from structured data' do - expect(subject.title).to eq 'Foo' - end - end + include_examples 'structured data' + end - describe '#description' do - it 'returns the description from structured data' do - expect(subject.description).to eq 'Bar' - end - end + context 'with preceding block of unsupported LD+JSON' do + let(:html) { <<~HTML } + <!doctype html> + <html> + <body> + <script type="application/ld+json"> + [ + { + "@context": "https://schema.org", + "@type": "ItemList", + "url": "https://example.com/cat.html", + "name": "Man bites cat", + "description": "A cat's tale" + }, + { + "@context": "https://schema.org", + "@type": "BreadcrumbList", + "itemListElement":[ + { + "@type": "ListItem", + "position": 1, + "item": { + "@id": "https://www.example.com", + "name": "Cat News" + } + } + ] + } + ] + </script> + <script type="application/ld+json"> + #{ld_json} + </script> + </body> + </html> + HTML - describe '#provider_name' do - it 'returns the provider name from structured data' do - expect(subject.provider_name).to eq 'Baz' - end - end + include_examples 'structured data' + end - describe '#author_name' do - it 'returns the author name from structured data' do - expect(subject.author_name).to eq 'Hoge' - end + context 'with unsupported in same block LD+JSON' do + let(:html) { <<~HTML } + <!doctype html> + <html> + <body> + <script type="application/ld+json"> + [ + { + "@context": "https://schema.org", + "@type": "ItemList", + "url": "https://example.com/cat.html", + "name": "Man bites cat", + "description": "A cat's tale" + }, + #{ld_json} + ] + </script> + </body> + </html> + HTML + + include_examples 'structured data' + end + end + + context 'when Open Graph protocol data is present' do + let(:html) { <<~HTML } + <!doctype html> + <html> + <head> + <meta property="og:url" content="https://example.com/dog.html"> + <meta property="og:title" content="Man bites dog"> + <meta property="og:description" content="A dog's tale"> + <meta property="article:published_time" content="2022-01-31T19:53:00+00:00"> + <meta property="og:author" content="Charlie Brown"> + <meta property="og:locale" content="en"> + <meta property="og:image" content="https://example.com/snoopy.jpg"> + <meta property="og:image:alt" content="A good boy"> + <meta property="og:site_name" content="Pet News"> + </head> + </html> + HTML + + describe '#canonical_url' do + it 'returns the URL from Open Graph protocol data' do + expect(subject.canonical_url).to eq 'https://example.com/dog.html' + end + end + + describe '#title' do + it 'returns the title from Open Graph protocol data' do + expect(subject.title).to eq 'Man bites dog' + end + end + + describe '#description' do + it 'returns the description from Open Graph protocol data' do + expect(subject.description).to eq "A dog's tale" + end + end + + describe '#published_at' do + it 'returns the publicaton time from Open Graph protocol data' do + expect(subject.published_at).to eq '2022-01-31T19:53:00+00:00' + end + end + + describe '#author_name' do + it 'returns the author name from Open Graph protocol data' do + expect(subject.author_name).to eq 'Charlie Brown' + end + end + + describe '#language' do + it 'returns the language from Open Graph protocol data' do + expect(subject.language).to eq 'en' + end + end + + describe '#image' do + it 'returns the image from Open Graph protocol data' do + expect(subject.image).to eq 'https://example.com/snoopy.jpg' + end + end + + describe '#image:alt' do + it 'returns the image description from Open Graph protocol data' do + expect(subject.image_alt).to eq 'A good boy' + end + end + + describe '#provider_name' do + it 'returns the provider name from Open Graph protocol data' do + expect(subject.provider_name).to eq 'Pet News' end end end