diff --git a/Gemfile b/Gemfile index 1ddb6f383a..7713bfb2b1 100644 --- a/Gemfile +++ b/Gemfile @@ -29,9 +29,7 @@ gem 'addressable', '~> 2.8' gem 'bootsnap', '~> 1.10.2', require: false gem 'browser' gem 'charlock_holmes', '~> 0.7.7' -gem 'iso-639' gem 'chewy', '~> 7.2' -gem 'cld3', '~> 3.4.4' gem 'devise', '~> 4.8' gem 'devise-two-factor', '~> 4.0' diff --git a/Gemfile.lock b/Gemfile.lock index 18459a1593..2baa12038a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -152,8 +152,6 @@ GEM elasticsearch (>= 7.12.0, < 7.14.0) elasticsearch-dsl chunky_png (1.4.0) - cld3 (3.4.4) - ffi (>= 1.1.0, < 1.16.0) climate_control (0.2.0) coderay (1.1.3) color_diff (0.1) @@ -303,7 +301,6 @@ GEM terminal-table (>= 1.5.1) idn-ruby (0.1.4) ipaddress (0.8.3) - iso-639 (0.3.5) jmespath (1.5.0) json (2.5.1) json-canonicalization (0.3.0) @@ -701,7 +698,6 @@ DEPENDENCIES capybara (~> 3.36) charlock_holmes (~> 0.7.7) chewy (~> 7.2) - cld3 (~> 3.4.4) climate_control (~> 0.2) color_diff (~> 0.1) concurrent-ruby @@ -729,7 +725,6 @@ DEPENDENCIES httplog (~> 1.5.0) i18n-tasks (~> 0.9) idn-ruby - iso-639 json-ld json-ld-preloaded (~> 3.2) kaminari (~> 1.2) diff --git a/app/controllers/activitypub/replies_controller.rb b/app/controllers/activitypub/replies_controller.rb index fde6c861f2..4ff7cfa080 100644 --- a/app/controllers/activitypub/replies_controller.rb +++ b/app/controllers/activitypub/replies_controller.rb @@ -63,15 +63,29 @@ class ActivityPub::RepliesController < ActivityPub::BaseController end def next_page - only_other_accounts = !(@replies&.last&.account_id == @account.id && @replies.size == DESCENDANTS_LIMIT) + if only_other_accounts? + # Only consider remote accounts + return nil if @replies.size < DESCENDANTS_LIMIT - account_status_replies_url( - @account, - @status, - page: true, - min_id: only_other_accounts && !only_other_accounts? ? 
nil : @replies&.last&.id, - only_other_accounts: only_other_accounts - ) + account_status_replies_url( + @account, + @status, + page: true, + min_id: @replies&.last&.id, + only_other_accounts: true + ) + else + # For now, we're serving only self-replies, but next page might be other accounts + next_only_other_accounts = @replies&.last&.account_id != @account.id || @replies.size < DESCENDANTS_LIMIT + + account_status_replies_url( + @account, + @status, + page: true, + min_id: next_only_other_accounts ? nil : @replies&.last&.id, + only_other_accounts: next_only_other_accounts + ) + end end def page_params diff --git a/app/controllers/concerns/localized.rb b/app/controllers/concerns/localized.rb index fe1142f345..f7b62f09c1 100644 --- a/app/controllers/concerns/localized.rb +++ b/app/controllers/concerns/localized.rb @@ -7,27 +7,24 @@ module Localized around_action :set_locale end - def set_locale - locale = current_user.locale if respond_to?(:user_signed_in?) && user_signed_in? - locale ||= session[:locale] ||= default_locale - locale = default_locale unless I18n.available_locales.include?(locale.to_sym) - - I18n.with_locale(locale) do - yield - end + def set_locale(&block) + I18n.with_locale(requested_locale || I18n.default_locale, &block) end private - def default_locale - if ENV['DEFAULT_LOCALE'].present? - I18n.default_locale - else - request_locale || I18n.default_locale - end + def requested_locale + requested_locale_name = available_locale_or_nil(params[:locale]) + requested_locale_name ||= available_locale_or_nil(current_user.locale) if respond_to?(:user_signed_in?) && user_signed_in? + requested_locale_name ||= http_accept_language if ENV['DEFAULT_LOCALE'].blank? 
+ requested_locale_name end - def request_locale - http_accept_language.language_region_compatible_from(I18n.available_locales) + def http_accept_language + HttpAcceptLanguage::Parser.new(request.headers.fetch('Accept-Language')).language_region_compatible_from(I18n.available_locales) if request.headers.key?('Accept-Language') + end + + def available_locale_or_nil(locale_name) + locale_name.to_sym if locale_name.respond_to?(:to_sym) && I18n.available_locales.include?(locale_name.to_sym) end end diff --git a/app/helpers/languages_helper.rb b/app/helpers/languages_helper.rb index 7307242086..f3ed7b3140 100644 --- a/app/helpers/languages_helper.rb +++ b/app/helpers/languages_helper.rb @@ -1,94 +1,237 @@ # frozen_string_literal: true module LanguagesHelper - HUMAN_LOCALES = { - af: 'Afrikaans', - ar: 'العربية', - ast: 'Asturianu', - bg: 'Български', - bn: 'বাংলা', - br: 'Breton', - ca: 'Català', - co: 'Corsu', - cs: 'Čeština', - cy: 'Cymraeg', - da: 'Dansk', - de: 'Deutsch', - el: 'Ελληνικά', - en: 'English', - eo: 'Esperanto', + ISO_639_1 = { + aa: ['Afar', 'Afaraf'].freeze, + ab: ['Abkhaz', 'аҧсуа бызшәа'].freeze, + ae: ['Avestan', 'avesta'].freeze, + af: ['Afrikaans', 'Afrikaans'].freeze, + ak: ['Akan', 'Akan'].freeze, + am: ['Amharic', 'አማርኛ'].freeze, + an: ['Aragonese', 'aragonés'].freeze, + ar: ['Arabic', 'اللغة العربية'].freeze, + as: ['Assamese', 'অসমীয়া'].freeze, + av: ['Avaric', 'авар мацӀ'].freeze, + ay: ['Aymara', 'aymar aru'].freeze, + az: ['Azerbaijani', 'azərbaycan dili'].freeze, + ba: ['Bashkir', 'башҡорт теле'].freeze, + be: ['Belarusian', 'беларуская мова'].freeze, + bg: ['Bulgarian', 'български език'].freeze, + bh: ['Bihari', 'भोजपुरी'].freeze, + bi: ['Bislama', 'Bislama'].freeze, + bm: ['Bambara', 'bamanankan'].freeze, + bn: ['Bengali', 'বাংলা'].freeze, + bo: ['Tibetan', 'བོད་ཡིག'].freeze, + br: ['Breton', 'brezhoneg'].freeze, + bs: ['Bosnian', 'bosanski jezik'].freeze, + ca: ['Catalan', 'Català'].freeze, + ce: ['Chechen', 'нохчийн мотт'].freeze, + ch: 
['Chamorro', 'Chamoru'].freeze, + co: ['Corsican', 'corsu'].freeze, + cr: ['Cree', 'ᓀᐦᐃᔭᐍᐏᐣ'].freeze, + cs: ['Czech', 'čeština'].freeze, + cu: ['Old Church Slavonic', 'ѩзыкъ словѣньскъ'].freeze, + cv: ['Chuvash', 'чӑваш чӗлхи'].freeze, + cy: ['Welsh', 'Cymraeg'].freeze, + da: ['Danish', 'dansk'].freeze, + de: ['German', 'Deutsch'].freeze, + dv: ['Divehi', 'Dhivehi'].freeze, + dz: ['Dzongkha', 'རྫོང་ཁ'].freeze, + ee: ['Ewe', 'Eʋegbe'].freeze, + el: ['Greek', 'Ελληνικά'].freeze, + en: ['English', 'English'].freeze, + eo: ['Esperanto', 'Esperanto'].freeze, + es: ['Spanish', 'Español'].freeze, + et: ['Estonian', 'eesti'].freeze, + eu: ['Basque', 'euskara'].freeze, + fa: ['Persian', 'فارسی'].freeze, + ff: ['Fula', 'Fulfulde'].freeze, + fi: ['Finnish', 'suomi'].freeze, + fj: ['Fijian', 'Vakaviti'].freeze, + fo: ['Faroese', 'føroyskt'].freeze, + fr: ['French', 'Français'].freeze, + fy: ['Western Frisian', 'Frysk'].freeze, + ga: ['Irish', 'Gaeilge'].freeze, + gd: ['Scottish Gaelic', 'Gàidhlig'].freeze, + gl: ['Galician', 'galego'].freeze, + gu: ['Gujarati', 'ગુજરાતી'].freeze, + gv: ['Manx', 'Gaelg'].freeze, + ha: ['Hausa', 'هَوُسَ'].freeze, + he: ['Hebrew', 'עברית'].freeze, + hi: ['Hindi', 'हिन्दी'].freeze, + ho: ['Hiri Motu', 'Hiri Motu'].freeze, + hr: ['Croatian', 'Hrvatski'].freeze, + ht: ['Haitian', 'Kreyòl ayisyen'].freeze, + hu: ['Hungarian', 'magyar'].freeze, + hy: ['Armenian', 'Հայերեն'].freeze, + hz: ['Herero', 'Otjiherero'].freeze, + ia: ['Interlingua', 'Interlingua'].freeze, + id: ['Indonesian', 'Bahasa Indonesia'].freeze, + ie: ['Interlingue', 'Interlingue'].freeze, + ig: ['Igbo', 'Asụsụ Igbo'].freeze, + ii: ['Nuosu', 'ꆈꌠ꒿ Nuosuhxop'].freeze, + ik: ['Inupiaq', 'Iñupiaq'].freeze, + io: ['Ido', 'Ido'].freeze, + is: ['Icelandic', 'Íslenska'].freeze, + it: ['Italian', 'Italiano'].freeze, + iu: ['Inuktitut', 'ᐃᓄᒃᑎᑐᑦ'].freeze, + ja: ['Japanese', '日本語'].freeze, + jv: ['Javanese', 'basa Jawa'].freeze, + ka: ['Georgian', 'ქართული'].freeze, + kg: ['Kongo', 
'Kikongo'].freeze, + ki: ['Kikuyu', 'Gĩkũyũ'].freeze, + kj: ['Kwanyama', 'Kuanyama'].freeze, + kk: ['Kazakh', 'қазақ тілі'].freeze, + kl: ['Kalaallisut', 'kalaallisut'].freeze, + km: ['Khmer', 'ខេមរភាសា'].freeze, + kn: ['Kannada', 'ಕನ್ನಡ'].freeze, + ko: ['Korean', '한국어'].freeze, + kr: ['Kanuri', 'Kanuri'].freeze, + ks: ['Kashmiri', 'कश्मीरी'].freeze, + ku: ['Kurdish', 'Kurdî'].freeze, + kv: ['Komi', 'коми кыв'].freeze, + kw: ['Cornish', 'Kernewek'].freeze, + ky: ['Kyrgyz', 'Кыргызча'].freeze, + la: ['Latin', 'latine'].freeze, + lb: ['Luxembourgish', 'Lëtzebuergesch'].freeze, + lg: ['Ganda', 'Luganda'].freeze, + li: ['Limburgish', 'Limburgs'].freeze, + ln: ['Lingala', 'Lingála'].freeze, + lo: ['Lao', 'ພາສາ'].freeze, + lt: ['Lithuanian', 'lietuvių kalba'].freeze, + lu: ['Luba-Katanga', 'Tshiluba'].freeze, + lv: ['Latvian', 'latviešu valoda'].freeze, + mg: ['Malagasy', 'fiteny malagasy'].freeze, + mh: ['Marshallese', 'Kajin M̧ajeļ'].freeze, + mi: ['Māori', 'te reo Māori'].freeze, + mk: ['Macedonian', 'македонски јазик'].freeze, + ml: ['Malayalam', 'മലയാളം'].freeze, + mn: ['Mongolian', 'Монгол хэл'].freeze, + mr: ['Marathi', 'मराठी'].freeze, + ms: ['Malay', 'Bahasa Malaysia'].freeze, + mt: ['Maltese', 'Malti'].freeze, + my: ['Burmese', 'ဗမာစာ'].freeze, + na: ['Nauru', 'Ekakairũ Naoero'].freeze, + nb: ['Norwegian Bokmål', 'Norsk bokmål'].freeze, + nd: ['Northern Ndebele', 'isiNdebele'].freeze, + ne: ['Nepali', 'नेपाली'].freeze, + ng: ['Ndonga', 'Owambo'].freeze, + nl: ['Dutch', 'Nederlands'].freeze, + nn: ['Norwegian Nynorsk', 'Norsk nynorsk'].freeze, + no: ['Norwegian', 'Norsk'].freeze, + nr: ['Southern Ndebele', 'isiNdebele'].freeze, + nv: ['Navajo', 'Diné bizaad'].freeze, + ny: ['Chichewa', 'chiCheŵa'].freeze, + oc: ['Occitan', 'occitan'].freeze, + oj: ['Ojibwe', 'ᐊᓂᔑᓈᐯᒧᐎᓐ'].freeze, + om: ['Oromo', 'Afaan Oromoo'].freeze, + or: ['Oriya', 'ଓଡ଼ିଆ'].freeze, + os: ['Ossetian', 'ирон æвзаг'].freeze, + pa: ['Panjabi', 'ਪੰਜਾਬੀ'].freeze, + pi: ['Pāli', 'पाऴि'].freeze, + pl: 
['Polish', 'Polski'].freeze, + ps: ['Pashto', 'پښتو'].freeze, + pt: ['Portuguese', 'Português'].freeze, + qu: ['Quechua', 'Runa Simi'].freeze, + rm: ['Romansh', 'rumantsch grischun'].freeze, + rn: ['Kirundi', 'Ikirundi'].freeze, + ro: ['Romanian', 'Română'].freeze, + ru: ['Russian', 'Русский'].freeze, + rw: ['Kinyarwanda', 'Ikinyarwanda'].freeze, + sa: ['Sanskrit', 'संस्कृतम्'].freeze, + sc: ['Sardinian', 'sardu'].freeze, + sd: ['Sindhi', 'सिन्धी'].freeze, + se: ['Northern Sami', 'Davvisámegiella'].freeze, + sg: ['Sango', 'yângâ tî sängö'].freeze, + si: ['Sinhala', 'සිංහල'].freeze, + sk: ['Slovak', 'slovenčina'].freeze, + sl: ['Slovenian', 'slovenščina'].freeze, + sn: ['Shona', 'chiShona'].freeze, + so: ['Somali', 'Soomaaliga'].freeze, + sq: ['Albanian', 'Shqip'].freeze, + sr: ['Serbian', 'српски језик'].freeze, + ss: ['Swati', 'SiSwati'].freeze, + st: ['Southern Sotho', 'Sesotho'].freeze, + su: ['Sundanese', 'Basa Sunda'].freeze, + sv: ['Swedish', 'Svenska'].freeze, + sw: ['Swahili', 'Kiswahili'].freeze, + ta: ['Tamil', 'தமிழ்'].freeze, + te: ['Telugu', 'తెలుగు'].freeze, + tg: ['Tajik', 'тоҷикӣ'].freeze, + th: ['Thai', 'ไทย'].freeze, + ti: ['Tigrinya', 'ትግርኛ'].freeze, + tk: ['Turkmen', 'Türkmen'].freeze, + tl: ['Tagalog', 'Wikang Tagalog'].freeze, + tn: ['Tswana', 'Setswana'].freeze, + to: ['Tonga', 'faka Tonga'].freeze, + tr: ['Turkish', 'Türkçe'].freeze, + ts: ['Tsonga', 'Xitsonga'].freeze, + tt: ['Tatar', 'татар теле'].freeze, + tw: ['Twi', 'Twi'].freeze, + ty: ['Tahitian', 'Reo Tahiti'].freeze, + ug: ['Uyghur', 'ئۇيغۇرچە'].freeze, + uk: ['Ukrainian', 'Українська'].freeze, + ur: ['Urdu', 'اردو'].freeze, + uz: ['Uzbek', 'Ўзбек'].freeze, + ve: ['Venda', 'Tshivenḓa'].freeze, + vi: ['Vietnamese', 'Tiếng Việt'].freeze, + vo: ['Volapük', 'Volapük'].freeze, + wa: ['Walloon', 'walon'].freeze, + wo: ['Wolof', 'Wollof'].freeze, + xh: ['Xhosa', 'isiXhosa'].freeze, + yi: ['Yiddish', 'ייִדיש'].freeze, + yo: ['Yoruba', 'Yorùbá'].freeze, + za: ['Zhuang', 'Saɯ cueŋƅ'].freeze, 
+ zh: ['Chinese', '中文'].freeze, + zu: ['Zulu', 'isiZulu'].freeze, + }.freeze + + ISO_639_3 = { + ast: ['Asturian', 'Asturianu'].freeze, + kab: ['Kabyle', 'Taqbaylit'].freeze, + kmr: ['Northern Kurdish', 'Kurmancî'].freeze, + zgh: ['Standard Moroccan Tamazight', 'ⵜⴰⵎⴰⵣⵉⵖⵜ'].freeze, + }.freeze + + SUPPORTED_LOCALES = {}.merge(ISO_639_1).merge(ISO_639_3).freeze + + # For ISO-639-1 and ISO-639-3 language codes, we have their official + # names, but for some translations, we need the names of the + # regional variants specifically + REGIONAL_LOCALE_NAMES = { 'es-AR': 'Español (Argentina)', 'es-MX': 'Español (México)', - es: 'Español', - et: 'Eesti', - eu: 'Euskara', - fa: 'فارسی', - fi: 'Suomi', - fr: 'Français', - ga: 'Gaeilge', - gd: 'Gàidhlig', - gl: 'Galego', - he: 'עברית', - hi: 'हिन्दी', - hr: 'Hrvatski', - hu: 'Magyar', - hy: 'Հայերեն', - id: 'Bahasa Indonesia', - io: 'Ido', - is: 'Íslenska', - it: 'Italiano', - ja: '日本語', - ka: 'ქართული', - kab: 'Taqbaylit', - kk: 'Қазақша', - kmr: 'Kurmancî', - kn: 'ಕನ್ನಡ', - ko: '한국어', - ku: 'سۆرانی', - lt: 'Lietuvių', - lv: 'Latviešu', - mk: 'Македонски', - ml: 'മലയാളം', - mr: 'मराठी', - ms: 'Bahasa Melayu', - nl: 'Nederlands', - nn: 'Nynorsk', - no: 'Norsk', - oc: 'Occitan', - pl: 'Polski', 'pt-BR': 'Português (Brasil)', 'pt-PT': 'Português (Portugal)', - pt: 'Português', - ro: 'Română', - ru: 'Русский', - sa: 'संस्कृतम्', - sc: 'Sardu', - si: 'සිංහල', - sk: 'Slovenčina', - sl: 'Slovenščina', - sq: 'Shqip', 'sr-Latn': 'Srpski (latinica)', - sr: 'Српски', - sv: 'Svenska', - ta: 'தமிழ்', - te: 'తెలుగు', - th: 'ไทย', - tr: 'Türkçe', - uk: 'Українська', - ur: 'اُردُو', - vi: 'Tiếng Việt', - zgh: 'ⵜⴰⵎⴰⵣⵉⵖⵜ', 'zh-CN': '简体中文', 'zh-HK': '繁體中文(香港)', 'zh-TW': '繁體中文(臺灣)', - zh: '中文', }.freeze def human_locale(locale) if locale == 'und' I18n.t('generic.none') + elsif (supported_locale = SUPPORTED_LOCALES[locale.to_sym]) + supported_locale[1] + elsif (regional_locale = REGIONAL_LOCALE_NAMES[locale.to_sym]) + regional_locale else - 
HUMAN_LOCALES[locale.to_sym] || locale + locale end end + + def valid_locale_or_nil(str) + return if str.blank? + + code, = str.to_s.split(/[_-]/) # Strip out the region from e.g. en_US or ja-JP + + return unless valid_locale?(code) + + code + end + + def valid_locale?(locale) + locale.respond_to?(:to_sym) && SUPPORTED_LOCALES.key?(locale.to_sym) # nil (e.g. the code parsed from "-") is never a valid locale + end end diff --git a/app/helpers/settings_helper.rb b/app/helpers/settings_helper.rb index 23739d1cd4..3d5592867c 100644 --- a/app/helpers/settings_helper.rb +++ b/app/helpers/settings_helper.rb @@ -2,7 +2,7 @@ module SettingsHelper def filterable_languages - LanguageDetector.instance.language_names.select(&LanguagesHelper::HUMAN_LOCALES.method(:key?)) + LanguagesHelper::SUPPORTED_LOCALES.keys end def hash_to_object(hash) diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb index ad273c20bb..cf31b6ff62 100644 --- a/app/lib/activitypub/activity/create.rb +++ b/app/lib/activitypub/activity/create.rb @@ -112,7 +112,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity url: @status_parser.url || @status_parser.uri, account: @account, text: converted_object_type? ? converted_text : (@status_parser.text || ''), - language: @status_parser.language || detected_language, + language: @status_parser.language, spoiler_text: converted_object_type? ? '' : (@status_parser.spoiler_text || ''), created_at: @status_parser.created_at, edited_at: @status_parser.edited_at, @@ -370,10 +370,6 @@ class ActivityPub::Activity::Create < ActivityPub::Activity Formatter.instance.linkify([@status_parser.title.presence, @status_parser.spoiler_text.presence, @status_parser.url || @status_parser.uri].compact.join("\n\n")) end - def detected_language - LanguageDetector.instance.detect(@status_parser.text, @account) if supported_object_type? - end - def unsupported_media_type?(mime_type) mime_type.present? 
&& !MediaAttachment.supported_mime_types.include?(mime_type) end diff --git a/app/lib/language_detector.rb b/app/lib/language_detector.rb deleted file mode 100644 index 40452eddc9..0000000000 --- a/app/lib/language_detector.rb +++ /dev/null @@ -1,101 +0,0 @@ -# frozen_string_literal: true - -class LanguageDetector - include Singleton - - WORDS_THRESHOLD = 4 - RELIABLE_CHARACTERS_RE = /[\p{Hebrew}\p{Arabic}\p{Syriac}\p{Thaana}\p{Nko}\p{Han}\p{Katakana}\p{Hiragana}\p{Hangul}\p{Thai}]+/m - - def initialize - @identifier = CLD3::NNetLanguageIdentifier.new(1, 2048) - end - - def detect(text, account) - input_text = prepare_text(text) - - return if input_text.blank? - - detect_language_code(input_text) || default_locale(account) - end - - def language_names - @language_names = CLD3::TaskContextParams::LANGUAGE_NAMES.map { |name| iso6391(name.to_s).to_sym }.uniq - end - - private - - def prepare_text(text) - simplify_text(text).strip - end - - def unreliable_input?(text) - !reliable_input?(text) - end - - def reliable_input?(text) - sufficient_text_length?(text) || language_specific_character_set?(text) - end - - def sufficient_text_length?(text) - text.split(/\s+/).size >= WORDS_THRESHOLD - end - - def language_specific_character_set?(text) - words = text.scan(RELIABLE_CHARACTERS_RE) - - if words.present? - words.reduce(0) { |acc, elem| acc + elem.size }.to_f / text.size > 0.3 - else - false - end - end - - def detect_language_code(text) - return if unreliable_input?(text) - - result = @identifier.find_language(text) - - iso6391(result.language.to_s).to_sym if result&.reliable? 
- end - - def iso6391(bcp47) - iso639 = bcp47.split('-').first - - # CLD3 returns grandfathered language code for Hebrew - return 'he' if iso639 == 'iw' - - ISO_639.find(iso639).alpha2 - end - - def simplify_text(text) - new_text = remove_html(text) - new_text.gsub!(FetchLinkCardService::URL_PATTERN, '\1') - new_text.gsub!(Account::MENTION_RE, '') - new_text.gsub!(Tag::HASHTAG_RE) { |string| string.gsub(/[#_]/, '#' => '', '_' => ' ').gsub(/[a-z][A-Z]|[a-zA-Z][\d]/) { |s| s.insert(1, ' ') }.downcase } - new_text.gsub!(/:#{CustomEmoji::SHORTCODE_RE_FRAGMENT}:/, '') - new_text.gsub!(/\s+/, ' ') - new_text - end - - def new_scrubber - scrubber = Rails::Html::PermitScrubber.new - scrubber.tags = %w(br p) - scrubber - end - - def scrubber - @scrubber ||= new_scrubber - end - - def remove_html(text) - text = Loofah.fragment(text).scrub!(scrubber).to_s - text.gsub!('<br>', "\n") - text.gsub!('</p><p>', "\n\n") - text.gsub!(/(^<p>|<\/p>$)/, '') - text - end - - def default_locale(account) - account.user_locale&.to_sym || I18n.default_locale if account.local? - end -end diff --git a/app/lib/link_details_extractor.rb b/app/lib/link_details_extractor.rb index 56ad0717b7..fabbd244df 100644 --- a/app/lib/link_details_extractor.rb +++ b/app/lib/link_details_extractor.rb @@ -2,6 +2,20 @@ class LinkDetailsExtractor include ActionView::Helpers::TagHelper + include LanguagesHelper + + # Some publications wrap their JSON-LD data in their <script> tags + # in commented-out CDATA blocks, they need to be removed before + # attempting to parse JSON + CDATA_JUNK_PATTERN = %r{^[\s]*( + (/\*[\s]*<!\[CDATA\[[\s]*\*/) # Block comment style opening + | + (//[\s]*<!\[CDATA\[) # Single-line comment style opening + | + (/\*[\s]*\]\]>[\s]*\*/) # Block comment style closing + | + (//[\s]*\]\]>) # Single-line comment style closing + )[\s]*$}x class StructuredData SUPPORTED_TYPES = %w( @@ -61,6 +75,10 @@ class LinkDetailsExtractor publisher.dig('logo', 'url') end + def valid? + json.present? 
+ end + private def author @@ -134,11 +152,11 @@ class LinkDetailsExtractor end def title - structured_data&.headline || opengraph_tag('og:title') || document.xpath('//title').map(&:content).first + html_entities.decode(structured_data&.headline || opengraph_tag('og:title') || document.xpath('//title').map(&:content).first) end def description - structured_data&.description || opengraph_tag('og:description') || meta_tag('description') + html_entities.decode(structured_data&.description || opengraph_tag('og:description') || meta_tag('description')) end def image @@ -146,11 +164,11 @@ class LinkDetailsExtractor end def canonical_url - valid_url_or_nil(opengraph_tag('og:url') || link_tag('canonical'), same_origin_only: true) || @original_url.to_s + valid_url_or_nil(link_tag('canonical') || opengraph_tag('og:url'), same_origin_only: true) || @original_url.to_s end def provider_name - structured_data&.publisher_name || opengraph_tag('og:site_name') + html_entities.decode(structured_data&.publisher_name || opengraph_tag('og:site_name')) end def provider_url @@ -158,7 +176,7 @@ class LinkDetailsExtractor end def author_name - structured_data&.author_name || opengraph_tag('og:author') || opengraph_tag('og:author:username') + html_entities.decode(structured_data&.author_name || opengraph_tag('og:author') || opengraph_tag('og:author:username')) end def author_url @@ -201,14 +219,6 @@ class LinkDetailsExtractor nil end - def valid_locale_or_nil(str) - return nil if str.blank? - - code, = str.split(/_-/) # Strip out the region from e.g. en_US or ja-JA - locale = ISO_639.find(code) - locale&.alpha2 - end - def link_tag(name) document.xpath("//link[@rel=\"#{name}\"]").map { |link| link['href'] }.first end @@ -223,10 +233,24 @@ class LinkDetailsExtractor def structured_data @structured_data ||= begin - json_ld = document.xpath('//script[@type="application/ld+json"]').map(&:content).first - json_ld.present? ? 
StructuredData.new(json_ld) : nil - rescue Oj::ParseError - nil + # Some publications have more than one JSON-LD definition on the page, + # and some of those definitions aren't valid JSON either, so we have + # to loop through here until we find something that is the right type + # and doesn't break + document.xpath('//script[@type="application/ld+json"]').filter_map do |element| + json_ld = element.content&.gsub(CDATA_JUNK_PATTERN, '') + + next if json_ld.blank? + + structured_data = StructuredData.new(html_entities.decode(json_ld)) + + next unless structured_data.valid? + + structured_data + rescue Oj::ParseError, EncodingError + Rails.logger.debug("Invalid JSON-LD in #{@original_url}") + next + end.first end end @@ -246,4 +270,8 @@ class LinkDetailsExtractor detector.strip_tags = true end end + + def html_entities + @html_entities ||= HTMLEntities.new + end end diff --git a/app/models/account_suggestions/global_source.rb b/app/models/account_suggestions/global_source.rb index ac764de50f..03ed1b6c2a 100644 --- a/app/models/account_suggestions/global_source.rb +++ b/app/models/account_suggestions/global_source.rb @@ -6,7 +6,7 @@ class AccountSuggestions::GlobalSource < AccountSuggestions::Source end def get(account, skip_account_ids: [], limit: 40) - account_ids = account_ids_for_locale(account.user_locale) - [account.id] - skip_account_ids + account_ids = account_ids_for_locale(I18n.locale.to_s.split(/[_-]/).first) - [account.id] - skip_account_ids as_ordered_suggestions( scope(account).where(id: account_ids), diff --git a/app/models/user.rb b/app/models/user.rb index 9afdc481d7..ee20e293e8 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -245,6 +245,10 @@ class User < ApplicationRecord save! end + def preferred_posting_language + settings.default_language.presence || locale # a blank stored setting falls back to the interface locale + end + def setting_default_privacy settings.default_privacy || (account.locked? ? 
'private' : 'public') end diff --git a/app/services/activitypub/process_status_update_service.rb b/app/services/activitypub/process_status_update_service.rb index 9779281272..b1cea1cdfc 100644 --- a/app/services/activitypub/process_status_update_service.rb +++ b/app/services/activitypub/process_status_update_service.rb @@ -120,7 +120,7 @@ class ActivityPub::ProcessStatusUpdateService < BaseService @status.text = @status_parser.text || '' @status.spoiler_text = @status_parser.spoiler_text || '' @status.sensitive = @account.sensitized? || @status_parser.sensitive || false - @status.language = @status_parser.language || detected_language + @status.language = @status_parser.language @status.edited_at = @status_parser.edited_at || Time.now.utc if significant_changes? @status.save! @@ -210,10 +210,6 @@ class ActivityPub::ProcessStatusUpdateService < BaseService { redis: Redis.current, key: "create:#{@uri}", autorelease: 15.minutes.seconds } end - def detected_language - LanguageDetector.instance.detect(@status_parser.text, @account) - end - def create_previous_edit! # We only need to create a previous edit when no previous edits exist, e.g. # when the status has never been edited. For other cases, we always create diff --git a/app/services/post_status_service.rb b/app/services/post_status_service.rb index 9d26e0f5bf..c5061dd635 100644 --- a/app/services/post_status_service.rb +++ b/app/services/post_status_service.rb @@ -2,6 +2,7 @@ class PostStatusService < BaseService include Redisable + include LanguagesHelper MIN_SCHEDULE_OFFSET = 5.minutes.freeze @@ -118,10 +119,6 @@ class PostStatusService < BaseService raise Mastodon::ValidationError, I18n.t('media_attachments.validations.not_ready') if @media.any?(&:not_processed?) 
end - def language_from_option(str) - ISO_639.find(str)&.alpha2 - end - def process_mentions_service ProcessMentionsService.new end @@ -174,7 +171,7 @@ class PostStatusService < BaseService sensitive: @sensitive, spoiler_text: @options[:spoiler_text] || '', visibility: @visibility, - language: language_from_option(@options[:language]) || @account.user&.setting_default_language&.presence || LanguageDetector.instance.detect(@text, @account), + language: valid_locale_or_nil(@options[:language].presence || @account.user&.preferred_posting_language || I18n.default_locale), application: @options[:application], content_type: @options[:content_type] || @account.user&.setting_default_content_type, rate_limit: @options[:with_rate_limit], diff --git a/app/validators/import_validator.rb b/app/validators/import_validator.rb index a182abfa50..9f19aee2ae 100644 --- a/app/validators/import_validator.rb +++ b/app/validators/import_validator.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'csv' + class ImportValidator < ActiveModel::Validator KNOWN_HEADERS = [ 'Account address', diff --git a/app/views/admin/follow_recommendations/show.html.haml b/app/views/admin/follow_recommendations/show.html.haml index 2878f07d76..d1c160bd28 100644 --- a/app/views/admin/follow_recommendations/show.html.haml +++ b/app/views/admin/follow_recommendations/show.html.haml @@ -10,7 +10,7 @@ .filter-subset.filter-subset--with-select %strong= t('admin.follow_recommendations.language') .input.select.optional - = select_tag :language, options_for_select(I18n.available_locales.map { |key| [human_locale(key), key]}, @language) + = select_tag :language, options_for_select(I18n.available_locales.map { |key| key.to_s.split(/[_-]/).first.to_sym }.uniq.map { |key| [human_locale(key), key]}, @language) .filter-subset %strong= t('admin.follow_recommendations.status') diff --git a/app/views/settings/preferences/other/show.html.haml b/app/views/settings/preferences/other/show.html.haml index 
3b5c7016d8..76ff2bcbcf 100644 --- a/app/views/settings/preferences/other/show.html.haml +++ b/app/views/settings/preferences/other/show.html.haml @@ -27,7 +27,7 @@ = f.input :setting_default_privacy, collection: Status.selectable_visibilities, wrapper: :with_label, include_blank: false, label_method: lambda { |visibility| safe_join([I18n.t("statuses.visibilities.#{visibility}"), I18n.t("statuses.visibilities.#{visibility}_long")], ' - ') }, required: false, hint: false .fields-group.fields-row__column.fields-row__column-6 - = f.input :setting_default_language, collection: [nil] + filterable_languages.sort, wrapper: :with_label, label_method: lambda { |locale| locale.nil? ? I18n.t('statuses.language_detection') : human_locale(locale) }, required: false, include_blank: false, hint: false + = f.input :setting_default_language, collection: [nil] + filterable_languages, wrapper: :with_label, label_method: lambda { |locale| locale.nil? ? I18n.t('statuses.default_language') : human_locale(locale) }, required: false, include_blank: false, hint: false .fields-group = f.input :setting_default_sensitive, as: :boolean, wrapper: :with_label @@ -41,7 +41,7 @@ %h4= t 'preferences.public_timelines' .fields-group - = f.input :chosen_languages, collection: filterable_languages.sort, wrapper: :with_block_label, include_blank: false, label_method: lambda { |locale| human_locale(locale) }, required: false, as: :check_boxes, collection_wrapper_tag: 'ul', item_wrapper_tag: 'li' + = f.input :chosen_languages, collection: filterable_languages, wrapper: :with_block_label, include_blank: false, label_method: lambda { |locale| human_locale(locale) }, required: false, as: :check_boxes, collection_wrapper_tag: 'ul', item_wrapper_tag: 'li' .actions = f.button :button, t('generic.save_changes'), type: :submit diff --git a/app/workers/activitypub/processing_worker.rb b/app/workers/activitypub/processing_worker.rb index cef5953194..37e316354b 100644 --- 
a/app/workers/activitypub/processing_worker.rb +++ b/app/workers/activitypub/processing_worker.rb @@ -6,7 +6,10 @@ class ActivityPub::ProcessingWorker sidekiq_options backtrace: true, retry: 8 def perform(account_id, body, delivered_to_account_id = nil) - ActivityPub::ProcessCollectionService.new.call(body, Account.find(account_id), override_timestamps: true, delivered_to_account_id: delivered_to_account_id, delivery: true) + account = Account.find_by(id: account_id) + return if account.nil? + + ActivityPub::ProcessCollectionService.new.call(body, account, override_timestamps: true, delivered_to_account_id: delivered_to_account_id, delivery: true) rescue ActiveRecord::RecordInvalid => e Rails.logger.debug "Error processing incoming ActivityPub object: #{e}" end diff --git a/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb b/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb index f42d4bca6a..7195f0ff97 100644 --- a/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb +++ b/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb @@ -66,7 +66,7 @@ class Scheduler::AccountsStatusesCleanupScheduler end def compute_budget - threads = Sidekiq::ProcessSet.new.filter { |x| x['queues'].include?('push') }.map { |x| x['concurrency'] }.sum + threads = Sidekiq::ProcessSet.new.select { |x| x['queues'].include?('push') }.map { |x| x['concurrency'] }.sum [PER_THREAD_BUDGET * threads, MAX_BUDGET].min end diff --git a/app/workers/scheduler/follow_recommendations_scheduler.rb b/app/workers/scheduler/follow_recommendations_scheduler.rb index effc63e598..084619cbd9 100644 --- a/app/workers/scheduler/follow_recommendations_scheduler.rb +++ b/app/workers/scheduler/follow_recommendations_scheduler.rb @@ -16,28 +16,33 @@ class Scheduler::FollowRecommendationsScheduler AccountSummary.refresh FollowRecommendation.refresh - fallback_recommendations = FollowRecommendation.order(rank: :desc).limit(SET_SIZE).index_by(&:account_id) + fallback_recommendations = 
FollowRecommendation.order(rank: :desc).limit(SET_SIZE) - I18n.available_locales.each do |locale| + I18n.available_locales.map { |locale| locale.to_s.split(/[_-]/).first }.uniq.each do |locale| recommendations = begin if AccountSummary.safe.filtered.localized(locale).exists? # We can skip the work if no accounts with that language exist - FollowRecommendation.localized(locale).order(rank: :desc).limit(SET_SIZE).index_by(&:account_id) + FollowRecommendation.localized(locale).order(rank: :desc).limit(SET_SIZE).map { |recommendation| [recommendation.account_id, recommendation.rank] } else - {} + [] end end # Use language-agnostic results if there are not enough language-specific ones - missing = SET_SIZE - recommendations.keys.size + missing = SET_SIZE - recommendations.size + + if missing.positive? && fallback_recommendations.size.positive? + max_fallback_rank = fallback_recommendations.first.rank || 0 + + # Language-specific results should be above language-agnostic ones, + # otherwise language-agnostic ones will always overshadow them + recommendations.map! { |(account_id, rank)| [account_id, rank + max_fallback_rank] } - if missing.positive? added = 0 - # Avoid duplicate results - fallback_recommendations.each_value do |recommendation| - next if recommendations.key?(recommendation.account_id) + fallback_recommendations.each do |recommendation| + next if recommendations.any? 
{ |(account_id, _)| account_id == recommendation.account_id } - recommendations[recommendation.account_id] = recommendation + recommendations << [recommendation.account_id, recommendation.rank] added += 1 break if added >= missing @@ -47,8 +52,8 @@ class Scheduler::FollowRecommendationsScheduler redis.pipelined do redis.del(key(locale)) - recommendations.each_value do |recommendation| - redis.zadd(key(locale), recommendation.rank, recommendation.account_id) + recommendations.each do |(account_id, rank)| + redis.zadd(key(locale), rank, account_id) end end end diff --git a/config/application.rb b/config/application.rb index 5617228849..c6f7751622 100644 --- a/config/application.rb +++ b/config/application.rb @@ -149,10 +149,14 @@ module Mastodon :'zh-TW', ] - config.i18n.default_locale = ENV['DEFAULT_LOCALE']&.to_sym + config.i18n.default_locale = begin + custom_default_locale = ENV['DEFAULT_LOCALE']&.to_sym - unless config.i18n.available_locales.include?(config.i18n.default_locale) - config.i18n.default_locale = :en + if config.i18n.available_locales.include?(custom_default_locale) + custom_default_locale + else + :en + end end # config.paths.add File.join('app', 'api'), glob: File.join('**', '*.rb') @@ -169,7 +173,6 @@ module Mastodon Doorkeeper::Application.send :include, ApplicationExtension Doorkeeper::AccessToken.send :include, AccessTokenExtension Devise::FailureApp.send :include, AbstractController::Callbacks - Devise::FailureApp.send :include, HttpAcceptLanguage::EasyAccess Devise::FailureApp.send :include, Localized end end diff --git a/config/locales/en.yml b/config/locales/en.yml index 85aa87c7a6..600090e784 100644 --- a/config/locales/en.yml +++ b/config/locales/en.yml @@ -1307,13 +1307,13 @@ en: other: "%{count} videos" boosted_from_html: Boosted from %{acct_link} content_warning: 'Content warning: %{warning}' + default_language: Same as interface language disallowed_hashtags: one: 'contained a disallowed hashtag: %{tags}' other: 'contained the 
disallowed hashtags: %{tags}' edited_at: Edited %{date} errors: in_reply_not_found: The post you are trying to reply to does not appear to exist. - language_detection: Automatically detect language open_in_web: Open in web over_character_limit: character limit of %{max} exceeded pin_errors: diff --git a/lib/tasks/repo.rake b/lib/tasks/repo.rake index bbf7f20ee7..795b54c59c 100644 --- a/lib/tasks/repo.rake +++ b/lib/tasks/repo.rake @@ -96,7 +96,8 @@ namespace :repo do end.uniq.compact missing_available_locales = locales_in_files - I18n.available_locales - missing_locale_names = I18n.available_locales.reject { |locale| LanguagesHelper::HUMAN_LOCALES.key?(locale) } + supported_locale_codes = Set.new(LanguagesHelper::SUPPORTED_LOCALES.keys + LanguagesHelper::REGIONAL_LOCALE_NAMES.keys) + missing_locale_names = I18n.available_locales.reject { |locale| supported_locale_codes.include?(locale) } critical = false @@ -123,7 +124,7 @@ namespace :repo do unless missing_locale_names.empty? puts pastel.yellow("You are missing human-readable names for these locales: #{pastel.bold(missing_locale_names.join(', '))}") - puts pastel.yellow("Add them to #{pastel.bold('HUMAN_LOCALES')} in app/helpers/settings_helper.rb or remove the locales from #{pastel.bold('I18n.available_locales')} in config/application.rb") + puts pastel.yellow("Add them to app/helpers/languages_helper.rb or remove the locales from #{pastel.bold('I18n.available_locales')} in config/application.rb") end if critical diff --git a/spec/controllers/activitypub/replies_controller_spec.rb b/spec/controllers/activitypub/replies_controller_spec.rb index bf82fd0207..a2c7f336ff 100644 --- a/spec/controllers/activitypub/replies_controller_spec.rb +++ b/spec/controllers/activitypub/replies_controller_spec.rb @@ -4,8 +4,9 @@ require 'rails_helper' RSpec.describe ActivityPub::RepliesController, type: :controller do let(:status) { Fabricate(:status, visibility: parent_visibility) } - let(:remote_reply_id) { nil } - 
let(:remote_account) { nil } + let(:remote_account) { Fabricate(:account, domain: 'foobar.com') } + let(:remote_reply_id) { 'https://foobar.com/statuses/1234' } + let(:remote_querier) { nil } shared_examples 'cachable response' do it 'does not set cookies' do @@ -23,8 +24,151 @@ RSpec.describe ActivityPub::RepliesController, type: :controller do end end + shared_examples 'common behavior' do + context 'when status is private' do + let(:parent_visibility) { :private } + + it 'returns http not found' do + expect(response).to have_http_status(404) + end + end + + context 'when status is direct' do + let(:parent_visibility) { :direct } + + it 'returns http not found' do + expect(response).to have_http_status(404) + end + end + end + + shared_examples 'disallowed access' do + context 'when status is public' do + let(:parent_visibility) { :public } + + it 'returns http not found' do + expect(response).to have_http_status(404) + end + end + + it_behaves_like 'common behavior' + end + + shared_examples 'allowed access' do + context 'when account is permanently suspended' do + let(:parent_visibility) { :public } + + before do + status.account.suspend! + status.account.deletion_request.destroy + end + + it 'returns http gone' do + expect(response).to have_http_status(410) + end + end + + context 'when account is temporarily suspended' do + let(:parent_visibility) { :public } + + before do + status.account.suspend! 
+ end + + it 'returns http forbidden' do + expect(response).to have_http_status(403) + end + end + + context 'when status is public' do + let(:parent_visibility) { :public } + let(:json) { body_as_json } + let(:page_json) { json[:first] } + + it 'returns http success' do + expect(response).to have_http_status(200) + end + + it 'returns application/activity+json' do + expect(response.media_type).to eq 'application/activity+json' + end + + it_behaves_like 'cachable response' + + context 'without only_other_accounts' do + it "returns items with thread author's replies" do + expect(page_json).to be_a Hash + expect(page_json[:items]).to be_an Array + expect(page_json[:items].size).to eq 1 + expect(page_json[:items].all? { |item| item[:to].include?(ActivityPub::TagManager::COLLECTIONS[:public]) || item[:cc].include?(ActivityPub::TagManager::COLLECTIONS[:public]) }).to be true + end + + context 'when there are few self-replies' do + it 'points next to replies from other people' do + expect(page_json).to be_a Hash + expect(Addressable::URI.parse(page_json[:next]).query.split('&')).to include('only_other_accounts=true', 'page=true') + end + end + + context 'when there are many self-replies' do + before do + 10.times { Fabricate(:status, account: status.account, thread: status, visibility: :public) } + end + + it 'points next to other self-replies' do + expect(page_json).to be_a Hash + expect(Addressable::URI.parse(page_json[:next]).query.split('&')).to include('only_other_accounts=false', 'page=true') + end + end + end + + context 'with only_other_accounts' do + let(:only_other_accounts) { 'true' } + + it 'returns items with other public or unlisted replies' do + expect(page_json).to be_a Hash + expect(page_json[:items]).to be_an Array + expect(page_json[:items].size).to eq 3 + end + + it 'only inlines items that are local and public or unlisted replies' do + inlined_replies = page_json[:items].select { |x| x.is_a?(Hash) } + public_collection = 
ActivityPub::TagManager::COLLECTIONS[:public] + expect(inlined_replies.all? { |item| item[:to].include?(public_collection) || item[:cc].include?(public_collection) }).to be true + expect(inlined_replies.all? { |item| ActivityPub::TagManager.instance.local_uri?(item[:id]) }).to be true + end + + it 'uses ids for remote toots' do + remote_replies = page_json[:items].select { |x| !x.is_a?(Hash) } + expect(remote_replies.all? { |item| item.is_a?(String) && !ActivityPub::TagManager.instance.local_uri?(item) }).to be true + end + + context 'when there are few replies' do + it 'does not have a next page' do + expect(page_json).to be_a Hash + expect(page_json[:next]).to be_nil + end + end + + context 'when there are many replies' do + before do + 10.times { Fabricate(:status, thread: status, visibility: :public) } + end + + it 'points next to other replies' do + expect(page_json).to be_a Hash + expect(Addressable::URI.parse(page_json[:next]).query.split('&')).to include('only_other_accounts=true', 'page=true') + end + end + end + end + + it_behaves_like 'common behavior' + end + before do - allow(controller).to receive(:signed_request_account).and_return(remote_account) + stub_const 'ActivityPub::RepliesController::DESCENDANTS_LIMIT', 5 + allow(controller).to receive(:signed_request_account).and_return(remote_querier) Fabricate(:status, thread: status, visibility: :public) Fabricate(:status, thread: status, visibility: :public) @@ -32,215 +176,36 @@ RSpec.describe ActivityPub::RepliesController, type: :controller do Fabricate(:status, account: status.account, thread: status, visibility: :public) Fabricate(:status, account: status.account, thread: status, visibility: :private) - Fabricate(:status, account: remote_account, thread: status, visibility: :public, uri: remote_reply_id) if remote_reply_id + Fabricate(:status, account: remote_account, thread: status, visibility: :public, uri: remote_reply_id) end describe 'GET #index' do + subject(:response) { get :index, params: { 
account_username: status.account.username, status_id: status.id, only_other_accounts: only_other_accounts } } + let(:only_other_accounts) { nil } + context 'with no signature' do - subject(:response) { get :index, params: { account_username: status.account.username, status_id: status.id } } - subject(:body) { body_as_json } - - context 'when account is permanently suspended' do - let(:parent_visibility) { :public } - - before do - status.account.suspend! - status.account.deletion_request.destroy - end - - it 'returns http gone' do - expect(response).to have_http_status(410) - end - end - - context 'when account is temporarily suspended' do - let(:parent_visibility) { :public } - - before do - status.account.suspend! - end - - it 'returns http forbidden' do - expect(response).to have_http_status(403) - end - end - - context 'when status is public' do - let(:parent_visibility) { :public } - - it 'returns http success' do - expect(response).to have_http_status(200) - end - - it 'returns application/activity+json' do - expect(response.media_type).to eq 'application/activity+json' - end - - it_behaves_like 'cachable response' - - it 'returns items with account\'s own replies' do - expect(body[:first]).to be_a Hash - expect(body[:first][:items]).to be_an Array - expect(body[:first][:items].size).to eq 1 - expect(body[:first][:items].all? 
{ |item| item[:to].include?(ActivityPub::TagManager::COLLECTIONS[:public]) || item[:cc].include?(ActivityPub::TagManager::COLLECTIONS[:public]) }).to be true - end - end - - context 'when status is private' do - let(:parent_visibility) { :private } - - it 'returns http not found' do - expect(response).to have_http_status(404) - end - end - - context 'when status is direct' do - let(:parent_visibility) { :direct } - - it 'returns http not found' do - expect(response).to have_http_status(404) - end - end + it_behaves_like 'allowed access' end context 'with signature' do - let(:remote_account) { Fabricate(:account, domain: 'example.com') } - let(:only_other_accounts) { nil } + let(:remote_querier) { Fabricate(:account, domain: 'example.com') } - context do - before do - get :index, params: { account_username: status.account.username, status_id: status.id, only_other_accounts: only_other_accounts } - end - - context 'when status is public' do - let(:parent_visibility) { :public } - - it 'returns http success' do - expect(response).to have_http_status(200) - end - - it 'returns application/activity+json' do - expect(response.media_type).to eq 'application/activity+json' - end - - it_behaves_like 'cachable response' - - context 'without only_other_accounts' do - it 'returns items with account\'s own replies' do - json = body_as_json - - expect(json[:first]).to be_a Hash - expect(json[:first][:items]).to be_an Array - expect(json[:first][:items].size).to eq 1 - expect(json[:first][:items].all? 
{ |item| item[:to].include?(ActivityPub::TagManager::COLLECTIONS[:public]) || item[:cc].include?(ActivityPub::TagManager::COLLECTIONS[:public]) }).to be true - end - end - - context 'with only_other_accounts' do - let(:only_other_accounts) { 'true' } - - it 'returns items with other public or unlisted replies' do - json = body_as_json - - expect(json[:first]).to be_a Hash - expect(json[:first][:items]).to be_an Array - expect(json[:first][:items].size).to eq 2 - expect(json[:first][:items].all? { |item| item[:to].include?(ActivityPub::TagManager::COLLECTIONS[:public]) || item[:cc].include?(ActivityPub::TagManager::COLLECTIONS[:public]) }).to be true - end - - context 'with remote responses' do - let(:remote_reply_id) { 'foo' } - - it 'returned items are all inlined local toots or are ids' do - json = body_as_json - - expect(json[:first]).to be_a Hash - expect(json[:first][:items]).to be_an Array - expect(json[:first][:items].size).to eq 3 - expect(json[:first][:items].all? { |item| item.is_a?(Hash) ? 
ActivityPub::TagManager.instance.local_uri?(item[:id]) : item.is_a?(String) }).to be true - expect(json[:first][:items]).to include remote_reply_id - end - end - end - end - - context 'when status is private' do - let(:parent_visibility) { :private } - - it 'returns http not found' do - expect(response).to have_http_status(404) - end - end - - context 'when status is direct' do - let(:parent_visibility) { :direct } - - it 'returns http not found' do - expect(response).to have_http_status(404) - end - end - end + it_behaves_like 'allowed access' context 'when signed request account is blocked' do before do - status.account.block!(remote_account) - get :index, params: { account_username: status.account.username, status_id: status.id } + status.account.block!(remote_querier) end - context 'when status is public' do - let(:parent_visibility) { :public } - - it 'returns http not found' do - expect(response).to have_http_status(404) - end - end - - context 'when status is private' do - let(:parent_visibility) { :private } - - it 'returns http not found' do - expect(response).to have_http_status(404) - end - end - - context 'when status is direct' do - let(:parent_visibility) { :direct } - - it 'returns http not found' do - expect(response).to have_http_status(404) - end - end + it_behaves_like 'disallowed access' end context 'when signed request account is domain blocked' do before do - status.account.block_domain!(remote_account.domain) - get :index, params: { account_username: status.account.username, status_id: status.id } + status.account.block_domain!(remote_querier.domain) end - context 'when status is public' do - let(:parent_visibility) { :public } - - it 'returns http not found' do - expect(response).to have_http_status(404) - end - end - - context 'when status is private' do - let(:parent_visibility) { :private } - - it 'returns http not found' do - expect(response).to have_http_status(404) - end - end - - context 'when status is direct' do - 
let(:parent_visibility) { :direct } - - it 'returns http not found' do - expect(response).to have_http_status(404) - end - end + it_behaves_like 'disallowed access' end end end diff --git a/spec/helpers/languages_helper_spec.rb b/spec/helpers/languages_helper_spec.rb index 6db617824d..b455cee33d 100644 --- a/spec/helpers/languages_helper_spec.rb +++ b/spec/helpers/languages_helper_spec.rb @@ -3,9 +3,9 @@ require 'rails_helper' describe LanguagesHelper do - describe 'the HUMAN_LOCALES constant' do - it 'includes all I18n locales' do - expect(described_class::HUMAN_LOCALES.keys).to include(*I18n.available_locales) + describe 'the SUPPORTED_LOCALES constant' do + it 'includes all i18n locales' do + expect(Set.new(described_class::SUPPORTED_LOCALES.keys + described_class::REGIONAL_LOCALE_NAMES.keys)).to include(*I18n.available_locales) end end diff --git a/spec/lib/language_detector_spec.rb b/spec/lib/language_detector_spec.rb deleted file mode 100644 index b7ba0f6c4f..0000000000 --- a/spec/lib/language_detector_spec.rb +++ /dev/null @@ -1,134 +0,0 @@ -# frozen_string_literal: true - -require 'rails_helper' - -describe LanguageDetector do - describe 'prepare_text' do - it 'returns unmodified string without special cases' do - string = 'just a regular string' - result = described_class.instance.send(:prepare_text, string) - - expect(result).to eq string - end - - it 'collapses spacing in strings' do - string = 'The formatting in this is very odd' - - result = described_class.instance.send(:prepare_text, string) - expect(result).to eq 'The formatting in this is very odd' - end - - it 'strips usernames from strings before detection' do - string = '@username Yeah, very surreal...! also @friend' - - result = described_class.instance.send(:prepare_text, string) - expect(result).to eq 'Yeah, very surreal...! 
also' - end - - it 'strips URLs from strings before detection' do - string = 'Our website is https://example.com and also http://localhost.dev' - - result = described_class.instance.send(:prepare_text, string) - expect(result).to eq 'Our website is and also' - end - - it 'converts #hashtags back to normal text before detection' do - string = 'Hey look at all the #animals and #FishAndChips' - - result = described_class.instance.send(:prepare_text, string) - expect(result).to eq 'Hey look at all the animals and fish and chips' - end - end - - describe 'detect' do - let(:account_without_user_locale) { Fabricate(:user, locale: nil).account } - let(:account_remote) { Fabricate(:account, domain: 'joinmastodon.org') } - - it 'detects english language for basic strings' do - strings = [ - "Hello and welcome to mastodon how are you today?", - "I'd rather not!", - "a lot of people just want to feel righteous all the time and that's all that matters", - ] - strings.each do |string| - result = described_class.instance.detect(string, account_without_user_locale) - - expect(result).to eq(:en), string - end - end - - it 'detects spanish language' do - string = 'Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. 
Obtener un Hola y bienvenidos a Mastodon' - result = described_class.instance.detect(string, account_without_user_locale) - - expect(result).to eq :es - end - - describe 'when language can\'t be detected' do - it 'uses nil when sent an empty document' do - result = described_class.instance.detect('', account_without_user_locale) - expect(result).to eq nil - end - - describe 'because of a URL' do - it 'uses nil when sent just a URL' do - string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4' - cld_result = CLD3::NNetLanguageIdentifier.new(0, 2048).find_language(string) - expect(cld_result).not_to eq :en - - result = described_class.instance.detect(string, account_without_user_locale) - - expect(result).to eq nil - end - end - - describe 'with an account' do - it 'uses the account locale when present' do - account = double(user_locale: 'fr') - result = described_class.instance.detect('', account) - - expect(result).to eq nil - end - - it 'uses nil when account is present but has no locale' do - result = described_class.instance.detect('', account_without_user_locale) - - expect(result).to eq nil - end - end - - describe 'with an `en` default locale' do - it 'uses nil for undetectable string' do - result = described_class.instance.detect('', account_without_user_locale) - - expect(result).to eq nil - end - end - - describe 'remote user' do - it 'detects Korean language' do - string = '안녕하세요' - result = described_class.instance.detect(string, account_remote) - - expect(result).to eq :ko - end - end - - describe 'with a non-`en` default locale' do - around(:each) do |example| - before = I18n.default_locale - I18n.default_locale = :ja - example.run - I18n.default_locale = before - end - - it 'uses nil for undetectable string' do - string = '' - result = described_class.instance.detect(string, account_without_user_locale) - - expect(result).to eq nil - end - end - end - end -end diff --git a/spec/lib/link_details_extractor_spec.rb 
b/spec/lib/link_details_extractor_spec.rb index 850857b2dd..84bb4579ca 100644 --- a/spec/lib/link_details_extractor_spec.rb +++ b/spec/lib/link_details_extractor_spec.rb @@ -26,4 +26,126 @@ RSpec.describe LinkDetailsExtractor do end end end + + context 'when structured data is present' do + let(:original_url) { 'https://example.com/page.html' } + + context 'and is wrapped in CDATA tags' do + let(:html) { <<-HTML } +<!doctype html> +<html> +<head> + <script type="application/ld+json"> + //<![CDATA[ + {"@context":"http://schema.org","@type":"NewsArticle","mainEntityOfPage":"https://example.com/page.html","headline":"Foo","datePublished":"2022-01-31T19:53:00+00:00","url":"https://example.com/page.html","description":"Bar","author":{"@type":"Person","name":"Hoge"},"publisher":{"@type":"Organization","name":"Baz"}} + //]]> + </script> +</head> +</html> + HTML + + describe '#title' do + it 'returns the title from structured data' do + expect(subject.title).to eq 'Foo' + end + end + + describe '#description' do + it 'returns the description from structured data' do + expect(subject.description).to eq 'Bar' + end + end + + describe '#provider_name' do + it 'returns the provider name from structured data' do + expect(subject.provider_name).to eq 'Baz' + end + end + + describe '#author_name' do + it 'returns the author name from structured data' do + expect(subject.author_name).to eq 'Hoge' + end + end + end + + context 'but the first tag is invalid JSON' do + let(:html) { <<-HTML } +<!doctype html> +<html> +<body> + <script type="application/ld+json"> + { + "@context":"https://schema.org", + "@type":"ItemList", + "url":"https://example.com/page.html", + "name":"Foo", + "description":"Bar" + }, + { + "@context": "https://schema.org", + "@type": "BreadcrumbList", + "itemListElement":[ + { + "@type":"ListItem", + "position":1, + "item":{ + "@id":"https://www.example.com", + "name":"Baz" + } + } + ] + } + </script> + <script type="application/ld+json"> + { + 
"@context":"https://schema.org", + "@type":"NewsArticle", + "mainEntityOfPage": { + "@type":"WebPage", + "@id": "http://example.com/page.html" + }, + "headline": "Foo", + "description": "Bar", + "datePublished": "2022-01-31T19:46:00+00:00", + "author": { + "@type": "Organization", + "name": "Hoge" + }, + "publisher": { + "@type": "NewsMediaOrganization", + "name":"Baz", + "url":"https://example.com/" + } + } + </script> +</body> +</html> + HTML + + describe '#title' do + it 'returns the title from structured data' do + expect(subject.title).to eq 'Foo' + end + end + + describe '#description' do + it 'returns the description from structured data' do + expect(subject.description).to eq 'Bar' + end + end + + describe '#provider_name' do + it 'returns the provider name from structured data' do + expect(subject.provider_name).to eq 'Baz' + end + end + + describe '#author_name' do + it 'returns the author name from structured data' do + expect(subject.author_name).to eq 'Hoge' + end + end + end + end end