From b58db8f12eb19787ee3bd1ec8abab21027b3d4ef Mon Sep 17 00:00:00 2001
From: Eugen Rochko <eugen@zeonfederated.com>
Date: Fri, 25 Mar 2022 19:31:35 +0100
Subject: [PATCH] Add workaround for YouTube Shorts links (#17869)

* Add workaround for YouTube Shorts links

* Update link_details_extractor_spec.rb
---
 app/lib/link_details_extractor.rb       | 2 +-
 spec/lib/link_details_extractor_spec.rb | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/app/lib/link_details_extractor.rb b/app/lib/link_details_extractor.rb
index fabbd244df..b0c4e4f425 100644
--- a/app/lib/link_details_extractor.rb
+++ b/app/lib/link_details_extractor.rb
@@ -208,7 +208,7 @@ class LinkDetailsExtractor
   end
 
   def valid_url_or_nil(str, same_origin_only: false)
-    return if str.blank?
+    return if str.blank? || str == 'null'
 
     url = @original_url + Addressable::URI.parse(str)
 
diff --git a/spec/lib/link_details_extractor_spec.rb b/spec/lib/link_details_extractor_spec.rb
index 84bb4579ca..7ea867c614 100644
--- a/spec/lib/link_details_extractor_spec.rb
+++ b/spec/lib/link_details_extractor_spec.rb
@@ -25,6 +25,14 @@ RSpec.describe LinkDetailsExtractor do
         expect(subject.canonical_url).to eq 'https://foo.com/article'
       end
     end
+
+    context 'when canonical URL is set to "null"' do
+      let(:html) { '<!doctype html><link rel="canonical" href="null" />' }
+
+      it 'ignores the canonical URLs' do
+        expect(subject.canonical_url).to eq original_url
+      end
+    end
   end
 
   context 'when structured data is present' do