# Decides whether a URL still fits within MAX_URL_LENGTH once its domain has
# been converted to ASCII form via IDN::Idna.toASCII.
#
# The length is adjusted in two ways before the comparison:
# * if the ASCII-encoded domain is longer than the original, the difference
#   is added (a shorter encoding never reduces the length);
# * if no protocol was given, URL_PROTOCOL_LENGTH is added.
#
# The name and signature are kept as-is instead of being renamed to a `?`
# predicate: presumably this overrides the same-named hook of the extended
# Twitter::TwitterText::Extractor. NOTE(review): confirm against the gem.
#
# NOTE(review): URL_PROTOCOL_LENGTH is not defined in the visible part of this
# file. If it cannot be resolved from this module, the resulting NameError is
# rescued below and every protocol-less URL is reported as invalid - confirm
# that the constant is reachable here.
#
# @param url_length [Integer] length of the URL as found in the text
# @param domain [String, nil] domain part of the URL
# @param protocol [String, nil] protocol part of the URL, nil/false if absent
# @return [Boolean] true when the adjusted length is at most MAX_URL_LENGTH;
#   false when the domain is missing or any StandardError occurs
def is_valid_domain(url_length, domain, protocol)
  # A missing domain can never be valid; no need to raise just to rescue.
  return false unless domain

  encoded_domain = IDN::Idna.toASCII(domain, IDN::Idna::ALLOW_UNASSIGNED)
  growth = encoded_domain.length - domain.length
  url_length += growth if growth > 0
  url_length += URL_PROTOCOL_LENGTH unless protocol
  url_length <= MAX_URL_LENGTH
rescue StandardError
  # On error don't consider this a valid domain. Only StandardError is
  # rescued so that signals, exit requests and out-of-memory conditions
  # still propagate.
  false
end