From 75eb8b8d7f21a198f6f2d5907be35e4bd2a14424 Mon Sep 17 00:00:00 2001 From: syeopite Date: Thu, 14 Nov 2024 16:20:23 -0800 Subject: [PATCH] Connection pool: ensure response is fully read The streaming API of HTTP::Client has an internal buffer that will continue to persist onto the next request unless the response is fully read. This commit privatizes the #client method of Pool and instead expose various HTTP request methods that will call and yield the underlying request and response. This way, we can ensure that the resposne is fully read before the client is passed back into the pool for another request. --- src/invidious/channels/channels.cr | 2 +- src/invidious/connection/pool.cr | 27 ++++++++++++++++++++++++- src/invidious/mixes.cr | 2 +- src/invidious/routes/api/manifest.cr | 6 +++--- src/invidious/routes/api/v1/videos.cr | 6 +++--- src/invidious/routes/channels.cr | 2 +- src/invidious/routes/embed.cr | 2 +- src/invidious/routes/errors.cr | 6 +++--- src/invidious/routes/feeds.cr | 4 ++-- src/invidious/routes/images.cr | 12 +++++------ src/invidious/routes/playlists.cr | 2 +- src/invidious/search/processors.cr | 6 +++--- src/invidious/yt_backend/youtube_api.cr | 14 ++++++------- 13 files changed, 57 insertions(+), 34 deletions(-) diff --git a/src/invidious/channels/channels.cr b/src/invidious/channels/channels.cr index 1478c8fc..814b5531 100644 --- a/src/invidious/channels/channels.cr +++ b/src/invidious/channels/channels.cr @@ -166,7 +166,7 @@ def fetch_channel(ucid, pull_all_videos : Bool) } LOGGER.trace("fetch_channel: #{ucid} : Downloading RSS feed") - rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}").body + rss = YT_POOL.get("/feeds/videos.xml?channel_id=#{ucid}").body LOGGER.trace("fetch_channel: #{ucid} : Parsing RSS feed") rss = XML.parse(rss) diff --git a/src/invidious/connection/pool.cr b/src/invidious/connection/pool.cr index 565c168d..5e527187 100644 --- a/src/invidious/connection/pool.cr +++ b/src/invidious/connection/pool.cr @@ -24,8 +24,33 @@ module Invidious::ConnectionPool @pool = build_pool() end + {% for method in %w[get post put patch delete head options] %} + def {{method.id}}(*args, **kwargs) + self.client do | client | + client.{{method.id}}(*args, **kwargs) do | response | + + result = yield response + return result + + ensure + response.body_io?.try &. skip_to_end + end + end + end + + def {{method.id}}(*args, **kwargs) + self.client do | client | + return response = client.{{method.id}}(*args, **kwargs) + ensure + if response + response.body_io?.try &. skip_to_end + end + end + end + {% end %} + # Checks out a client in the pool - def client(&) + private def client(&) pool.checkout do |http_client| # Proxy needs to be reinstated every time we get a client from the pool http_client.proxy = make_configured_http_proxy_client() if CONFIG.http_proxy diff --git a/src/invidious/mixes.cr b/src/invidious/mixes.cr index 823ca85b..2fa18af3 100644 --- a/src/invidious/mixes.cr +++ b/src/invidious/mixes.cr @@ -26,7 +26,7 @@ def fetch_mix(rdid, video_id, cookies = nil, locale = nil) end video_id = "CvFH_6DNRCY" if rdid.starts_with? "OLAK5uy_" - response = YT_POOL.client &.get("/watch?v=#{video_id}&list=#{rdid}&gl=US&hl=en", headers) + response = YT_POOL.get("/watch?v=#{video_id}&list=#{rdid}&gl=US&hl=en", headers) initial_data = extract_initial_data(response.body) if !initial_data["contents"]["twoColumnWatchNextResults"]["playlist"]? diff --git a/src/invidious/routes/api/manifest.cr b/src/invidious/routes/api/manifest.cr index d89e752c..e23548c2 100644 --- a/src/invidious/routes/api/manifest.cr +++ b/src/invidious/routes/api/manifest.cr @@ -21,7 +21,7 @@ module Invidious::Routes::API::Manifest end if dashmpd = video.dash_manifest_url - response = YT_POOL.client &.get(URI.parse(dashmpd).request_target) + response = YT_POOL.get(URI.parse(dashmpd).request_target) if response.status_code != 200 haltf env, status_code: response.status_code @@ -163,7 +163,7 @@ module Invidious::Routes::API::Manifest # /api/manifest/hls_playlist/* def self.get_hls_playlist(env) - response = YT_POOL.client &.get(env.request.path) + response = YT_POOL.get(env.request.path) if response.status_code != 200 haltf env, status_code: response.status_code @@ -218,7 +218,7 @@ module Invidious::Routes::API::Manifest # /api/manifest/hls_variant/* def self.get_hls_variant(env) - response = YT_POOL.client &.get(env.request.path) + response = YT_POOL.get(env.request.path) if response.status_code != 200 haltf env, status_code: response.status_code diff --git a/src/invidious/routes/api/v1/videos.cr b/src/invidious/routes/api/v1/videos.cr index 368304ac..e5a9cbf0 100644 --- a/src/invidious/routes/api/v1/videos.cr +++ b/src/invidious/routes/api/v1/videos.cr @@ -106,7 +106,7 @@ module Invidious::Routes::API::V1::Videos # Auto-generated captions often have cues that aren't aligned properly with the video, # as well as some other markup that makes it cumbersome, so we try to fix that here if caption.name.includes? "auto-generated" - caption_xml = YT_POOL.client &.get(url).body + caption_xml = YT_POOL.get(url).body settings_field = { "Kind" => "captions", @@ -147,7 +147,7 @@ module Invidious::Routes::API::V1::Videos query_params = uri.query_params query_params["fmt"] = "vtt" uri.query_params = query_params - webvtt = YT_POOL.client &.get(uri.request_target).body + webvtt = YT_POOL.get(uri.request_target).body if webvtt.starts_with?("[a-zA-Z0-9_-]{11})"/).try &.["video_id"] env.params.query.delete_all("channel") diff --git a/src/invidious/routes/errors.cr b/src/invidious/routes/errors.cr index 1e9ab44e..2f35f050 100644 --- a/src/invidious/routes/errors.cr +++ b/src/invidious/routes/errors.cr @@ -9,10 +9,10 @@ module Invidious::Routes::ErrorRoutes item = md["id"] # Check if item is branding URL e.g. https://youtube.com/gaming - response = YT_POOL.client &.get("/#{item}") + response = YT_POOL.get("/#{item}") if response.status_code == 301 - response = YT_POOL.client &.get(URI.parse(response.headers["Location"]).request_target) + response = YT_POOL.get(URI.parse(response.headers["Location"]).request_target) end if response.body.empty? @@ -40,7 +40,7 @@ module Invidious::Routes::ErrorRoutes end # Check if item is video ID - if item.match(/^[a-zA-Z0-9_-]{11}$/) && YT_POOL.client &.head("/watch?v=#{item}").status_code != 404 + if item.match(/^[a-zA-Z0-9_-]{11}$/) && YT_POOL.head("/watch?v=#{item}").status_code != 404 env.response.headers["Location"] = url haltf env, status_code: 302 end diff --git a/src/invidious/routes/feeds.cr b/src/invidious/routes/feeds.cr index ea7fb396..6a01315b 100644 --- a/src/invidious/routes/feeds.cr +++ b/src/invidious/routes/feeds.cr @@ -168,7 +168,7 @@ module Invidious::Routes::Feeds "default" => "http://www.w3.org/2005/Atom", } - response = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{channel.ucid}") + response = YT_POOL.get("/feeds/videos.xml?channel_id=#{channel.ucid}") rss = XML.parse(response.body) videos = rss.xpath_nodes("//default:feed/default:entry", namespaces).map do |entry| @@ -308,7 +308,7 @@ module Invidious::Routes::Feeds end end - response = YT_POOL.client &.get("/feeds/videos.xml?playlist_id=#{plid}") + response = YT_POOL.get("/feeds/videos.xml?playlist_id=#{plid}") document = XML.parse(response.body) document.xpath_nodes(%q(//*[@href]|//*[@url])).each do |node| diff --git a/src/invidious/routes/images.cr b/src/invidious/routes/images.cr index 63c1b94e..1deec90a 100644 --- a/src/invidious/routes/images.cr +++ b/src/invidious/routes/images.cr @@ -12,7 +12,7 @@ module Invidious::Routes::Images end begin - GGPHT_POOL.client &.get(url, headers) do |resp| + GGPHT_POOL.get(url, headers) do |resp| return self.proxy_image(env, resp) end rescue ex @@ -42,7 +42,7 @@ module Invidious::Routes::Images end begin - Invidious::ConnectionPool.get_ytimg_pool(authority).client &.get(url, headers) do |resp| + Invidious::ConnectionPool.get_ytimg_pool(authority).get(url, headers) do |resp| env.response.headers["Connection"] = "close" return self.proxy_image(env, resp) end @@ -65,7 +65,7 @@ module Invidious::Routes::Images end begin - Invidious::ConnectionPool.get_ytimg_pool("i9").client &.get(url, headers) do |resp| + Invidious::ConnectionPool.get_ytimg_pool("i9").get(url, headers) do |resp| return self.proxy_image(env, resp) end rescue ex @@ -81,7 +81,7 @@ module Invidious::Routes::Images end begin - YT_POOL.client &.get(env.request.resource, headers) do |response| + YT_POOL.get(env.request.resource, headers) do |response| env.response.status_code = response.status_code response.headers.each do |key, value| if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase) @@ -111,7 +111,7 @@ module Invidious::Routes::Images if name == "maxres.jpg" build_thumbnails(id).each do |thumb| thumbnail_resource_path = "/vi/#{id}/#{thumb[:url]}.jpg" - if Invidious::ConnectionPool.get_ytimg_pool("i9").client &.head(thumbnail_resource_path, headers).status_code == 200 + if Invidious::ConnectionPool.get_ytimg_pool("i9").head(thumbnail_resource_path, headers).status_code == 200 name = thumb[:url] + ".jpg" break end @@ -127,7 +127,7 @@ module Invidious::Routes::Images end begin - Invidious::ConnectionPool.get_ytimg_pool("i").client &.get(url, headers) do |resp| + Invidious::ConnectionPool.get_ytimg_pool("i").get(url, headers) do |resp| return self.proxy_image(env, resp) end rescue ex diff --git a/src/invidious/routes/playlists.cr b/src/invidious/routes/playlists.cr index 9c6843e9..db3b1fc6 100644 --- a/src/invidious/routes/playlists.cr +++ b/src/invidious/routes/playlists.cr @@ -483,7 +483,7 @@ module Invidious::Routes::Playlists # Undocumented, creates anonymous playlist with specified 'video_ids', max 50 videos def self.watch_videos(env) - response = YT_POOL.client &.get(env.request.resource) + response = YT_POOL.get(env.request.resource) if url = response.headers["Location"]? url = URI.parse(url).request_target return env.redirect url diff --git a/src/invidious/search/processors.cr b/src/invidious/search/processors.cr index 25edb936..4c635ab2 100644 --- a/src/invidious/search/processors.cr +++ b/src/invidious/search/processors.cr @@ -16,11 +16,11 @@ module Invidious::Search # Search a youtube channel # TODO: clean code, and rely more on YoutubeAPI def channel(query : Query) : Array(SearchItem) - response = YT_POOL.client &.get("/channel/#{query.channel}") + response = YT_POOL.get("/channel/#{query.channel}") if response.status_code == 404 - response = YT_POOL.client &.get("/user/#{query.channel}") - response = YT_POOL.client &.get("/c/#{query.channel}") if response.status_code == 404 + response = YT_POOL.get("/user/#{query.channel}") + response = YT_POOL.get("/c/#{query.channel}") if response.status_code == 404 initial_data = extract_initial_data(response.body) ucid = initial_data.dig?("header", "c4TabbedHeaderRenderer", "channelId").try(&.as_s?) raise ChannelSearchException.new(query.channel) if !ucid diff --git a/src/invidious/yt_backend/youtube_api.cr b/src/invidious/yt_backend/youtube_api.cr index 8f5aa61d..c8483648 100644 --- a/src/invidious/yt_backend/youtube_api.cr +++ b/src/invidious/yt_backend/youtube_api.cr @@ -635,15 +635,13 @@ module YoutubeAPI LOGGER.trace("YoutubeAPI: POST data: #{data}") # Send the POST request - body = YT_POOL.client() do |client| - client.post(url, headers: headers, body: data.to_json) do |response| - if response.status_code != 200 - raise InfoException.new("Error: non 200 status code. Youtube API returned \ - status code #{response.status_code}. See \ - https://docs.invidious.io/youtube-errors-explained/ for troubleshooting.") - end - self._decompress(response.body_io, response.headers["Content-Encoding"]?) + body = YT_POOL.post(url, headers: headers, body: data.to_json) do |response| + if response.status_code != 200 + raise InfoException.new("Error: non 200 status code. Youtube API returned \ + status code #{response.status_code}. See \ + https://docs.invidious.io/youtube-errors-explained/ for troubleshooting.") end + self._decompress(response.body_io, response.headers["Content-Encoding"]?) end # Convert result to Hash