Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
Shawn Webb
youtube-dl
Commits
416da574
Unverified
Commit
416da574
authored
Oct 23, 2020
by
Sergey M․
Browse files
[ytsearch] Fix extraction (closes #26920)
parent
48c5663c
Changes
1
Hide whitespace changes
Inline
Side-by-side
youtube_dl/extractor/youtube.py
View file @
416da574
...
...
@@ -3181,54 +3181,94 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
_MAX_RESULTS
=
float
(
'inf'
)
IE_NAME
=
'youtube:search'
_SEARCH_KEY
=
'ytsearch'
_
EXTRA_QUERY_ARGS
=
{}
_
SEARCH_PARAMS
=
None
_TESTS
=
[]
def
_get_n_results
(
self
,
query
,
n
):
"""Get a specified number of results for a query"""
videos
=
[]
limit
=
n
url_query
=
{
'search_query'
:
query
.
encode
(
'utf-8'
),
def
_entries
(
self
,
query
,
n
):
data
=
{
'context'
:
{
'client'
:
{
'clientName'
:
'WEB'
,
'clientVersion'
:
'2.20201021.03.00'
,
}
},
'query'
:
query
,
}
url_query
.
update
(
self
.
_EXTRA_QUERY_ARGS
)
result_url
=
'https://www.youtube.com/results?'
+
compat_urllib_parse_urlencode
(
url_query
)
for
pagenum
in
itertools
.
count
(
1
):
data
=
self
.
_download_json
(
result_url
,
video_id
=
'query "%s"'
%
query
,
note
=
'Downloading page %s'
%
pagenum
,
errnote
=
'Unable to download API page'
,
query
=
{
'spf'
:
'navigate'
})
html_content
=
data
[
1
][
'body'
][
'content'
]
if
'class="search-message'
in
html_content
:
raise
ExtractorError
(
'[youtube] No video results'
,
expected
=
True
)
new_videos
=
list
(
self
.
_process_page
(
html_content
))
videos
+=
new_videos
if
not
new_videos
or
len
(
videos
)
>
limit
:
if
self
.
_SEARCH_PARAMS
:
data
[
'params'
]
=
self
.
_SEARCH_PARAMS
total
=
0
for
page_num
in
itertools
.
count
(
1
):
search
=
self
.
_download_json
(
'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
,
video_id
=
'query "%s"'
%
query
,
note
=
'Downloading page %s'
%
page_num
,
errnote
=
'Unable to download API page'
,
fatal
=
False
,
data
=
json
.
dumps
(
data
).
encode
(
'utf8'
),
headers
=
{
'content-type'
:
'application/json'
})
if
not
search
:
break
next_link
=
self
.
_html_search_regex
(
r
'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next'
,
html_content
,
'next link'
,
default
=
None
)
if
next_link
is
None
:
slr_contents
=
try_get
(
search
,
(
lambda
x
:
x
[
'contents'
][
'twoColumnSearchResultsRenderer'
][
'primaryContents'
][
'sectionListRenderer'
][
'contents'
],
lambda
x
:
x
[
'onResponseReceivedCommands'
][
0
][
'appendContinuationItemsAction'
][
'continuationItems'
]),
list
)
if
not
slr_contents
:
break
result_url
=
compat_urlparse
.
urljoin
(
'https://www.youtube.com/'
,
next_link
)
isr_contents
=
try_get
(
slr_contents
,
lambda
x
:
x
[
0
][
'itemSectionRenderer'
][
'contents'
],
list
)
if
not
isr_contents
:
break
for
content
in
isr_contents
:
if
not
isinstance
(
content
,
dict
):
continue
video
=
content
.
get
(
'videoRenderer'
)
if
not
isinstance
(
video
,
dict
):
continue
video_id
=
video
.
get
(
'videoId'
)
if
not
video_id
:
continue
title
=
try_get
(
video
,
lambda
x
:
x
[
'title'
][
'runs'
][
0
][
'text'
],
compat_str
)
description
=
try_get
(
video
,
lambda
x
:
x
[
'descriptionSnippet'
][
'runs'
][
0
][
'text'
],
compat_str
)
duration
=
parse_duration
(
try_get
(
video
,
lambda
x
:
x
[
'lengthText'
][
'simpleText'
],
compat_str
))
view_count_text
=
try_get
(
video
,
lambda
x
:
x
[
'viewCountText'
][
'simpleText'
],
compat_str
)
or
''
view_count
=
int_or_none
(
self
.
_search_regex
(
r
'^(\d+)'
,
re
.
sub
(
r
'\s'
,
''
,
view_count_text
),
'view count'
,
default
=
None
))
uploader
=
try_get
(
video
,
lambda
x
:
x
[
'ownerText'
][
'runs'
][
0
][
'text'
],
compat_str
)
total
+=
1
yield
{
'_type'
:
'url_transparent'
,
'ie_key'
:
YoutubeIE
.
ie_key
(),
'id'
:
video_id
,
'url'
:
video_id
,
'title'
:
title
,
'description'
:
description
,
'duration'
:
duration
,
'view_count'
:
view_count
,
'uploader'
:
uploader
,
}
if
total
==
n
:
return
token
=
try_get
(
slr_contents
,
lambda
x
:
x
[
1
][
'continuationItemRenderer'
][
'continuationEndpoint'
][
'continuationCommand'
][
'token'
],
compat_str
)
if
not
token
:
break
data
[
'continuation'
]
=
token
if
len
(
videos
)
>
n
:
videos
=
videos
[:
n
]
return
self
.
playlist_result
(
videos
,
query
)
def
_get_n_results
(
self
,
query
,
n
)
:
"""Get a specified number of results for a query"""
return
self
.
playlist_result
(
self
.
_entries
(
query
,
n
)
,
query
)
class
YoutubeSearchDateIE
(
YoutubeSearchIE
):
IE_NAME
=
YoutubeSearchIE
.
IE_NAME
+
':date'
_SEARCH_KEY
=
'ytsearchdate'
IE_DESC
=
'YouTube.com searches, newest videos first'
_
EXTRA_QUERY_ARGS
=
{
'search_sort'
:
'video_date_uploaded'
}
_
SEARCH_PARAMS
=
'CAI%3D'
class
YoutubeSearchURLIE
(
YoutubeSearchBaseInfoExtractor
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment