Added manual ingest from disk

This commit is contained in:
Maximilian Wagner
2023-08-10 12:53:33 +02:00
parent 6c46cf0e38
commit 3ff34c1c4e
4 changed files with 110 additions and 17 deletions

View File

@@ -6,9 +6,14 @@ YouTube Media Library Server aims to make downloading and updating media librari
- Download of channels, playlists and videos in mp3 or mp4 - Download of channels, playlists and videos in mp3 or mp4
- Download of processed files either individually or entire channels and playlists as a .zip - Download of processed files either individually or entire channels and playlists as a .zip
- Update of channels and playlists from the library - Update of channels and playlists from the library
- Ingesting folders manually added to downloads folder
- Watching videos via an embed - Watching videos via an embed
- Listening to song via an embed - Listening to song via an embed
### what could be added eventually
- Search function in library
## media sources ## media sources
Currently supported is YouTube but its possible to expand since the download itself is handled by yt-dlp. Currently supported is YouTube but its possible to expand since the download itself is handled by yt-dlp.
@@ -58,3 +63,7 @@ With the default settings, the server will be up and running on port `5000` and
## currently not supported ## currently not supported
Having the same video in mp3 _and_ mp4 is not possible with how the download process and database work. This includes channels and playlists. The chosen type on initial download dictates what you have. Having the same video in mp3 _and_ mp4 is not possible with how the download process and database work. This includes channels and playlists. The chosen type on initial download dictates what you have.
## suboptimal
Downloading as well as updating large playlists or channels can take hours, although it does not use much bandwidth.
Updating could be sped up, but that would require either writing a custom extractor / crawler or building an elaborate construct around the existing one provided by yt-dlp. Currently, every item gets crawled even if only a handful of new videos were added.

View File

@@ -64,11 +64,7 @@ def process_general(url, ext, update=False):
else: else:
process_download(url, ext, parent, query, current_thread) process_download(url, ext, parent, query, current_thread)
try: queued_downloads.pop(0)
queued_downloads.pop(0)
except IndexError:
print('*** IndexError: download could not be removed from list of running downloads. ***')
return return
@@ -366,8 +362,7 @@ def yt_download(location, ext='mp3'):
'format': 'bestaudio/best', 'format': 'bestaudio/best',
'postprocessors': [{ 'postprocessors': [{
'key': 'FFmpegExtractAudio', 'key': 'FFmpegExtractAudio',
'preferredcodec': 'mp3', 'preferredcodec': 'mp3'
'preferredquality': 192
}] }]
} }
@@ -413,7 +408,7 @@ def zip_folder(full_rel_path) -> tuple[str, str]:
# add remaining files to zip # add remaining files to zip
with zipfile.ZipFile(downloads_path() + full_rel_path + filename, 'a') as existing_zip: with zipfile.ZipFile(downloads_path() + full_rel_path + filename, 'a') as existing_zip:
file_list = existing_zip.namelist() file_list = existing_zip.namelist()
file_list = [e[len(parent)+1:] for e in file_list] file_list = [e[len(parent) + 1:] for e in file_list]
for entry in os.scandir(downloads_path() + full_rel_path): for entry in os.scandir(downloads_path() + full_rel_path):
if entry.is_file() and not entry.name.endswith('.zip') and entry.name not in file_list: if entry.is_file() and not entry.name.endswith('.zip') and entry.name not in file_list:
@@ -463,6 +458,80 @@ def internet_available(target='http://www.youtube.com'):
return False return False
def enqueue_ingest():
    """Start a manual ingest on its own thread and register it in the thread queue."""
    ingest_thread = Thread(target=manual_ingest)
    # register before starting: the worker looks itself up in thread_queue
    thread_queue.append(ingest_thread)
    ingest_thread.start()
def _playlist_name_from_folder(path):
    """Build a playlist display name from a relative folder path like 'name/' or 'name/123/'."""
    parts = [part for part in path.split('/') if part]
    # purely numeric components are left out of the name
    # (presumably the playlist ROWID of the 'playlist_name/playlist_ROWID/' layout)
    named_parts = [part for part in parts if not part.isdigit()]
    # fall back to all components so a folder called e.g. '2023/' still gets a name
    return ''.join(named_parts or parts)


def manual_ingest():
    """Register files found below the downloads folder that are missing from the database.

    Runs on a worker thread started by enqueue_ingest. The ingest is listed in
    queued_downloads together with its start time (HH:MM), waits for the thread
    queued directly before it, then walks the downloads folder. Every file that
    has no matching row in `video` is inserted without an id. Files inside a
    sub-folder are also linked to that folder in `collection`, and a `playlist`
    row is created for the folder if none exists yet.

    The queue entry and the thread registration are removed even if the ingest
    fails, so later queue entries are not blocked by a crashed ingest.
    """
    # add ingest and zero-padded start time to queue
    queued_downloads.append(['Manual Ingest', datetime.now().strftime('%H:%M')])

    current_thread = threading.current_thread()
    try:
        # wait for previous thread to finish if not first / only in list;
        # work on a snapshot so a predecessor removing itself in the meantime
        # cannot shift the index onto this thread or a later one
        pending = list(thread_queue)
        if current_thread in pending:
            position = pending.index(current_thread)
            if position > 0:
                pending[position - 1].join()

        dl_root = downloads_path()
        # length of download root, used to slice it off the walked folder paths
        dl_path_length = len(dl_root)

        # NOTE(review): every file is considered, including archives such as
        # .zip files in the downloads folder - confirm whether those should be skipped
        for folder, _, files in os.walk(dl_root):
            # path relative to the download root with forward slashes,
            # '' for the root itself, otherwise with a trailing '/'
            path = folder.replace('\\', '/')[dl_path_length:]
            if path:
                path += '/'

            for file in files:
                # splitext keeps names without a dot intact ('README' -> 'README', '')
                name, ext = os.path.splitext(file)
                params = {'path': path, 'name': name, 'ext': ext}

                # nothing to do if file already in db
                if query_db_threaded('SELECT ROWID FROM video '
                                     'WHERE path = :path AND name = :name AND ext = :ext',
                                     params):
                    continue

                # insert without id
                video_rowid = query_db_threaded('INSERT INTO video(path, name, ext) VALUES (:path, :name, :ext) '
                                                'RETURNING ROWID',
                                                params,
                                                True)[0]

                # skip next if title not in playlist but in downloads root
                if not path:
                    continue

                query = query_db_threaded('SELECT ROWID FROM playlist WHERE folder = :path',
                                          {'path': path})

                # link title to its playlist / folder
                query_db_threaded('INSERT INTO collection(playlist, video) VALUES (:path, :rowid)',
                                  {'path': path, 'rowid': video_rowid})

                # if title in new playlist / folder (supposed to happen max once)
                if not query:
                    # create new playlist
                    query_db_threaded('INSERT INTO playlist(folder, name) VALUES (:path, :name)',
                                      {'path': path, 'name': _playlist_name_from_folder(path)})
    finally:
        # always release the queue slot and the thread registration
        queued_downloads.pop(0)
        if current_thread in thread_queue:
            thread_queue.remove(current_thread)
# does what it says; does not need thread of its own since it's reasonably fast # does what it says; does not need thread of its own since it's reasonably fast
def delete_file_or_playlist(file_name): def delete_file_or_playlist(file_name):
# deleting single download is simple enough # deleting single download is simple enough
@@ -494,7 +563,9 @@ def delete_file_or_playlist(file_name):
# checks if file is somewhere in downloads directory and returns true if so # checks if file is somewhere in downloads directory and returns true if so
def check_file_path(path): def check_file_path(path):
downloads = downloads_path() downloads = downloads_path()
return downloads in os.path.abspath(downloads + path) abspath = os.path.abspath(downloads + path)
# need to check for backslash also for debugging on windows
return path in abspath or path.replace('/', '\\') in abspath
# sanitizes file names for windows fs # sanitizes file names for windows fs

View File

@@ -1,7 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import os.path
from flask import ( from flask import (
Blueprint, Blueprint,
request, request,
@@ -17,6 +15,7 @@ from backend import (
enqueue_download, enqueue_download,
internet_available, internet_available,
delete_file_or_playlist, delete_file_or_playlist,
enqueue_ingest,
check_file_path check_file_path
) )
from forms.download import DownloadForm from forms.download import DownloadForm
@@ -52,7 +51,6 @@ def downloader():
# if there has been a problem with the form (empty or error) or the link is not valid # if there has been a problem with the form (empty or error) or the link is not valid
if not form.validate_on_submit() or not valid_link: if not form.validate_on_submit() or not valid_link:
valid_link = True if url == 'None' else False # if url is empty, don't show error
return render_template('downloader.html', form=form, amount=len(urls)) return render_template('downloader.html', form=form, amount=len(urls))
if not internet_available(): if not internet_available():
@@ -70,7 +68,8 @@ def downloader():
@frontend.route('/library', methods=['GET']) @frontend.route('/library', methods=['GET'])
def library(): def library():
videos = query_db("SELECT name, ext, path FROM video " videos = query_db("SELECT name, ext, path FROM video "
"LEFT JOIN collection ON video.id = collection.video WHERE collection.video IS NULL ") "LEFT JOIN collection ON video.id = collection.video "
"WHERE video.path = '' ")
playlists = query_db("SELECT name, ROWID FROM playlist") playlists = query_db("SELECT name, ROWID FROM playlist")
if not playlists and not videos: if not playlists and not videos:
flash('Library ist currently empty. Try downloading something!', 'primary') flash('Library ist currently empty. Try downloading something!', 'primary')
@@ -84,7 +83,7 @@ def library():
def library_playlist(): def library_playlist():
playlist = request.args.get('playlist', None) playlist = request.args.get('playlist', None)
videos = query_db('SELECT video.name, video.ext, video.path FROM video ' videos = query_db('SELECT video.name, video.ext, video.path FROM video '
'LEFT JOIN collection ON video.id = collection.video ' 'LEFT JOIN collection ON video.id = collection.video OR video.ROWID = collection.video '
'LEFT JOIN playlist ON collection.playlist=playlist.folder ' 'LEFT JOIN playlist ON collection.playlist=playlist.folder '
'WHERE playlist.ROWID = :playlist', 'WHERE playlist.ROWID = :playlist',
{'playlist': playlist}) {'playlist': playlist})
@@ -145,6 +144,10 @@ def update():
{'url_rowid': url_rowid}, {'url_rowid': url_rowid},
True)[0] True)[0]
if url is None:
flash('Playlist has no URL. It probably was added from disk.', 'danger')
return redirect(request.args.get('from'))
# kick off download process # kick off download process
enqueue_download(url, update=True) enqueue_download(url, update=True)
@@ -179,3 +182,10 @@ def serve():
'video/mp4', 'video/mp4',
True True
) )
@frontend.route('/ingest', methods=['GET'])
def ingest():
    """Kick off a manual ingest in the background and send the user back to the library."""
    enqueue_ingest()
    # the ingest runs on its own thread, so only its start can be reported here
    flash('Ingest started', 'primary')
    library_redirect = redirect('/library')
    return library_redirect

View File

@@ -1,6 +1,6 @@
/* /*
- unique youtube id / watch key identifies the song - unique youtube id / watch key identifies the song
- id is nullable for todo: ingest from local dev - id is nullable for ingest from filesystem
- name is title of video - name is title of video
- ext is the file extension / type - ext is the file extension / type
- path is relative to 'project_root/downloads/' - path is relative to 'project_root/downloads/'
@@ -24,16 +24,19 @@ CREATE TABLE IF NOT EXISTS video (
example for folder: example for folder:
- 'playlist_name/' - 'playlist_name/'
- 'playlist_name/playlist_ROWID/' - 'playlist_name/playlist_ROWID/'
- url is nullable for manual ingest
*/ */
CREATE TABLE IF NOT EXISTS playlist ( CREATE TABLE IF NOT EXISTS playlist (
folder TEXT PRIMARY KEY, folder TEXT PRIMARY KEY,
name TEXT NOT NULL, name TEXT NOT NULL,
url TEXT UNIQUE NOT NULL url TEXT UNIQUE
); );
/* /*
- playlist equals folder - playlist equals folder
- video equals id - video equals id or rowid if from manual ingest
- caveat: a video id that consists only of digits
could be mistaken for a rowid when resolving a collection entry
- simple n-m mapping - simple n-m mapping
(playlist contains multiple songs) (playlist contains multiple songs)
(song can be in multiple playlists) (song can be in multiple playlists)