Added manual ingest from disk

This commit is contained in:
Maximilian Wagner 2023-08-10 12:53:33 +02:00
parent 6c46cf0e38
commit 3ff34c1c4e
4 changed files with 110 additions and 17 deletions

View File

@ -6,9 +6,14 @@ YouTube Media Library Server aims to make downloading and updating media librari
- Download of channels, playlists and videos in mp3 or mp4
- Download of processed files either individually or entire channels and playlists as a .zip
- Update of channels and playlists from the library
- Ingesting folders manually added to downloads folder
- Watching videos via an embed
- Listening to songs via an embed
### what could be added eventually
- Search function in library
## media sources
Currently only YouTube is supported, but it's possible to add other sources since the download itself is handled by yt-dlp.
@ -58,3 +63,7 @@ With the default settings, the server will be up and running on port `5000` and
## currently not supported
Having the same video in mp3 _and_ mp4 is not possible with how the download process and database work. This includes channels and playlists. The chosen type on initial download dictates what you have.
## suboptimal
Downloading as well as updating large playlists or channels can take hours but does not take up a lot of bandwidth.
Updating could be sped up, but that would require either writing a custom extractor / crawler or building an elaborate construct around the existing one provided by yt-dlp. Currently every item gets crawled, even if only a handful of new videos were added.

View File

@ -64,11 +64,7 @@ def process_general(url, ext, update=False):
else:
process_download(url, ext, parent, query, current_thread)
try:
queued_downloads.pop(0)
except IndexError:
print('*** IndexError: download could not be removed from list of running downloads. ***')
queued_downloads.pop(0)
return
@ -366,8 +362,7 @@ def yt_download(location, ext='mp3'):
'format': 'bestaudio/best',
'postprocessors': [{
'key': 'FFmpegExtractAudio',
'preferredcodec': 'mp3',
'preferredquality': 192
'preferredcodec': 'mp3'
}]
}
@ -413,7 +408,7 @@ def zip_folder(full_rel_path) -> tuple[str, str]:
# add remaining files to zip
with zipfile.ZipFile(downloads_path() + full_rel_path + filename, 'a') as existing_zip:
file_list = existing_zip.namelist()
file_list = [e[len(parent)+1:] for e in file_list]
file_list = [e[len(parent) + 1:] for e in file_list]
for entry in os.scandir(downloads_path() + full_rel_path):
if entry.is_file() and not entry.name.endswith('.zip') and entry.name not in file_list:
@ -463,6 +458,80 @@ def internet_available(target='http://www.youtube.com'):
return False
def enqueue_ingest():
    """Run manual_ingest in a new thread and record that thread in thread_queue."""
    # register the worker before starting it, so it is already part of
    # thread_queue by the time its target begins to run
    ingest_thread = Thread(target=manual_ingest)
    thread_queue.append(ingest_thread)
    ingest_thread.start()
def manual_ingest():
    """Register files below the downloads folder that are missing from the database.

    Walks downloads_path() recursively. Every file without a matching
    (path, name, ext) row in the video table is inserted. Files that live in a
    sub-folder are additionally linked to a playlist keyed by that folder; the
    playlist row is created first if it does not exist yet.

    While running, an entry ['Manual Ingest', 'HH:MM'] is kept in
    queued_downloads. The function waits for the thread queued directly before
    it and removes itself from both queues when done, even if the ingest fails.
    """
    # add ingest and its start time (zero-padded HH:MM) to queue
    queued_downloads.append(['Manual Ingest', datetime.now().strftime('%H:%M')])

    # wait for previous thread to finish if not first / only in list
    current_thread = threading.current_thread()
    if len(thread_queue) > 0 and thread_queue[0] is not current_thread:
        thread_queue[thread_queue.index(current_thread) - 1].join()

    try:
        root = downloads_path()
        # length of download root, used to slice it off the walked folders
        dl_path_length = len(root)

        for folder, _, files in os.walk(root):
            # path relative to the download root, '' for the root itself,
            # otherwise with a trailing slash (same for every file in folder)
            path = folder.replace('\\', '/')[dl_path_length:]
            if path:
                path += '/'

            for file in files:
                # splitext also copes with files that have no extension
                # (name='README', ext='') instead of producing an empty name
                # NOTE(review): .zip archives are written below downloads_path()
                # by zip_folder and would be ingested too - confirm whether
                # they should be skipped here
                name, ext = os.path.splitext(file)
                params = {'path': path, 'name': name, 'ext': ext}

                # nothing to do if file already in db
                if query_db_threaded('SELECT ROWID FROM video '
                                     'WHERE path = :path AND name = :name AND ext = :ext',
                                     params):
                    continue

                # insert without id
                video_rowid = query_db_threaded('INSERT INTO video(path, name, ext) VALUES (:path, :name, :ext) '
                                                'RETURNING ROWID',
                                                params,
                                                True)[0]

                # title not in playlist but in downloads root: no collection entry
                if not path:
                    continue

                # if title in new playlist / folder (supposed to happen max once),
                # create the playlist before anything references it
                if not query_db_threaded('SELECT ROWID FROM playlist WHERE folder = :path',
                                         {'path': path}):
                    segments = [p for p in path.split('/') if p]
                    # drop purely numeric folder segments from the name; fall
                    # back to all segments if nothing else would remain
                    playlist_name = (''.join(p for p in segments if not p.isdigit())
                                     or ''.join(segments))
                    query_db_threaded('INSERT INTO playlist(folder, name) VALUES (:path, :name)',
                                      {'path': path, 'name': playlist_name})

                # link title to its playlist / folder
                query_db_threaded('INSERT INTO collection(playlist, video) VALUES (:path, :rowid)',
                                  {'path': path, 'rowid': video_rowid})
    finally:
        # always free the queue slots so a failed ingest does not linger
        queued_downloads.pop(0)
        thread_queue.remove(current_thread)
    return
# does what it says; does not need thread of its own since it's reasonably fast
def delete_file_or_playlist(file_name):
# deleting single download is simple enough
@ -494,7 +563,9 @@ def delete_file_or_playlist(file_name):
# checks if file is somewhere in downloads directory and returns true if so
def check_file_path(path):
downloads = downloads_path()
return downloads in os.path.abspath(downloads + path)
abspath = os.path.abspath(downloads + path)
# need to check for backslash also for debugging on windows
return path in abspath or path.replace('/', '\\') in abspath
# sanitizes file names for windows fs

View File

@ -1,7 +1,5 @@
from __future__ import unicode_literals
import os.path
from flask import (
Blueprint,
request,
@ -17,6 +15,7 @@ from backend import (
enqueue_download,
internet_available,
delete_file_or_playlist,
enqueue_ingest,
check_file_path
)
from forms.download import DownloadForm
@ -52,7 +51,6 @@ def downloader():
# if there has been a problem with the form (empty or error) or the link is not valid
if not form.validate_on_submit() or not valid_link:
valid_link = True if url == 'None' else False # if url is empty, don't show error
return render_template('downloader.html', form=form, amount=len(urls))
if not internet_available():
@ -70,7 +68,8 @@ def downloader():
@frontend.route('/library', methods=['GET'])
def library():
videos = query_db("SELECT name, ext, path FROM video "
"LEFT JOIN collection ON video.id = collection.video WHERE collection.video IS NULL ")
"LEFT JOIN collection ON video.id = collection.video "
"WHERE video.path = '' ")
playlists = query_db("SELECT name, ROWID FROM playlist")
if not playlists and not videos:
flash('Library ist currently empty. Try downloading something!', 'primary')
@ -84,7 +83,7 @@ def library():
def library_playlist():
playlist = request.args.get('playlist', None)
videos = query_db('SELECT video.name, video.ext, video.path FROM video '
'LEFT JOIN collection ON video.id = collection.video '
'LEFT JOIN collection ON video.id = collection.video OR video.ROWID = collection.video '
'LEFT JOIN playlist ON collection.playlist=playlist.folder '
'WHERE playlist.ROWID = :playlist',
{'playlist': playlist})
@ -145,6 +144,10 @@ def update():
{'url_rowid': url_rowid},
True)[0]
if url is None:
flash('Playlist has no URL. It probably was added from disk.', 'danger')
return redirect(request.args.get('from'))
# kick off download process
enqueue_download(url, update=True)
@ -179,3 +182,10 @@ def serve():
'video/mp4',
True
)
@frontend.route('/ingest', methods=['GET'])
def ingest():
    """Start a manual ingest, flash a confirmation and redirect to the library page."""
    enqueue_ingest()
    flash('Ingest started', 'primary')
    library_redirect = redirect('/library')
    return library_redirect

View File

@ -1,6 +1,6 @@
/*
- unique youtube id / watch key identifies the song
- id is nullable for todo: ingest from local dev
- id is nullable for ingest from filesystem
- name is title of video
- ext is the file extension / type
- path is relative to 'project_root/downloads/'
@ -24,16 +24,19 @@ CREATE TABLE IF NOT EXISTS video (
example for folder:
- 'playlist_name/'
- 'playlist_name/playlist_ROWID/'
- url is nullable for manual ingest
*/
CREATE TABLE IF NOT EXISTS playlist (
folder TEXT PRIMARY KEY,
name TEXT NOT NULL,
url TEXT UNIQUE NOT NULL
url TEXT UNIQUE
);
/*
- playlist equals folder
- video equals id
- video equals id or rowid if from manual ingest
- caveat: a video id consisting only of digits
would be indistinguishable from a rowid
- simple n-m mapping
(playlist contains multiple songs)
(song can be in multiple playlists)