Added manual ingest from disk

2023-08-10 12:53:33 +02:00
parent 6c46cf0e38
commit 3ff34c1c4e
4 changed files with 110 additions and 17 deletions
--- a/README.md
+++ b/README.md
@@ -6,9 +6,14 @@ YouTube Media Library Server aims to make downloading and updating media librari
 - Download of channels, playlists and videos in mp3 or mp4
 - Download of processed files either individually or entire channels and playlists as a .zip
 - Update of channels and playlists from the library
 - Ingesting folders manually added to downloads folder
 - Watching videos via an embed
 - Listening to songs via an embed
 ### what could be added eventually
 - Search function in library
 ## media sources
 Currently only YouTube is supported, but it's possible to expand to other sources since the download itself is handled by yt-dlp.
@@ -58,3 +63,7 @@ With the default settings, the server will be up and running on port `5000` and
 ## currently not supported
 Having the same video in mp3 _and_ mp4 is not possible with how the download process and database work. This includes channels and playlists. The chosen type on initial download dictates what you have.
 ## suboptimal
 Downloading as well as updating large playlists or channels can take hours but does not take up a lot of bandwidth. 
 Updating could be sped up, but that involves either writing a custom extractor / crawler or building an elaborate construct around the existing one provided by yt-dlp. Currently every item gets crawled, even if only a handful of new videos were added.
--- a/backend.py
+++ b/backend.py
@@ -64,11 +64,7 @@ def process_general(url, ext, update=False):
    else:
        process_download(url, ext, parent, query, current_thread)
-    try:
+    queued_downloads.pop(0)
        queued_downloads.pop(0)
    except IndexError:
        print('*** IndexError: download could not be removed from list of running downloads. ***')
    return
@@ -366,8 +362,7 @@ def yt_download(location, ext='mp3'):
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
-            'preferredcodec': 'mp3',
+            'preferredcodec': 'mp3'
            'preferredquality': 192
        }]
    }
@@ -413,7 +408,7 @@ def zip_folder(full_rel_path) -> tuple[str, str]:
    # add remaining files to zip
    with zipfile.ZipFile(downloads_path() + full_rel_path + filename, 'a') as existing_zip:
        file_list = existing_zip.namelist()
-        file_list = [e[len(parent)+1:] for e in file_list]
+        file_list = [e[len(parent) + 1:] for e in file_list]
        for entry in os.scandir(downloads_path() + full_rel_path):
            if entry.is_file() and not entry.name.endswith('.zip') and entry.name not in file_list:
@@ -463,6 +458,80 @@ def internet_available(target='http://www.youtube.com'):
        return False
 def enqueue_ingest():
    """Run ``manual_ingest`` on a new thread.

    The thread is appended to ``thread_queue`` before it is started.
    """
    worker = Thread(target=manual_ingest)
    # register first, then launch
    thread_queue.append(worker)
    worker.start()
 def manual_ingest():
    """Add files found below the downloads folder to the database if they are not in it yet.

    Walks ``downloads_path()`` recursively. Every file without a matching
    ``video`` row (same path, name and ext) is inserted without an id. Files
    inside a sub folder are additionally linked to that folder in
    ``collection``; a ``playlist`` row without url is created for the folder
    if none exists yet.

    While it is pending or running, the ingest is listed in
    ``queued_downloads`` as ``['Manual Ingest', 'HH:MM']``. The entry and the
    thread's slot in ``thread_queue`` are released even if the ingest fails.
    """
    # add ingest and zero-padded start time to queue
    queued_downloads.append(['Manual Ingest', datetime.now().strftime('%H:%M')])
    current_thread = threading.current_thread()

    try:
        # wait for the thread queued directly before this one; a snapshot is used so
        # the list cannot shift between finding the position and reading the predecessor
        waiting = list(thread_queue)
        if current_thread in waiting:
            position = waiting.index(current_thread)
            if position > 0:
                waiting[position - 1].join()

        # get length of download root to slice path
        dl_path_length = len(downloads_path())

        for folder, _, files in os.walk(downloads_path()):
            # path relative to download root; same for every file of this folder
            path = folder.replace('\\', '/')[dl_path_length:]
            if path:
                path += '/'

            for file in files:
                # splitext also copes with files that have no extension at all
                name, ext = os.path.splitext(file)
                params = {'path': path, 'name': name, 'ext': ext}

                # skip file if already in db
                if query_db_threaded('SELECT ROWID FROM video '
                                     'WHERE path = :path AND name = :name AND ext = :ext',
                                     params):
                    continue

                # insert without id
                video_rowid = query_db_threaded('INSERT INTO video(path, name, ext) VALUES (:path, :name, :ext) '
                                                'RETURNING ROWID',
                                                params,
                                                True)[0]

                # nothing more to do if title is not in a playlist but in downloads root
                if not path:
                    continue

                # if title in new playlist / folder (supposed to happen max once per folder)
                if not query_db_threaded('SELECT ROWID FROM playlist WHERE folder = :path',
                                         {'path': path}):
                    # drop purely numeric folder parts (e.g. 'playlist_name/playlist_ROWID/');
                    # path components are always strings, so they are tested with isdigit()
                    playlist_name = ''.join(p for p in path.split('/') if not p.isdigit())
                    # a folder made up only of numbers would otherwise get an empty name
                    playlist_name = playlist_name or path.strip('/')
                    # create new playlist before linking so the link never points to a missing folder
                    query_db_threaded('INSERT INTO playlist(folder, name) VALUES (:path, :name)',
                                      {'path': path, 'name': playlist_name})

                # link title to its playlist / folder
                query_db_threaded('INSERT INTO collection(playlist, video) VALUES (:path, :rowid)',
                                  {'path': path, 'rowid': video_rowid})
    finally:
        # always release the queue entry and thread slot, otherwise later threads wait forever
        if queued_downloads:
            queued_downloads.pop(0)
        if current_thread in thread_queue:
            thread_queue.remove(current_thread)
    return
 # does what it says; does not need thread of its own since it's reasonably fast
 def delete_file_or_playlist(file_name):
    # deleting single download is simple enough
@@ -494,7 +563,9 @@ def delete_file_or_playlist(file_name):
 # checks if file is somewhere in downloads directory and returns true if so
 def check_file_path(path):
    """Return True if ``path``, taken relative to the downloads folder, stays inside it.

    Both the downloads root and the requested location are normalised with
    ``os.path.abspath`` before they are compared, so ``..`` segments and
    mixed slash styles (e.g. when debugging on windows) are handled without
    any string matching.
    """
    downloads = downloads_path()
    root = os.path.abspath(downloads)
    abspath = os.path.abspath(downloads + path)
    try:
        # the common ancestor of both is the root itself only if abspath lies within it
        return os.path.commonpath([root, abspath]) == root
    except ValueError:
        # raised e.g. for paths on different drives: cannot be inside downloads
        return False
 # sanitizes file names for windows fs
--- a/frontend.py
+++ b/frontend.py
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 import os.path
 from flask import (
    Blueprint,
    request,
@@ -17,6 +15,7 @@ from backend import (
    enqueue_download,
    internet_available,
    delete_file_or_playlist,
    enqueue_ingest,
    check_file_path
 )
 from forms.download import DownloadForm
@@ -52,7 +51,6 @@ def downloader():
    # if there has been a problem with the form (empty or error) or the link is not valid
    if not form.validate_on_submit() or not valid_link:
        valid_link = True if url == 'None' else False  # if url is empty, don't show error
        return render_template('downloader.html', form=form, amount=len(urls))
    if not internet_available():
@@ -70,7 +68,8 @@ def downloader():
@frontend.route('/library', methods=['GET'])
 def library():
    videos = query_db("SELECT name, ext, path FROM video "
-                      "LEFT JOIN collection ON video.id = collection.video WHERE collection.video IS NULL ")
+                      "LEFT JOIN collection ON video.id = collection.video "
                      "WHERE video.path = '' ")
    playlists = query_db("SELECT name, ROWID FROM playlist")
    if not playlists and not videos:
        flash('Library ist currently empty. Try downloading something!', 'primary')
@@ -84,7 +83,7 @@ def library():
 def library_playlist():
    playlist = request.args.get('playlist', None)
    videos = query_db('SELECT video.name, video.ext, video.path FROM video '
-                      'LEFT JOIN collection ON video.id = collection.video '
+                      'LEFT JOIN collection ON video.id = collection.video OR video.ROWID = collection.video '
                      'LEFT JOIN playlist ON collection.playlist=playlist.folder '
                      'WHERE playlist.ROWID = :playlist',
                      {'playlist': playlist})
@@ -145,6 +144,10 @@ def update():
                   {'url_rowid': url_rowid},
                   True)[0]
    if url is None:
        flash('Playlist has no URL. It probably was added from disk.', 'danger')
        return redirect(request.args.get('from'))
    # kick off download process
    enqueue_download(url, update=True)
@@ -179,3 +182,10 @@ def serve():
            'video/mp4',
            True
        )
@frontend.route('/ingest', methods=['GET'])
 def ingest():
    enqueue_ingest()
    flash('Ingest started', 'primary')
    return redirect('/library')
--- a/schema.sql
+++ b/schema.sql
@@ -1,6 +1,6 @@
 /*
    - unique youtube id / watch key identifies the song
-    - id is nullable for todo: ingest from local dev
+    - id is nullable for ingest from filesystem
    - name is title of video
    - ext is the file extension / type
    - path is relative to 'project_root/downloads/'
@@ -24,16 +24,19 @@ CREATE TABLE IF NOT EXISTS video (
        example for folder:
            - 'playlist_name/'
            - 'playlist_name/playlist_ROWID/'
    - url is nullable for manual ingest
 */
 CREATE TABLE IF NOT EXISTS playlist (
    folder TEXT PRIMARY KEY,
    name TEXT NOT NULL,
-    url TEXT UNIQUE NOT NULL
+    url TEXT UNIQUE
 );
 /*
    - playlist equals folder
-    - video equals id
+    - video equals id or rowid if from manual ingest
        - caveat: a video id consisting only of digits
            could be mistaken for a rowid
    - simple n-m mapping
        (playlist contains multiple songs)
        (song can be in multiple playlists)