Added manual ingest from disk

2023-08-10 12:53:33 +02:00 · 2023-08-10 12:53:33 +02:00 · 3ff34c1c4e
commit 3ff34c1c4e
parent 6c46cf0e38
4 changed files with 110 additions and 17 deletions
--- a/README.md
+++ b/README.md
@ -6,9 +6,14 @@ YouTube Media Library Server aims to make downloading and updating media librari
 - Download of channels, playlists and videos in mp3 or mp4
 - Download of processed files either individually or entire channels and playlists as a .zip
 - Update of channels and playlists from the library
+- Ingesting folders that were manually added to the downloads folder
 - Watching videos via an embed
 - Listening to song via an embed

+### what could be added eventually
+- Search function in library
+
+
 ## media sources
 Currently supported is YouTube but its possible to expand since the download itself is handled by yt-dlp.

@ -58,3 +63,7 @@ With the default settings, the server will be up and running on port `5000` and

 ## currently not supported
 Having the same video in mp3 _and_ mp4 is not possible with how the download process and database work. This includes channels and playlists. The chosen type on initial download dictates what you have.
+
+## suboptimal
+Downloading or updating large playlists or channels can take hours, although it does not use much bandwidth.
+Updating could be sped up, but that would require either writing a custom extractor / crawler or building an elaborate construct around the existing one provided by yt-dlp. Currently every item gets crawled even if only a handful of new videos were added.
--- a/backend.py
+++ b/backend.py
@ -64,11 +64,7 @@ def process_general(url, ext, update=False):
    else:
        process_download(url, ext, parent, query, current_thread)

-    try:
-        queued_downloads.pop(0)
-    except IndexError:
-        print('*** IndexError: download could not be removed from list of running downloads. ***')
-
+    queued_downloads.pop(0)
    return


@ -366,8 +362,7 @@ def yt_download(location, ext='mp3'):
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
-            'preferredcodec': 'mp3',
-            'preferredquality': 192
+            'preferredcodec': 'mp3'
        }]
    }

@ -413,7 +408,7 @@ def zip_folder(full_rel_path) -> tuple[str, str]:
    # add remaining files to zip
    with zipfile.ZipFile(downloads_path() + full_rel_path + filename, 'a') as existing_zip:
        file_list = existing_zip.namelist()
-        file_list = [e[len(parent)+1:] for e in file_list]
+        file_list = [e[len(parent) + 1:] for e in file_list]

        for entry in os.scandir(downloads_path() + full_rel_path):
            if entry.is_file() and not entry.name.endswith('.zip') and entry.name not in file_list:
@ -463,6 +458,80 @@ def internet_available(target='http://www.youtube.com'):
        return False


+def enqueue_ingest():  # starts manual_ingest on a new thread and records that thread in thread_queue
+    t = Thread(target=manual_ingest)
+    thread_queue.append(t)  # appended before start(): manual_ingest looks its own thread up in thread_queue
+    t.start()
+    return
+
+
+def manual_ingest():  # walks the downloads folder and adds files that have no row in the video table yet
+    # current local time, stored alongside the queue entry below
+    current_time = datetime.now().time()
+
+    # zero-pad hour and minute to two digits each
+    hour = str(current_time.hour)
+    hour = hour if len(hour) > 1 else '0' + hour
+    minute = str(current_time.minute)
+    minute = minute if len(minute) > 1 else '0' + minute
+
+    # register this run in queued_downloads as ['Manual Ingest', 'HH:MM']
+    queued_downloads.append(['Manual Ingest', hour + ':' + minute])
+
+    # unless this thread is first in thread_queue, join the thread queued directly before it
+    current_thread = threading.current_thread()
+    if len(thread_queue) > 0 and thread_queue[0] is not current_thread:
+        threading.Thread.join(thread_queue[thread_queue.index(current_thread) - 1])
+
+    # length of the downloads root, used to cut it off the front of every walked folder path
+    dl_path_length = len(downloads_path())
+
+    for folder, _, files in os.walk(downloads_path()):
+        for file in files:
+            file_split = file.split('.')
+
+            path = folder.replace('\\', '/')[dl_path_length:]  # folder relative to the downloads root, forward slashes only
+            if len(path) > 0:
+                path += '/'
+
+            ext = '.' + file_split[-1]  # text after the last dot, with the dot put back in front
+            name = file[:-len(ext)]  # NOTE(review): a file name without any dot gives ext == '.' + file and name == '' - confirm intended
+
+            # only handle files that have no matching (path, name, ext) row in video
+            if not len(query_db_threaded('SELECT ROWID FROM video '
+                                         'WHERE path = :path AND name = :name AND ext = :ext',
+                                         {'path': path, 'name': name, 'ext': ext})):
+
+                # insert without id; keeps element [0] of the RETURNING ROWID result (trailing True presumably requests a single row - confirm)
+                video_rowid = query_db_threaded('INSERT INTO video(path, name, ext) VALUES (:path, :name, :ext) '
+                                                'RETURNING ROWID',
+                                                {'path': path, 'name': name, 'ext': ext},
+                                                True)[0]
+
+                # files directly in the downloads root (empty path) get no collection / playlist rows
+                if not path:
+                    continue
+
+                query = query_db_threaded('SELECT ROWID FROM playlist WHERE folder = :path',
+                                          {'path': path})
+
+                # link the video to its folder in collection (runs whether or not a playlist row exists yet)
+                query_db_threaded('INSERT INTO collection(playlist, video) VALUES (:path, :rowid)',
+                                  {'path': path, 'rowid': video_rowid})
+
+                # no playlist row for this folder yet (supposed to happen at most once)
+                if not len(query):
+                    path_split = path.split('/')
+                    playlist_name = ''.join([p if type(p) is not int else '' for p in path_split])  # NOTE(review): every p is a str, so this only joins the path parts without '/'
+                    # create the playlist row for this folder; no url is supplied
+                    query_db_threaded('INSERT INTO playlist(folder, name) VALUES (:path, :name)',
+                                      {'path': path, 'name': playlist_name})
+
+    queued_downloads.pop(0)  # drops the head entry of queued_downloads
+    thread_queue.remove(current_thread)
+    return
+
+
 # does what it says; does not need thread of its own since it's reasonably fast
 def delete_file_or_playlist(file_name):
    # deleting single download is simple enough
@ -494,7 +563,9 @@ def delete_file_or_playlist(file_name):
 # checks if file is somewhere in downloads directory and returns true if so
 def check_file_path(path):
    downloads = downloads_path()
-    return downloads in os.path.abspath(downloads + path)
+    abspath = os.path.abspath(downloads + path)
+    # also test the backslash form of path, which is needed when debugging on Windows
+    return path in abspath or path.replace('/', '\\') in abspath


 # sanitizes file names for windows fs
--- a/frontend.py
+++ b/frontend.py
@ -1,7 +1,5 @@
 from __future__ import unicode_literals

-import os.path
-
 from flask import (
    Blueprint,
    request,
@ -17,6 +15,7 @@ from backend import (
    enqueue_download,
    internet_available,
    delete_file_or_playlist,
+    enqueue_ingest,
    check_file_path
 )
 from forms.download import DownloadForm
@ -52,7 +51,6 @@ def downloader():

    # if there has been a problem with the form (empty or error) or the link is not valid
    if not form.validate_on_submit() or not valid_link:
-        valid_link = True if url == 'None' else False  # if url is empty, don't show error
        return render_template('downloader.html', form=form, amount=len(urls))

    if not internet_available():
@ -70,7 +68,8 @@ def downloader():
@frontend.route('/library', methods=['GET'])
 def library():
    videos = query_db("SELECT name, ext, path FROM video "
-                      "LEFT JOIN collection ON video.id = collection.video WHERE collection.video IS NULL ")
+                      "LEFT JOIN collection ON video.id = collection.video "
+                      "WHERE video.path = '' ")
    playlists = query_db("SELECT name, ROWID FROM playlist")
    if not playlists and not videos:
        flash('Library ist currently empty. Try downloading something!', 'primary')
@ -84,7 +83,7 @@ def library():
 def library_playlist():
    playlist = request.args.get('playlist', None)
    videos = query_db('SELECT video.name, video.ext, video.path FROM video '
-                      'LEFT JOIN collection ON video.id = collection.video '
+                      'LEFT JOIN collection ON video.id = collection.video OR video.ROWID = collection.video '
                      'LEFT JOIN playlist ON collection.playlist=playlist.folder '
                      'WHERE playlist.ROWID = :playlist',
                      {'playlist': playlist})
@ -145,6 +144,10 @@ def update():
                   {'url_rowid': url_rowid},
                   True)[0]

+    if url is None:
+        flash('Playlist has no URL. It probably was added from disk.', 'danger')
+        return redirect(request.args.get('from'))
+
    # kick off download process
    enqueue_download(url, update=True)

@ -179,3 +182,10 @@ def serve():
            'video/mp4',
            True
        )
+
+
+@frontend.route('/ingest', methods=['GET'])
+def ingest():  # GET /ingest: queue a manual ingest, flash a notice and redirect to the library page
+    enqueue_ingest()
+    flash('Ingest started', 'primary')
+    return redirect('/library')
--- a/schema.sql
+++ b/schema.sql
@ -1,6 +1,6 @@
 /*
    - unique youtube id / watch key identifies the song
-    - id is nullable for todo: ingest from local dev
+    - id is nullable for ingest from filesystem
    - name is title of video
    - ext is the file extension / type
    - path is relative to 'project_root/downloads/'
@ -24,16 +24,19 @@ CREATE TABLE IF NOT EXISTS video (
        example for folder:
            - 'playlist_name/'
            - 'playlist_name/playlist_ROWID/'
+    - url is nullable for manual ingest
 */
 CREATE TABLE IF NOT EXISTS playlist (
    folder TEXT PRIMARY KEY,
    name TEXT NOT NULL,
-    url TEXT UNIQUE NOT NULL
+    url TEXT UNIQUE
 );

 /*
    - playlist equals folder
-    - video equals id
+    - video equals id or rowid if from manual ingest
+        - caveat: a video id made up only of digits could be
+            mistaken for a rowid
    - simple n-m mapping
        (playlist contains multiple songs)
        (song can be in multiple playlists)