Source code for selfclean_audio.datasets.utils
# Copyright (c) Lucerne University of Applied Sciences and Arts.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import os
import torch
[docs]
def fast_scandir(
    path: str, exts: list[str], recursive: bool = False
) -> tuple[list[str], list[str]]:
    """
    Quickly scan a directory for files with specified extensions.

    From github.com/drscotthawley/aeiou/blob/main/aeiou/core.py

    Extension matching is case-insensitive on both sides: the suffix of each
    file name and every entry of ``exts`` are lower-cased before comparison.
    Filesystem errors (missing directory, permission denied, too many levels
    of symbolic links, ...) are treated as "nothing found here" so that one
    unreadable entry does not abort the whole scan. Errors that are not
    ``OSError`` (for example a wrong argument type) are not suppressed.

    Args:
        path (str): Directory path to scan.
        exts (list[str]): List of file extensions to filter, including the
            leading dot (e.g., ['.wav', '.mp3']). A single string is treated
            as one extension.
        recursive (bool): If True, scan subdirectories recursively.
            Defaults to False.

    Returns:
        tuple[list[str], list[str]]: A tuple containing a list of subfolder
        paths and a list of matched file paths. Direct children of ``path``
        come first, followed by the results of each subfolder in turn.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(exts, str):
        exts = [exts]
    # Normalise once so every level of the scan does an O(1) lookup.
    wanted = frozenset(ext.lower() for ext in exts)
    return _scan_tree(path, wanted, recursive)


def _scan_tree(
    path: str, wanted: frozenset, recursive: bool
) -> tuple[list[str], list[str]]:
    """Scan ``path`` for files whose lower-cased suffix is in ``wanted``."""
    subfolders: list[str] = []
    files: list[str] = []
    try:
        # The context manager closes the directory handle deterministically.
        with os.scandir(path) as entries:
            for entry in entries:
                try:
                    if entry.is_dir():
                        subfolders.append(entry.path)
                    elif entry.is_file():
                        suffix = os.path.splitext(entry.name)[1].lower()
                        if suffix in wanted:
                            files.append(entry.path)
                except OSError:
                    # e.g. "too many levels of symbolic links": skip entry.
                    continue
    except OSError:
        # e.g. permission denied or missing directory: keep what was found.
        pass
    if recursive:
        # Iterate over a snapshot because `subfolders` grows as we descend.
        for subfolder in list(subfolders):
            nested_dirs, nested_files = _scan_tree(subfolder, wanted, recursive)
            subfolders.extend(nested_dirs)
            files.extend(nested_files)
    return subfolders, files