Skip to content

Commit

Permalink
fixup! Make built-in adapters' identifiers configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
lafrenierejm committed Sep 9, 2024
1 parent 3f8efba commit 43243e0
Show file tree
Hide file tree
Showing 11 changed files with 168 additions and 232 deletions.
197 changes: 106 additions & 91 deletions src/adapters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ pub struct AdapterMeta {
/// Indicates whether this adapter can descend (i.e. call rga_preproc again). If true, the cache key needs to include the list of active adapters.
pub recurses: bool,
/// list of matchers (interpreted as a OR b OR ...)
pub fast_matchers: Option<Vec<FastFileMatcher>>,
pub fast_matchers: Vec<FastFileMatcher>,
/// List of matchers used when MIME type detection is active (interpreted as a OR b OR ...).
/// Warning: this *overrides* the fast matchers.
pub slow_matchers: Option<Vec<FileMatcher>>,
pub slow_matchers: Vec<FileMatcher>,
/// If true, slow_matchers is merged with the fast matchers when `accurate` is enabled.
/// For example, for sqlite you want this disabled, since the db extension can mean other things and the MIME type matching is very accurate for sqlite.
/// For tar, however, you want it enabled, since the tar extension is very accurate but the tar MIME matcher can have false negatives.
Expand All @@ -57,19 +57,18 @@ impl AdapterMeta {
&self.slow_matchers,
&self.fast_matchers,
) {
(true, false, Some(ref sm), _) => Box::new(sm.iter().map(Cow::Borrowed)),
(true, true, Some(ref sm), Some(ref fm)) => Box::new(
(true, false, sm, _) => Box::new(sm.iter().map(Cow::Borrowed)),
(true, true, sm, fm) => Box::new(
sm.iter().map(Cow::Borrowed).chain(
fm.iter()
.map(|e| Cow::Owned(FileMatcher::Fast(e.clone())))
.collect::<Vec<_>>(),
),
),
// don't have slow matchers or slow matching disabled
(true, _, None, Some(ref fm)) | (false, _, _, Some(ref fm)) => {
// slow matching disabled
(false, _, _, fm) => {
Box::new(fm.iter().map(|e| Cow::Owned(FileMatcher::Fast(e.clone()))))
}
_ => Box::new(::std::iter::empty()),
}
}
}
Expand All @@ -81,26 +80,25 @@ pub trait Adapter {
fn recurses(&self) -> bool;
fn disabled_by_default(&self) -> bool;
fn keep_fast_matchers_if_accurate(&self) -> bool;
fn extensions(&self) -> Option<Vec<String>>;
fn mimetypes(&self) -> Option<Vec<String>>;
fn extensions(&self) -> Vec<String>;
fn mimetypes(&self) -> Vec<String>;

fn metadata(&self) -> AdapterMeta {
return AdapterMeta {
name: self.name(),
version: self.version(),
description: self.description(),
recurses: true,
fast_matchers: self.extensions().map(|exts| {
exts.iter()
.map(|s| FastFileMatcher::FileExtension(s.to_string()))
.collect()
}),
slow_matchers: self.mimetypes().map(|mimetypes| {
mimetypes
.iter()
.map(|mimetype| FileMatcher::MimeType(mimetype.to_string()))
.collect()
}),
fast_matchers: self
.extensions()
.iter()
.map(|s| FastFileMatcher::FileExtension(s.to_string()))
.collect(),
slow_matchers: self
.mimetypes()
.iter()
.map(|mimetype| FileMatcher::MimeType(mimetype.to_string()))
.collect(),
disabled_by_default: self.disabled_by_default(),
keep_fast_matchers_if_accurate: self.keep_fast_matchers_if_accurate(),
};
Expand Down Expand Up @@ -141,6 +139,80 @@ pub fn get_all_adapters(
custom_identifiers: Option<CustomIdentifiers>,
custom_adapters: Option<Vec<CustomAdapterConfig>>,
) -> AdaptersTuple {
let mut ffmpeg_extensions = strs(ffmpeg::EXTENSIONS);
let mut ffmpeg_mimetypes = strs(ffmpeg::MIMETYPES);
let mut gz_extensions = strs(decompress::EXTENSIONS_GZ);
let mut gz_mimetypes = strs(decompress::MIMETYPES_GZ);
let mut bz2_extensions = strs(decompress::EXTENSIONS_BZ2);
let mut bz2_mimetypes = strs(decompress::MIMETYPES_BZ2);
let mut xz_extensions = strs(decompress::EXTENSIONS_XZ);
let mut xz_mimetypes = strs(decompress::MIMETYPES_XZ);
let mut zst_extensions = strs(decompress::EXTENSIONS_ZST);
let mut zst_mimetypes = strs(decompress::MIMETYPES_ZST);
let mut zip_extensions = strs(zip::EXTENSIONS);
let mut zip_mimetypes = strs(zip::MIMETYPES);
let mut mbox_extensions = strs(mbox::EXTENSIONS);
let mut mbox_mimetypes = strs(mbox::MIMETYPES);

if let Some(identifiers) = custom_identifiers {
if let Some(identifier) = identifiers.ffmpeg {
if let Some(extensions) = identifier.extensions {
ffmpeg_extensions = extensions;
}
if let Some(mimetypes) = identifier.mimetypes {
ffmpeg_mimetypes = mimetypes;
}
}
if let Some(identifier) = identifiers.gz {
if let Some(extensions) = identifier.extensions {
gz_extensions = extensions;
}
if let Some(mimetypes) = identifier.mimetypes {
gz_mimetypes = mimetypes;
}
}
if let Some(identifier) = identifiers.bz2 {
if let Some(extensions) = identifier.extensions {
bz2_extensions = extensions;
}
if let Some(mimetypes) = identifier.mimetypes {
bz2_mimetypes = mimetypes;
}
}
if let Some(identifier) = identifiers.xz {
if let Some(extensions) = identifier.extensions {
xz_extensions = extensions;
}
if let Some(mimetypes) = identifier.mimetypes {
xz_mimetypes = mimetypes;
}
}
if let Some(identifier) = identifiers.zst {
if let Some(extensions) = identifier.extensions {
zst_extensions = extensions;
}
if let Some(mimetypes) = identifier.mimetypes {
zst_mimetypes = mimetypes;
}
}
if let Some(identifier) = identifiers.zip {
if let Some(extensions) = identifier.extensions {
zip_extensions = extensions;
}
if let Some(mimetypes) = identifier.mimetypes {
zip_mimetypes = mimetypes;
}
}
if let Some(identifier) = identifiers.mbox {
if let Some(extensions) = identifier.extensions {
mbox_extensions = extensions;
}
if let Some(mimetypes) = identifier.mimetypes {
mbox_mimetypes = mimetypes;
}
}
}

// order in descending priority
let mut adapters: Vec<Arc<dyn FileAdapter>> = vec![];
if let Some(custom_adapters) = custom_adapters {
Expand All @@ -149,86 +221,29 @@ pub fn get_all_adapters(
}
}

let custom_identifiers = custom_identifiers.unwrap_or_default();
let internal_adapters: Vec<Arc<dyn FileAdapter>> = vec![
Arc::new(PostprocPageBreaks::default()),
Arc::new(ffmpeg::FFmpegAdapter {
extensions: custom_identifiers
.ffmpeg
.extensions
.clone()
.unwrap_or_else(|| strs(ffmpeg::EXTENSIONS)),
mimetypes: custom_identifiers
.ffmpeg
.mimetypes
.clone()
.unwrap_or_else(|| strs(ffmpeg::MIMETYPES)),
extensions: ffmpeg_extensions,
mimetypes: ffmpeg_mimetypes,
}),
Arc::new(zip::ZipAdapter {
extensions: custom_identifiers
.zip
.extensions
.clone()
.unwrap_or_else(|| strs(zip::EXTENSIONS)),
mimetypes: custom_identifiers
.zip
.mimetypes
.clone()
.unwrap_or_else(|| strs(zip::MIMETYPES)),
extensions: zip_extensions,
mimetypes: zip_mimetypes,
}),
Arc::new(decompress::DecompressAdapter {
extensions_gz: custom_identifiers
.gz
.extensions
.clone()
.unwrap_or_else(|| strs(decompress::EXTENSIONS_GZ)),
extensions_bz2: custom_identifiers
.bz2
.extensions
.clone()
.unwrap_or_else(|| strs(decompress::EXTENSIONS_BZ2)),
extensions_xz: custom_identifiers
.xz
.extensions
.clone()
.unwrap_or_else(|| strs(decompress::EXTENSIONS_XZ)),
extensions_zst: custom_identifiers
.zst
.extensions
.clone()
.unwrap_or_else(|| strs(decompress::EXTENSIONS_ZST)),
mimetypes_gz: custom_identifiers
.gz
.extensions
.clone()
.unwrap_or_else(|| strs(decompress::MIMETYPES_GZ)),
mimetypes_bz2: custom_identifiers
.bz2
.extensions
.clone()
.unwrap_or_else(|| strs(decompress::MIMETYPES_BZ2)),
mimetypes_xz: custom_identifiers
.xz
.extensions
.clone()
.unwrap_or_else(|| strs(decompress::MIMETYPES_XZ)),
mimetypes_zst: custom_identifiers
.zst
.extensions
.clone()
.unwrap_or_else(|| strs(decompress::MIMETYPES_ZST)),
extensions_gz: gz_extensions,
extensions_bz2: bz2_extensions,
extensions_xz: xz_extensions,
extensions_zst: zst_extensions,
mimetypes_gz: gz_mimetypes,
mimetypes_bz2: bz2_mimetypes,
mimetypes_xz: xz_mimetypes,
mimetypes_zst: zst_mimetypes,
}),
Arc::new(mbox::MboxAdapter {
extensions: custom_identifiers
.mbox
.extensions
.clone()
.unwrap_or_else(|| strs(mbox::EXTENSIONS)),
mimetypes: custom_identifiers
.mbox
.mimetypes
.clone()
.unwrap_or_else(|| strs(mbox::MIMETYPES)),
extensions: mbox_extensions,
mimetypes: mbox_mimetypes,
}),
Arc::new(tar::TarAdapter::new()),
Arc::new(sqlite::SqliteAdapter::new()),
Expand Down
Loading

0 comments on commit 43243e0

Please sign in to comment.