diff --git a/Cargo.lock b/Cargo.lock
index 7df0a88..14a7cdb 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -11,6 +11,15 @@ dependencies = [
"memchr",
]
+[[package]]
+name = "android_system_properties"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
+dependencies = [
+ "libc",
+]
+
[[package]]
name = "anes"
version = "0.1.6"
@@ -73,6 +82,45 @@ version = "1.0.99"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100"
+[[package]]
+name = "asn1-rs"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f6fd5ddaf0351dff5b8da21b2fb4ff8e08ddd02857f0bf69c47639106c0fff0"
+dependencies = [
+ "asn1-rs-derive",
+ "asn1-rs-impl",
+ "displaydoc",
+ "nom",
+ "num-traits",
+ "rusticata-macros",
+ "thiserror",
+ "time",
+]
+
+[[package]]
+name = "asn1-rs-derive"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "726535892e8eae7e70657b4c8ea93d26b8553afb1ce617caee529ef96d7dee6c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+ "synstructure",
+]
+
+[[package]]
+name = "asn1-rs-impl"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2777730b2039ac0f95f093556e61b6d26cebed5393ca6f152717777cec3a42ed"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
[[package]]
name = "autocfg"
version = "1.5.0"
@@ -107,12 +155,53 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
+[[package]]
+name = "cbom-generator"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "chrono",
+ "regex",
+ "scanner-core",
+ "serde",
+ "serde_json",
+ "tempfile",
+ "toml",
+ "uuid",
+ "walkdir",
+ "x509-parser",
+]
+
+[[package]]
+name = "cc"
+version = "1.2.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "65193589c6404eb80b450d618eaf9a2cafaaafd57ecce47370519ef674a7bd44"
+dependencies = [
+ "find-msvc-tools",
+ "shlex",
+]
+
[[package]]
name = "cfg-if"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
+[[package]]
+name = "chrono"
+version = "0.4.42"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2"
+dependencies = [
+ "iana-time-zone",
+ "js-sys",
+ "num-traits",
+ "serde",
+ "wasm-bindgen",
+ "windows-link 0.2.0",
+]
+
[[package]]
name = "ciborium"
version = "0.2.2"
@@ -144,18 +233,11 @@ dependencies = [
name = "cipherscope"
version = "0.1.0"
dependencies = [
- "aho-corasick",
"anyhow",
+ "cbom-generator",
"clap",
- "crossbeam-channel",
- "detector-kotlin",
- "detector-objc",
- "detector-swift",
- "ignore",
"indicatif",
- "once_cell",
"rayon",
- "regex",
"scanner-core",
"serde",
"serde_json",
@@ -193,7 +275,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -221,6 +303,12 @@ dependencies = [
"windows-sys 0.59.0",
]
+[[package]]
+name = "core-foundation-sys"
+version = "0.8.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
+
[[package]]
name = "criterion"
version = "0.5.1"
@@ -298,91 +386,43 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]]
-name = "detector-c"
-version = "0.1.0"
-dependencies = [
- "anyhow",
- "scanner-core",
-]
-
-[[package]]
-name = "detector-cpp"
-version = "0.1.0"
-dependencies = [
- "anyhow",
- "scanner-core",
-]
-
-[[package]]
-name = "detector-erlang"
-version = "0.1.0"
-dependencies = [
- "anyhow",
- "scanner-core",
-]
-
-[[package]]
-name = "detector-go"
-version = "0.1.0"
-dependencies = [
- "anyhow",
- "scanner-core",
-]
-
-[[package]]
-name = "detector-java"
-version = "0.1.0"
-dependencies = [
- "anyhow",
- "scanner-core",
-]
-
-[[package]]
-name = "detector-kotlin"
-version = "0.1.0"
-dependencies = [
- "anyhow",
- "scanner-core",
-]
-
-[[package]]
-name = "detector-objc"
-version = "0.1.0"
-dependencies = [
- "anyhow",
- "scanner-core",
-]
-
-[[package]]
-name = "detector-php"
-version = "0.1.0"
-dependencies = [
- "anyhow",
- "scanner-core",
-]
+name = "data-encoding"
+version = "2.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476"
[[package]]
-name = "detector-python"
-version = "0.1.0"
+name = "der-parser"
+version = "8.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dbd676fbbab537128ef0278adb5576cf363cff6aa22a7b24effe97347cfab61e"
dependencies = [
- "anyhow",
- "scanner-core",
+ "asn1-rs",
+ "displaydoc",
+ "nom",
+ "num-bigint",
+ "num-traits",
+ "rusticata-macros",
]
[[package]]
-name = "detector-rust"
-version = "0.1.0"
+name = "deranged"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d630bccd429a5bb5a64b5e94f693bfc48c9f8566418fda4c494cc94f911f87cc"
dependencies = [
- "anyhow",
- "scanner-core",
+ "powerfmt",
]
[[package]]
-name = "detector-swift"
-version = "0.1.0"
+name = "displaydoc"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
- "anyhow",
- "scanner-core",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.106",
]
[[package]]
@@ -419,6 +459,12 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
+[[package]]
+name = "find-msvc-tools"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7fd99930f64d146689264c637b5af2f0233a933bef0d8570e2526bf9e083192d"
+
[[package]]
name = "getrandom"
version = "0.3.3"
@@ -472,6 +518,30 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
+[[package]]
+name = "iana-time-zone"
+version = "0.1.64"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb"
+dependencies = [
+ "android_system_properties",
+ "core-foundation-sys",
+ "iana-time-zone-haiku",
+ "js-sys",
+ "log",
+ "wasm-bindgen",
+ "windows-core",
+]
+
+[[package]]
+name = "iana-time-zone-haiku"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
+dependencies = [
+ "cc",
+]
+
[[package]]
name = "ignore"
version = "0.4.23"
@@ -553,6 +623,12 @@ dependencies = [
"wasm-bindgen",
]
+[[package]]
+name = "lazy_static"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
+
[[package]]
name = "libc"
version = "0.2.175"
@@ -578,12 +654,44 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
[[package]]
-name = "memmap2"
-version = "0.9.8"
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
+name = "nom"
+version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
- "libc",
+ "memchr",
+ "minimal-lexical",
+]
+
+[[package]]
+name = "num-bigint"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
+dependencies = [
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-conv"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
+
+[[package]]
+name = "num-integer"
+version = "0.1.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
+dependencies = [
+ "num-traits",
]
[[package]]
@@ -611,6 +719,15 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
+[[package]]
+name = "oid-registry"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9bedf36ffb6ba96c2eb7144ef6270557b52e54b20c0a8e1eb2ff99a6c6959bff"
+dependencies = [
+ "asn1-rs",
+]
+
[[package]]
name = "once_cell"
version = "1.21.3"
@@ -663,6 +780,12 @@ version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483"
+[[package]]
+name = "powerfmt"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
+
[[package]]
name = "proc-macro2"
version = "1.0.101"
@@ -736,6 +859,15 @@ version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
+[[package]]
+name = "rusticata-macros"
+version = "4.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "faf0c4a6ece9950b9abdb62b1cfcf2a68b3b67a10ba445b3bb85be2a293d0632"
+dependencies = [
+ "nom",
+]
+
[[package]]
name = "rustix"
version = "1.1.2"
@@ -780,15 +912,12 @@ dependencies = [
"crossbeam-channel",
"globset",
"ignore",
- "memmap2",
"num_cpus",
- "once_cell",
"rayon",
"regex",
"serde",
"serde_json",
"tempfile",
- "thiserror",
"toml",
]
@@ -809,7 +938,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -833,12 +962,35 @@ dependencies = [
"serde",
]
+[[package]]
+name = "sha1_smol"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d"
+
+[[package]]
+name = "shlex"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
+
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
[[package]]
name = "syn"
version = "2.0.106"
@@ -850,6 +1002,18 @@ dependencies = [
"unicode-ident",
]
+[[package]]
+name = "synstructure"
+version = "0.12.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+ "unicode-xid",
+]
+
[[package]]
name = "tempfile"
version = "3.22.0"
@@ -880,7 +1044,37 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
+]
+
+[[package]]
+name = "time"
+version = "0.3.43"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "83bde6f1ec10e72d583d91623c939f623002284ef622b87de38cfd546cbf2031"
+dependencies = [
+ "deranged",
+ "num-conv",
+ "powerfmt",
+ "serde",
+ "time-core",
+ "time-macros",
+]
+
+[[package]]
+name = "time-core"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b"
+
+[[package]]
+name = "time-macros"
+version = "0.2.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3"
+dependencies = [
+ "num-conv",
+ "time-core",
]
[[package]]
@@ -946,12 +1140,31 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c"
+[[package]]
+name = "unicode-xid"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
+
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
+[[package]]
+name = "uuid"
+version = "1.18.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2"
+dependencies = [
+ "getrandom",
+ "js-sys",
+ "serde",
+ "sha1_smol",
+ "wasm-bindgen",
+]
+
[[package]]
name = "walkdir"
version = "2.5.0"
@@ -1003,7 +1216,7 @@ dependencies = [
"log",
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
"wasm-bindgen-shared",
]
@@ -1025,7 +1238,7 @@ checksum = "7bb4ce89b08211f923caf51d527662b75bdc9c9c7aab40f86dcb9fb85ac552aa"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@@ -1068,6 +1281,41 @@ dependencies = [
"windows-sys 0.61.0",
]
+[[package]]
+name = "windows-core"
+version = "0.62.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57fe7168f7de578d2d8a05b07fd61870d2e73b4020e9f49aa00da8471723497c"
+dependencies = [
+ "windows-implement",
+ "windows-interface",
+ "windows-link 0.2.0",
+ "windows-result",
+ "windows-strings",
+]
+
+[[package]]
+name = "windows-implement"
+version = "0.60.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.106",
+]
+
+[[package]]
+name = "windows-interface"
+version = "0.59.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.106",
+]
+
[[package]]
name = "windows-link"
version = "0.1.3"
@@ -1080,6 +1328,24 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65"
+[[package]]
+name = "windows-result"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7084dcc306f89883455a206237404d3eaf961e5bd7e0f312f7c91f57eb44167f"
+dependencies = [
+ "windows-link 0.2.0",
+]
+
+[[package]]
+name = "windows-strings"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7218c655a553b0bed4426cf54b20d7ba363ef543b52d515b3e48d7fd55318dda"
+dependencies = [
+ "windows-link 0.2.0",
+]
+
[[package]]
name = "windows-sys"
version = "0.59.0"
@@ -1250,3 +1516,20 @@ name = "wit-bindgen"
version = "0.45.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c573471f125075647d03df72e026074b7203790d41351cd6edc96f46bcccd36"
+
+[[package]]
+name = "x509-parser"
+version = "0.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7069fba5b66b9193bd2c5d3d4ff12b839118f6bcbef5328efafafb5395cf63da"
+dependencies = [
+ "asn1-rs",
+ "data-encoding",
+ "der-parser",
+ "lazy_static",
+ "nom",
+ "oid-registry",
+ "rusticata-macros",
+ "thiserror",
+ "time",
+]
diff --git a/Cargo.toml b/Cargo.toml
index 1b27eb1..e86d4e4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,17 +1,7 @@
[workspace]
members = [
"crates/scanner-core",
- "crates/detector-go",
- "crates/detector-java",
- "crates/detector-c",
- "crates/detector-cpp",
- "crates/detector-rust",
- "crates/detector-python",
- "crates/detector-php",
- "crates/detector-swift",
- "crates/detector-objc",
- "crates/detector-kotlin",
- "crates/detector-erlang",
+ "crates/cbom-generator",
"crates/cli",
]
resolver = "2"
@@ -26,20 +16,20 @@ repository = "https://example.com/cipherscope/repo"
[workspace.dependencies]
anyhow = "1"
-thiserror = "1"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
toml = "0.8"
regex = "1"
aho-corasick = "1"
-once_cell = "1"
rayon = "1"
ignore = "0.4"
-memmap2 = "0.9"
clap = { version = "4", features = ["derive"] }
-humantime = "2"
globset = "0.4"
crossbeam-channel = "0.5"
walkdir = "2"
num_cpus = "1"
+uuid = { version = "1", features = ["v4", "v5", "serde"] }
+x509-parser = "0.15"
+chrono = { version = "0.4", features = ["serde"] }
+tempfile = "3"
diff --git a/README.md b/README.md
index f21f70e..c97cfa7 100644
--- a/README.md
+++ b/README.md
@@ -1,166 +1,100 @@
-## CipherScope
+# CipherScope
-Fast, low-false-positive static scanner that finds third-party cryptographic libraries and call sites across 11 programming languages: Go, Java, C, C++, Rust, Python, PHP, Swift, Objective-C, Kotlin, and Erlang.
+Fast cryptographic inventory generator that creates Minimal Viable Cryptographic Bill of Materials (MV-CBOM) documents. Scans codebases to identify cryptographic algorithms, certificates, and assess post-quantum cryptography readiness.
-### Install & Run
+## Quick Start
```bash
cargo build --release
-./target/release/cipherscope .
+./target/release/cipherscope --patterns patterns.toml --progress /path/to/scan [... paths]
```
-JSONL and SARIF:
+## What It Does
-```bash
-./target/release/cipherscope . --json > findings.jsonl
-./target/release/cipherscope . --sarif findings.sarif
-```
-
-Key flags:
-- `--threads N`: set thread pool size
-- `--max-file-size MB`: skip large files (default 2)
-- `--patterns PATH`: specify patterns file (default: `patterns.toml`)
-- `--progress`: show progress bar during scanning
-- `--include-glob GLOB` / `--exclude-glob GLOB`
-- `--deterministic`: stable output ordering
-- `--print-config`: print loaded `patterns.toml`
-- `--dry-run`: list files to be scanned
-
-### Output
-
-Pretty table to stdout (default) and optional JSONL/SARIF.
-
-Example table:
-
-```text
-Language | Library | Count | Example
----------|---------|-------|--------
-Rust | RustCrypto | 2 | src/main.rs:12 aes_gcm::Aes256Gcm
-```
+- **Detects** cryptographic usage across 11 languages
+- **Identifies** many cryptographic algorithms (AES, SHA, RSA, ECDSA, ChaCha20, etc.)
+- **Outputs** JSON inventory with NIST quantum security levels
+- **Runs fast** - GiB/s throughput with parallel scanning
-JSONL example:
+## Example Output
```json
-{"language":"Rust","library":"RustCrypto","file":"src/main.rs","span":{"line":12,"column":5},"symbol":"aes_gcm::Aes256Gcm","snippet":"use aes_gcm::Aes256Gcm;","detector_id":"detector-rust"}
+{
+ "bomFormat": "MV-CBOM",
+ "specVersion": "1.0",
+ "cryptoAssets": [{
+ "name": "RSA",
+ "assetProperties": {
+ "primitive": "signature",
+ "parameterSet": {"keySize": 2048},
+ "nistQuantumSecurityLevel": 0
+ }
+ }]
+}
```
-SARIF snippet:
-
-```json
-{"version":"2.1.0","runs":[{"tool":{"driver":{"name":"cipherscope"}},"results":[{"ruleId":"detector-rust","message":{"text":"RustCrypto in Rust"}}]}]}
-```
-
-### Configuration & Patterns
-
-Patterns are loaded from `patterns.toml` (and optional `patterns.local.toml`, if you add it). The schema supports per-language `include`/`import`/`namespace`/`apis` anchored regexes. The engine strips comments and avoids string literals to reduce false positives.
-
-#### Supported Languages & File Extensions
-
-The scanner automatically detects and processes files with these extensions:
+## Options
-- **C/C++**: `.c`, `.h`, `.cc`, `.cpp`, `.cxx`, `.c++`, `.hpp`, `.hxx`, `.h++`, `.hh`
-- **Java**: `.java`
-- **Go**: `.go`
-- **Rust**: `.rs`
-- **Python**: `.py`, `.pyw`, `.pyi`
-- **PHP**: `.php`, `.phtml`, `.php3`, `.php4`, `.php5`, `.phps`
-- **Swift**: `.swift`
-- **Objective-C**: `.m`, `.mm`, `.M`
-- **Kotlin**: `.kt`, `.kts`
-- **Erlang**: `.erl`, `.hrl`, `.beam`
+### Core Options
+- `--patterns PATH` - Custom patterns file (default: `patterns.toml`)
+- `--progress` - Show progress bar during scanning
+- `--deterministic` - Reproducible output for testing/ground-truth generation
+- `--output FILE` - Output file for single-project CBOM (default: stdout)
+- `--recursive` - Generate MV-CBOMs for all discovered projects
+- `--output-dir DIR` - Output directory for recursive CBOMs
-#### High-Performance Architecture
+### Filtering & Performance
+- `--threads N` - Number of processing threads
+- `--max-file-size MB` - Maximum file size to scan (default: 2MB)
+- `--include-glob GLOB` - Include files matching glob pattern(s)
+- `--exclude-glob GLOB` - Exclude files matching glob pattern(s)
-CipherScope uses a **producer-consumer model** inspired by ripgrep to achieve maximum throughput on large codebases:
+### Certificate Scanning
+- `--skip-certificates` - Skip certificate scanning during CBOM generation
-**Producer (Parallel Directory Walker)**:
-- Uses `ignore::WalkParallel` for parallel filesystem traversal
-- Automatically respects `.gitignore` files and skips hidden directories
-- Critical optimization: avoids descending into `node_modules`, `.git`, and other irrelevant directories
-- Language detection happens early to filter files before expensive operations
+### Configuration
+- `--print-config` - Print merged patterns/config and exit
-**Consumers (Parallel File Processors)**:
-- Uses `rayon` thread pools for parallel file processing
-- Batched processing (1000 files per batch) for better cache locality
-- Comment stripping and preprocessing shared across all detectors
-- Lockless atomic counters for progress tracking
+## Languages Supported
-**Key Optimizations**:
-- **Ultra-fast language detection**: Direct byte comparison, no string allocations
-- **Syscall reduction**: 90% fewer `metadata()` calls through early filtering
-- **Aho-Corasick prefiltering**: Skip expensive regex matching when no keywords found
-- **Batched channel communication**: Reduces overhead between producer/consumer threads
-- **Optimal thread configuration**: Automatically uses `num_cpus` for directory traversal
+C, C++, Go, Java, Kotlin, Python, Rust, Swift, Objective-C, PHP, Erlang
-#### Performance Benchmarks
+## Configuration
-**File Discovery Performance**:
-- **5M file directory**: ~20-30 seconds (previously 90+ seconds)
-- **Throughput**: 150,000-250,000 files/second discovery rate
-- **Processing**: 4+ GiB/s content scanning throughput
+Edit `patterns.toml` to add new libraries or algorithms. No code changes needed.
-**Scalability**:
-- Linear scaling with CPU cores for file processing
-- Efficient memory usage through batched processing
-- Progress reporting accuracy: 100% (matches `find` command results)
+## How It Works (High-Level)
-### Detector Architecture
+1. Workspace discovery and prefilter
+ - Walks files respecting .gitignore
+ - Cheap Aho-Corasick prefilter using language-specific substrings derived from patterns
+2. Language detection and comment stripping
+ - Detects language by extension; strips comments once for fast regex matching
+3. Library identification (anchors)
+ - Per-language detector loads compiled patterns for that language (from `patterns.toml`)
+ - Looks for include/import/namespace/API anchors to confirm a library is present in a file
+4. Algorithm matching
+ - For each identified library, matches algorithm `symbol_patterns` (regex) against the file
+ - Extracts parameters via `parameter_patterns` (e.g., key size, curve) with defaults when absent
+ - Emits findings with file, line/column, library, algorithm, primitive, and NIST quantum level
+5. Deep static analysis (fallback/enrichment)
+ - Always runs, regardless of how many findings the scanner produced: analyzes files directly with the registry to find additional algorithms even if no library finding was produced
+6. CBOM generation
+ - Findings are deduplicated and merged
+ - Final MV-CBOM JSON is printed or written per CLI options
-The scanner uses a modular detector architecture with dedicated crates for each language:
+All behavior is driven by `patterns.toml` — adding new libraries/algorithms is a data-only change.
-- **detector-c**: C language support
-- **detector-cpp**: C++ language support
-- **detector-go**: Go language support
-- **detector-java**: Java language support
-- **detector-rust**: Rust language support
-- **detector-python**: Python language support
-- **detector-php**: PHP language support
-- **detector-swift**: Swift language support
-- **detector-objc**: Objective-C language support
-- **detector-kotlin**: Kotlin language support
-- **detector-erlang**: Erlang language support
-
-Each detector implements the `Detector` trait and can be extended independently. To add support for a new language, create a new detector crate under `crates/` or extend the `patterns.toml` to cover additional libraries. See `crates/scanner-core/src/lib.rs` for the trait definition and pattern-driven detector implementation.
-
-### Tests & Benchmarks
-
-Run unit tests and integration tests (fixtures):
+## Testing
```bash
cargo test
```
-Benchmark scan throughput on test fixtures:
-
-```bash
-cargo bench
-```
-
-**Expected benchmark results** (on modern hardware):
-- **Throughput**: ~4.2 GiB/s content processing
-- **File discovery**: 150K-250K files/second
-- **Memory efficient**: Batched processing prevents memory spikes
-
-**Real-world performance** (5M file Java codebase):
-- **Discovery phase**: 20-30 seconds (down from 90+ seconds)
-- **Processing phase**: Depends on file content and pattern complexity
-- **Progress accuracy**: Exact match with `find` command results
-
-To test progress reporting accuracy on your codebase:
-
-```bash
-# Count files that match your glob patterns
-find /path/to/code -name "*.java" | wc -l
-
-# Run cipherscope with same pattern - numbers should match
-./target/release/cipherscope /path/to/code --include-glob "*.java" --progress
-```
-
-### Contributing
-
-See `CONTRIBUTING.md` for guidelines on adding languages, libraries, and improving performance.
+## License
+MIT
diff --git a/crates/cbom-generator/Cargo.toml b/crates/cbom-generator/Cargo.toml
new file mode 100644
index 0000000..7920bfd
--- /dev/null
+++ b/crates/cbom-generator/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "cbom-generator"
+version = "0.1.0"
+edition = "2021"
+license = "Apache-2.0"
+
+[dependencies]
+scanner-core = { path = "../scanner-core" }
+anyhow = { workspace = true }
+serde = { workspace = true }
+serde_json = { workspace = true }
+toml = { workspace = true }
+uuid = { workspace = true }
+x509-parser = { workspace = true }
+chrono = { workspace = true }
+regex = { workspace = true }
+walkdir = { workspace = true }
+
+[dev-dependencies]
+tempfile = { workspace = true }
+
+[lib]
+name = "cbom_generator"
+path = "src/lib.rs"
\ No newline at end of file
diff --git a/crates/cbom-generator/src/algorithm_detector.rs b/crates/cbom-generator/src/algorithm_detector.rs
new file mode 100644
index 0000000..3a1edaf
--- /dev/null
+++ b/crates/cbom-generator/src/algorithm_detector.rs
@@ -0,0 +1,485 @@
+//! Algorithm detection functionality for extracting cryptographic algorithms from source code
+
+use anyhow::{Context, Result};
+use scanner_core::{CompiledAlgorithm, Finding, LineIndex, PatternRegistry, Scanner};
+use serde_json::json;
+use std::collections::{HashMap, HashSet};
+use std::fs;
+use std::path::Path;
+use uuid::Uuid;
+use walkdir::WalkDir;
+
+use crate::{
+ AlgorithmProperties, AssetEvidence, AssetProperties, AssetType, CryptoAsset,
+ CryptographicPrimitive,
+};
+
+/// Detector for cryptographic algorithms in source code.
+///
+/// Holds an optional shared [`PatternRegistry`]. When no registry is set,
+/// [`AlgorithmDetector::detect_algorithms`] returns an empty list.
+#[derive(Default)]
+pub struct AlgorithmDetector {
+    /// Shared pattern registry providing the algorithm definitions; `None` when the
+    /// detector was built without a registry.
+    registry: Option<std::sync::Arc<PatternRegistry>>,
+    /// Deterministic mode for stable IDs during tests/ground-truth generation.
+    deterministic: bool,
+}
+
+impl AlgorithmDetector {
+    /// Builds a detector with no pattern registry and deterministic mode turned off.
+    pub fn new() -> Self {
+        Self {
+            registry: None,
+            deterministic: false,
+        }
+    }
+
+    /// Creates a detector backed by the shared `registry`, with deterministic mode off.
+    pub fn with_registry(registry: std::sync::Arc<PatternRegistry>) -> Self {
+        Self {
+            registry: Some(registry),
+            deterministic: false,
+        }
+    }
+
+    /// Creates a detector backed by the shared `registry`, with deterministic mode set
+    /// explicitly.
+    ///
+    /// `deterministic` is stored as-is; the struct documents it as the switch for
+    /// stable IDs during tests/ground-truth generation.
+    pub fn with_registry_and_mode(
+        registry: std::sync::Arc<PatternRegistry>,
+        deterministic: bool,
+    ) -> Self {
+        Self {
+            registry: Some(registry),
+            deterministic,
+        }
+    }
+
+    /// Detect algorithms from scanner findings using the pattern registry.
+    ///
+    /// Assets are first extracted from each entry in `findings`, then the deep static
+    /// analysis pass is run over `scan_path` unconditionally. Assets from both passes
+    /// are de-duplicated by their deduplication key (first occurrence wins) and then
+    /// merged.
+    ///
+    /// Returns an empty list when the detector has no registry.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error returned by per-finding extraction or by the deep static
+    /// analysis pass.
+    pub fn detect_algorithms(
+        &self,
+        scan_path: &Path,
+        findings: &[Finding],
+    ) -> Result<Vec<CryptoAsset>> {
+        // Without a registry there are no algorithm definitions to match against.
+        let registry = match &self.registry {
+            Some(registry) => registry,
+            None => return Ok(Vec::new()),
+        };
+
+        let mut algorithms = Vec::new();
+        let mut seen_algorithms = HashSet::new();
+
+        // Extract algorithms from findings using registry patterns
+        for finding in findings {
+            if let Some(algorithm_assets) =
+                self.extract_algorithms_from_finding_with_registry(finding, registry)?
+            {
+                for asset in algorithm_assets {
+                    // `insert` returns true only for keys not seen before.
+                    if seen_algorithms.insert(self.create_deduplication_key(&asset)) {
+                        algorithms.push(asset);
+                    }
+                }
+            }
+        }
+
+        // Always perform deep static analysis regardless of findings count
+        let additional_algorithms =
+            self.perform_deep_static_analysis_with_registry(scan_path, registry)?;
+        for asset in additional_algorithms {
+            if seen_algorithms.insert(self.create_deduplication_key(&asset)) {
+                algorithms.push(asset);
+            }
+        }
+
+        // Merge duplicate algorithms with different parameter specificity
+        Ok(self.merge_algorithm_assets(algorithms))
+    }
+
+ /// Extract algorithms from finding using pattern registry
+ fn extract_algorithms_from_finding_with_registry(
+ &self,
+ finding: &Finding,
+ registry: &PatternRegistry,
+ ) -> Result