diff --git a/Pipfile.lock b/Pipfile.lock index aaf687a6..f3af7d31 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -18,12 +18,12 @@ "default": { "cachetools": { "hashes": [ - "sha256:69a7a52634fed8b8bf6e24a050fb60bff1c9bd8f6d24572b99c32d4e71e62a51", - "sha256:82c5c05585e70b6ba2d3ae09ea60b79548872185d2f24ae1f2709d37299fd607" + "sha256:a9abf18ff3b86c7d05b27ead412e235e16ae045925e531fae38d5fada5ed5b08", + "sha256:d52fef60e6e964a1969cfb61ccf6242a801b432790fe520d78720d757c81cbd2" ], "index": "pypi", - "markers": "python_version >= '3.9'", - "version": "==6.2.4" + "markers": "python_version >= '3.10'", + "version": "==7.0.0" }, "certifi": { "hashes": [ @@ -33,6 +33,96 @@ "markers": "python_version >= '3.7'", "version": "==2026.1.4" }, + "cffi": { + "hashes": [ + "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", + "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", + "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", + "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", + "sha256:0cf2d91ecc3fcc0625c2c530fe004f82c110405f101548512cce44322fa8ac44", + "sha256:0f6084a0ea23d05d20c3edcda20c3d006f9b6f3fefeac38f59262e10cef47ee2", + "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", + "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", + "sha256:1cd13c99ce269b3ed80b417dcd591415d3372bcac067009b6e0f59c7d4015e65", + "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", + "sha256:1f72fb8906754ac8a2cc3f9f5aaa298070652a0ffae577e0ea9bd480dc3c931a", + "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", + "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", + "sha256:2081580ebb843f759b9f617314a24ed5738c51d2aee65d31e02f6f7a2b97707a", + "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", + "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", + 
"sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", + "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", + "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", + "sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c", + "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", + "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", + "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", + "sha256:3e837e369566884707ddaf85fc1744b47575005c0a229de3327f8f9a20f4efeb", + "sha256:3f4d46d8b35698056ec29bca21546e1551a205058ae1a181d871e278b0b28165", + "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", + "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", + "sha256:4647afc2f90d1ddd33441e5b0e85b16b12ddec4fca55f0d9671fef036ecca27c", + "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", + "sha256:53f77cbe57044e88bbd5ed26ac1d0514d2acf0591dd6bb02a3ae37f76811b80c", + "sha256:5eda85d6d1879e692d546a078b44251cdd08dd1cfb98dfb77b670c97cee49ea0", + "sha256:5fed36fccc0612a53f1d4d9a816b50a36702c28a2aa880cb8a122b3466638743", + "sha256:61d028e90346df14fedc3d1e5441df818d095f3b87d286825dfcbd6459b7ef63", + "sha256:66f011380d0e49ed280c789fbd08ff0d40968ee7b665575489afa95c98196ab5", + "sha256:6824f87845e3396029f3820c206e459ccc91760e8fa24422f8b0c3d1731cbec5", + "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", + "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", + "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", + "sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93", + "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", + "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", + "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", 
+ "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", + "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", + "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", + "sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26", + "sha256:89472c9762729b5ae1ad974b777416bfda4ac5642423fa93bd57a09204712322", + "sha256:8ea985900c5c95ce9db1745f7933eeef5d314f0565b27625d9a10ec9881e1bfb", + "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", + "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", + "sha256:9332088d75dc3241c702d852d4671613136d90fa6881da7d770a483fd05248b4", + "sha256:94698a9c5f91f9d138526b48fe26a199609544591f859c870d477351dc7b2414", + "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", + "sha256:9de40a7b0323d889cf8d23d1ef214f565ab154443c42737dfe52ff82cf857664", + "sha256:a05d0c237b3349096d3981b727493e22147f934b20f6f125a3eba8f994bec4a9", + "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", + "sha256:b18a3ed7d5b3bd8d9ef7a8cb226502c6bf8308df1525e1cc676c3680e7176739", + "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", + "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", + "sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe", + "sha256:b882b3df248017dba09d6b16defe9b5c407fe32fc7c65a9c69798e6175601be9", + "sha256:baf5215e0ab74c16e2dd324e8ec067ef59e41125d3eade2b863d294fd5035c92", + "sha256:c649e3a33450ec82378822b3dad03cc228b8f5963c0c12fc3b1e0ab940f768a5", + "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", + "sha256:c6638687455baf640e37344fe26d37c404db8b80d037c3d29f58fe8d1c3b194d", + "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", + "sha256:cb527a79772e5ef98fb1d700678fe031e353e765d1ca2d409c92263c6d43e09f", + 
"sha256:cf364028c016c03078a23b503f02058f1814320a56ad535686f90565636a9495", + "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", + "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", + "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", + "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", + "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", + "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", + "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", + "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", + "sha256:de8dad4425a6ca6e4e5e297b27b5c824ecc7581910bf9aee86cb6835e6812aa7", + "sha256:e11e82b744887154b182fd3e7e8512418446501191994dbf9c9fc1f32cc8efd5", + "sha256:e6e73b9e02893c764e7e8d5bb5ce277f1a009cd5243f8228f75f842bf937c534", + "sha256:f73b96c41e3b2adedc34a7356e64c8eb96e03a3782b535e043a986276ce12a49", + "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", + "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", + "sha256:fc7de24befaeae77ba923797c7c87834c73648a05a4bde34b3b7e5588973a453", + "sha256:fe562eb1a64e67dd297ccc4f5addea2501664954f2692b69a76449ec7913ecbf" + ], + "markers": "python_version >= '3.9'", + "version": "==2.0.0" + }, "charset-normalizer": { "hashes": [ "sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad", @@ -230,6 +320,61 @@ "markers": "python_version >= '3.11'", "version": "==1.3.3" }, + "cryptography": { + "hashes": [ + "sha256:01df4f50f314fbe7009f54046e908d1754f19d0c6d3070df1e6268c5a4af09fa", + "sha256:0563655cb3c6d05fb2afe693340bc050c30f9f34e15763361cf08e94749401fc", + "sha256:078e5f06bd2fa5aea5a324f2a09f914b1484f1d0c2a4d6a8a28c74e72f65f2da", + "sha256:0a9ad24359fee86f131836a9ac3bffc9329e956624a2d379b613f8f8abaf5255", + "sha256:2067461c80271f422ee7bdbe79b9b4be54a5162e90345f86a23445a0cf3fd8a2", + 
"sha256:281526e865ed4166009e235afadf3a4c4cba6056f99336a99efba65336fd5485", + "sha256:2d08bc22efd73e8854b0b7caff402d735b354862f1145d7be3b9c0f740fef6a0", + "sha256:3c268a3490df22270955966ba236d6bc4a8f9b6e4ffddb78aac535f1a5ea471d", + "sha256:3d425eacbc9aceafd2cb429e42f4e5d5633c6f873f5e567077043ef1b9bbf616", + "sha256:44cc0675b27cadb71bdbb96099cca1fa051cd11d2ade09e5cd3a2edb929ed947", + "sha256:47bcd19517e6389132f76e2d5303ded6cf3f78903da2158a671be8de024f4cd0", + "sha256:485e2b65d25ec0d901bca7bcae0f53b00133bf3173916d8e421f6fddde103908", + "sha256:5aa3e463596b0087b3da0dbe2b2487e9fc261d25da85754e30e3b40637d61f81", + "sha256:5f14fba5bf6f4390d7ff8f086c566454bff0411f6d8aa7af79c88b6f9267aecc", + "sha256:62217ba44bf81b30abaeda1488686a04a702a261e26f87db51ff61d9d3510abd", + "sha256:6225d3ebe26a55dbc8ead5ad1265c0403552a63336499564675b29eb3184c09b", + "sha256:6bb5157bf6a350e5b28aee23beb2d84ae6f5be390b2f8ee7ea179cda077e1019", + "sha256:728fedc529efc1439eb6107b677f7f7558adab4553ef8669f0d02d42d7b959a7", + "sha256:766330cce7416c92b5e90c3bb71b1b79521760cdcfc3a6a1a182d4c9fab23d2b", + "sha256:812815182f6a0c1d49a37893a303b44eaac827d7f0d582cecfc81b6427f22973", + "sha256:829c2b12bbc5428ab02d6b7f7e9bbfd53e33efd6672d21341f2177470171ad8b", + "sha256:82a62483daf20b8134f6e92898da70d04d0ef9a75829d732ea1018678185f4f5", + "sha256:8a15fb869670efa8f83cbffbc8753c1abf236883225aed74cd179b720ac9ec80", + "sha256:8bf75b0259e87fa70bddc0b8b4078b76e7fd512fd9afae6c1193bcf440a4dbef", + "sha256:91627ebf691d1ea3976a031b61fb7bac1ccd745afa03602275dda443e11c8de0", + "sha256:93d8291da8d71024379ab2cb0b5c57915300155ad42e07f76bea6ad838d7e59b", + "sha256:9b34d8ba84454641a6bf4d6762d15847ecbd85c1316c0a7984e6e4e9f748ec2e", + "sha256:9b4d17bc7bd7cdd98e3af40b441feaea4c68225e2eb2341026c84511ad246c0c", + "sha256:9c2da296c8d3415b93e6053f5a728649a87a48ce084a9aaf51d6e46c87c7f2d2", + "sha256:a05177ff6296644ef2876fce50518dffb5bcdf903c85250974fc8bc85d54c0af", + "sha256:a90e43e3ef65e6dcf969dfe3bb40cbf5aef0d523dff95bfa24256be172a845f4", 
+ "sha256:a9556ba711f7c23f77b151d5798f3ac44a13455cc68db7697a1096e6d0563cab", + "sha256:b1de0ebf7587f28f9190b9cb526e901bf448c9e6a99655d2b07fff60e8212a82", + "sha256:be8c01a7d5a55f9a47d1888162b76c8f49d62b234d88f0ff91a9fbebe32ffbc3", + "sha256:bfd019f60f8abc2ed1b9be4ddc21cfef059c841d86d710bb69909a688cbb8f59", + "sha256:c236a44acfb610e70f6b3e1c3ca20ff24459659231ef2f8c48e879e2d32b73da", + "sha256:c411f16275b0dea722d76544a61d6421e2cc829ad76eec79280dbdc9ddf50061", + "sha256:c92010b58a51196a5f41c3795190203ac52edfd5dc3ff99149b4659eba9d2085", + "sha256:d5a45ddc256f492ce42a4e35879c5e5528c09cd9ad12420828c972951d8e016b", + "sha256:daa392191f626d50f1b136c9b4cf08af69ca8279d110ea24f5c2700054d2e263", + "sha256:dc1272e25ef673efe72f2096e92ae39dea1a1a450dd44918b15351f72c5a168e", + "sha256:dce1e4f068f03008da7fa51cc7abc6ddc5e5de3e3d1550334eaf8393982a5829", + "sha256:dd5aba870a2c40f87a3af043e0dee7d9eb02d4aff88a797b48f2b43eff8c3ab4", + "sha256:de0f5f4ec8711ebc555f54735d4c673fc34b65c44283895f1a08c2b49d2fd99c", + "sha256:df4a817fa7138dd0c96c8c8c20f04b8aaa1fac3bbf610913dcad8ea82e1bfd3f", + "sha256:e07ea39c5b048e085f15923511d8121e4a9dc45cee4e3b970ca4f0d338f23095", + "sha256:eeeb2e33d8dbcccc34d64651f00a98cb41b2dc69cef866771a5717e6734dfa32", + "sha256:fa0900b9ef9c49728887d1576fd8d9e7e3ea872fa9b25ef9b64888adc434e976", + "sha256:fdc3daab53b212472f1524d070735b2f0c214239df131903bae1d598016fa822" + ], + "markers": "python_version >= '3.8' and python_full_version not in '3.9.0, 3.9.1'", + "version": "==46.0.4" + }, "cycler": { "hashes": [ "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", @@ -330,20 +475,20 @@ }, "google-api-python-client": { "hashes": [ - "sha256:d8d0f6d85d7d1d10bdab32e642312ed572bdc98919f72f831b44b9a9cebba32f", - "sha256:e98e8e8f49e1b5048c2f8276473d6485febc76c9c47892a8b4d1afa2c9ec8278" + "sha256:45f2d8559b5c895dde6ad3fb33de025f5cb2c197fa5862f18df7f5295a172741", + "sha256:a258c09660a49c6159173f8bbece171278e917e104a11f0640b34751b79c8a1a" ], "index": "pypi", 
"markers": "python_version >= '3.7'", - "version": "==2.187.0" + "version": "==2.189.0" }, "google-auth": { "hashes": [ - "sha256:833229070a9dfee1a353ae9877dcd2dec069a8281a4e72e72f77d4a70ff945da", - "sha256:c516d68336bfde7cf0da26aab674a36fedcf04b37ac4edd59c597178760c3498" + "sha256:2e2a537873d449434252a9632c28bfc268b0adb1e53f9fb62afc5333a975903f", + "sha256:4f7e706b0cd3208a3d940a19a822c37a476ddba5450156c3e6624a71f7c841ce" ], "markers": "python_version >= '3.8'", - "version": "==2.47.0" + "version": "==2.48.0" }, "google-auth-httplib2": { "hashes": [ @@ -363,11 +508,11 @@ }, "httplib2": { "hashes": [ - "sha256:ac7ab497c50975147d4f7b1ade44becc7df2f8954d42b38b3d69c515f531135c", - "sha256:b9cd78abea9b4e43a7714c6e0f8b6b8561a6fc1e95d5dbd367f5bf0ef35f5d24" + "sha256:385e0869d7397484f4eab426197a4c020b606edd43372492337c0b4010ae5d24", + "sha256:dbf0c2fa3862acf3c55c078ea9c0bc4481d7dc5117cae71be9514912cf9f8349" ], "markers": "python_version >= '3.6'", - "version": "==0.31.0" + "version": "==0.31.2" }, "idna": { "hashes": [ @@ -695,151 +840,144 @@ }, "numpy": { "hashes": [ - "sha256:0093e85df2960d7e4049664b26afc58b03236e967fb942354deef3208857a04c", - "sha256:09aa8a87e45b55a1c2c205d42e2808849ece5c484b2aab11fecabec3841cafba", - "sha256:0cce2a669e3c8ba02ee563c7835f92c153cf02edff1ae05e1823f1dde21b16a5", - "sha256:0e6e8f9d9ecf95399982019c01223dc130542960a12edfa8edd1122dfa66a8a8", - "sha256:0f118ce6b972080ba0758c6087c3617b5ba243d806268623dc34216d69099ba0", - "sha256:178de8f87948163d98a4c9ab5bee4ce6519ca918926ec8df195af582de28544d", - "sha256:18e14c4d09d55eef39a6ab5b08406e84bc6869c1e34eef45564804f90b7e0574", - "sha256:2023ef86243690c2791fd6353e5b4848eedaa88ca8a2d129f462049f6d484696", - "sha256:20d4649c773f66cc2fc36f663e091f57c3b7655f936a4c681b4250855d1da8f5", - "sha256:2302dc0224c1cbc49bb94f7064f3f923a971bfae45c33870dcbff63a2a550505", - "sha256:26f0bcd9c79a00e339565b303badc74d3ea2bd6d52191eeca5f95936cad107d0", - 
"sha256:297c72b1b98100c2e8f873d5d35fb551fce7040ade83d67dd51d38c8d42a2162", - "sha256:2f44de05659b67d20499cbc96d49f2650769afcb398b79b324bb6e297bfe3844", - "sha256:2ffd257026eb1b34352e749d7cc1678b5eeec3e329ad8c9965a797e08ccba205", - "sha256:382ad67d99ef49024f11d1ce5dcb5ad8432446e4246a4b014418ba3a1175a1f4", - "sha256:3869ea1ee1a1edc16c29bbe3a2f2a4e515cc3a44d43903ad41e0cacdbaf733dc", - "sha256:3d1a100e48cb266090a031397863ff8a30050ceefd798f686ff92c67a486753d", - "sha256:423797bdab2eeefbe608d7c1ec7b2b4fd3c58d51460f1ee26c7500a1d9c9ee93", - "sha256:42d7dd5fa36d16d52a84f821eb96031836fd405ee6955dd732f2023724d0aa01", - "sha256:49e792ec351315e16da54b543db06ca8a86985ab682602d90c60ef4ff4db2a9c", - "sha256:4e53170557d37ae404bf8d542ca5b7c629d6efa1117dac6a83e394142ea0a43f", - "sha256:4f1b68ff47680c2925f8063402a693ede215f0257f02596b1318ecdfb1d79e33", - "sha256:4f9c360ecef085e5841c539a9a12b883dff005fbd7ce46722f5e9cef52634d82", - "sha256:529050522e983e00a6c1c6b67411083630de8b57f65e853d7b03d9281b8694d2", - "sha256:52b5f61bdb323b566b528899cc7db2ba5d1015bda7ea811a8bcf3c89c331fa42", - "sha256:538bf4ec353709c765ff75ae616c34d3c3dca1a68312727e8f2676ea644f8509", - "sha256:5adf01965456a664fc727ed69cc71848f28d063217c63e1a0e200a118d5eec9a", - "sha256:5b55aa56165b17aaf15520beb9cbd33c9039810e0d9643dd4379e44294c7303e", - "sha256:5d558123217a83b2d1ba316b986e9248a1ed1971ad495963d555ccd75dcb1556", - "sha256:5de60946f14ebe15e713a6f22850c2372fa72f4ff9a432ab44aa90edcadaa65a", - "sha256:62fea415f83ad8fdb6c20840578e5fbaf5ddd65e0ec6c3c47eda0f69da172510", - "sha256:6436cffb4f2bf26c974344439439c95e152c9a527013f26b3577be6c2ca64295", - "sha256:6461de5113088b399d655d45c3897fa188766415d0f568f175ab071c8873bd73", - "sha256:69e7419c9012c4aaf695109564e3387f1259f001b4326dfa55907b098af082d3", - "sha256:71abbea030f2cfc3092a0ff9f8c8fdefdc5e0bf7d9d9c99663538bb0ecdac0b9", - "sha256:7211b95ca365519d3596a1d8688a95874cc94219d417504d9ecb2df99fa7bfa8", - "sha256:727c6c3275ddefa0dc078524a85e064c057b4f4e71ca5ca29a19163c607be745", 
- "sha256:79e9e06c4c2379db47f3f6fc7a8652e7498251789bf8ff5bd43bf478ef314ca2", - "sha256:7ad270f438cbdd402c364980317fb6b117d9ec5e226fff5b4148dd9aa9fc6e02", - "sha256:7d5d7999df434a038d75a748275cd6c0094b0ecdb0837342b332a82defc4dc4d", - "sha256:8097529164c0f3e32bb89412a0905d9100bf434d9692d9fc275e18dcf53c9344", - "sha256:82c55962006156aeef1629b953fd359064aa47e4d82cfc8e67f0918f7da3344f", - "sha256:8361ea4220d763e54cff2fbe7d8c93526b744f7cd9ddab47afeff7e14e8503be", - "sha256:899d2c18024984814ac7e83f8f49d8e8180e2fbe1b2e252f2e7f1d06bea92425", - "sha256:8ad35f20be147a204e28b6a0575fbf3540c5e5f802634d4258d55b1ff5facce1", - "sha256:8f085da926c0d491ffff3096f91078cc97ea67e7e6b65e490bc8dcda65663be2", - "sha256:9171a42fcad32dcf3fa86f0a4faa5e9f8facefdb276f54b8b390d90447cff4e2", - "sha256:92a0e65272fd60bfa0d9278e0484c2f52fe03b97aedc02b357f33fe752c52ffb", - "sha256:941c2a93313d030f219f3a71fd3d91a728b82979a5e8034eb2e60d394a2b83f9", - "sha256:98b35775e03ab7f868908b524fc0a84d38932d8daf7b7e1c3c3a1b6c7a2c9f15", - "sha256:a1ceafc5042451a858231588a104093474c6a5c57dcc724841f5c888d237d690", - "sha256:a73044b752f5d34d4232f25f18160a1cc418ea4507f5f11e299d8ac36875f8a0", - "sha256:a7870e8c5fc11aef57d6fea4b4085e537a3a60ad2cdd14322ed531fdca68d261", - "sha256:a92f227dbcdc9e4c3e193add1a189a9909947d4f8504c576f4a732fd0b54240a", - "sha256:ac08c63cb7779b85e9d5318e6c3518b424bc1f364ac4cb2c6136f12e5ff2dccc", - "sha256:b6bcf39112e956594b3331316d90c90c90fb961e39696bda97b89462f5f3943f", - "sha256:c0faba4a331195bfa96f93dd9dfaa10b2c7aa8cda3a02b7fd635e588fe821bf5", - "sha256:ce9ce141a505053b3c7bce3216071f3bf5c182b8b28930f14cd24d43932cd2df", - "sha256:cf6470d91d34bf669f61d515499859fa7a4c2f7c36434afb70e82df7217933f9", - "sha256:d3703409aac693fa82c0aee023a1ae06a6e9d065dba10f5e8e80f642f1e9d0a2", - "sha256:d3e3087f53e2b4428766b54932644d148613c5a595150533ae7f00dab2f319a8", - "sha256:d3f8f0df9f4b8be57b3bf74a1d087fec68f927a2fab68231fdb442bf2c12e426", - 
"sha256:d797454e37570cfd61143b73b8debd623c3c0952959adb817dd310a483d58a1b", - "sha256:e1a27bb1b2dee45a2a53f5ca6ff2d1a7f135287883a1689e930d44d1ff296c87", - "sha256:e3bd2cb07841166420d2fa7146c96ce00cb3410664cbc1a6be028e456c4ee220", - "sha256:e7b6b5e28bbd47b7532698e5db2fe1db693d84b58c254e4389d99a27bb9b8f6b", - "sha256:e867df947d427cdd7a60e3e271729090b0f0df80f5f10ab7dd436f40811699c3", - "sha256:ea66d2b41ca4a1630aae5507ee0a71647d3124d1741980138aa8f28f44dac36e", - "sha256:edee228f76ee2dab4579fad6f51f6a305de09d444280109e0f75df247ff21501", - "sha256:f0a90aba7d521e6954670550e561a4cb925713bd944445dbe9e729b71f6cabee", - "sha256:f93bc6892fe7b0663e5ffa83b61aab510aacffd58c16e012bb9352d489d90cb7", - "sha256:fb1461c99de4d040666ca0444057b06541e5642f800b71c56e6ea92d6a853a0c" + "sha256:00ab83c56211a1d7c07c25e3217ea6695e50a3e2f255053686b081dc0b091a82", + "sha256:068cdb2d0d644cdb45670810894f6a0600797a69c05f1ac478e8d31670b8ee75", + "sha256:0f01dcf33e73d80bd8dc0f20a71303abbafa26a19e23f6b68d1aa9990af90257", + "sha256:0fece1d1f0a89c16b03442eae5c56dc0be0c7883b5d388e0c03f53019a4bfd71", + "sha256:12e26134a0331d8dbd9351620f037ec470b7c75929cb8a1537f6bfe411152a1a", + "sha256:1ae241bbfc6ae276f94a170b14785e561cb5e7f626b6688cf076af4110887413", + "sha256:1f92f53998a17265194018d1cc321b2e96e900ca52d54c7c77837b71b9465181", + "sha256:209fae046e62d0ce6435fcfe3b1a10537e858249b3d9b05829e2a05218296a85", + "sha256:20abd069b9cda45874498b245c8015b18ace6de8546bf50dfa8cea1696ed06ef", + "sha256:21982668592194c609de53ba4933a7471880ccbaadcc52352694a59ecc860b3a", + "sha256:25f2059807faea4b077a2b6837391b5d830864b3543627f381821c646f31a63c", + "sha256:2653de5c24910e49c2b106499803124dde62a5a1fe0eedeaecf4309a5f639390", + "sha256:2b8f157c8a6f20eb657e240f8985cc135598b2b46985c5bccbde7616dc9c6b1e", + "sha256:2fb882da679409066b4603579619341c6d6898fc83a8995199d5249f986e8e8f", + "sha256:40397bda92382fcec844066efb11f13e1c9a3e2a8e8f318fb72ed8b6db9f60f1", + "sha256:444be170853f1f9d528428eceb55f12918e4fda5d8805480f36a002f1415e09b", 
+ "sha256:47c5a6ed21d9452b10227e5e8a0e1c22979811cad7dcc19d8e3e2fb8fa03f1a3", + "sha256:4f069069931240b3fc703f1e23df63443dbd6390614c8c44a87d96cd0ec81eb1", + "sha256:52b913ec40ff7ae845687b0b34d8d93b60cb66dcee06996dd5c99f2fc9328657", + "sha256:5633c0da313330fd20c484c78cdd3f9b175b55e1a766c4a174230c6b70ad8262", + "sha256:5daf6f3914a733336dab21a05cdec343144600e964d2fcdabaac0c0269874b2a", + "sha256:5eea80d908b2c1f91486eb95b3fb6fab187e569ec9752ab7d9333d2e66bf2d6b", + "sha256:602f65afdef699cda27ec0b9224ae5dc43e328f4c24c689deaf77133dbee74d0", + "sha256:659a6107e31a83c4e33f763942275fd278b21d095094044eb35569e86a21ddae", + "sha256:66cb9422236317f9d44b67b4d18f44efe6e9c7f8794ac0462978513359461554", + "sha256:6d82351358ffbcdcd7b686b90742a9b86632d6c1c051016484fa0b326a0a1548", + "sha256:6e9f61981ace1360e42737e2bae58b27bf28a1b27e781721047d84bd754d32e7", + "sha256:6ed0be1ee58eef41231a5c943d7d1375f093142702d5723ca2eb07db9b934b05", + "sha256:7cdde6de52fb6664b00b056341265441192d1291c130e99183ec0d4b110ff8b1", + "sha256:7df2de1e4fba69a51c06c28f5a3de36731eb9639feb8e1cf7e4a7b0daf4cf622", + "sha256:7edc794af8b36ca37ef5fcb5e0d128c7e0595c7b96a2318d1badb6fcd8ee86b1", + "sha256:7f54844851cdb630ceb623dcec4db3240d1ac13d4990532446761baede94996a", + "sha256:805cc8de9fd6e7a22da5aed858e0ab16be5a4db6c873dde1d7451c541553aa27", + "sha256:8906e71fd8afcb76580404e2a950caef2685df3d2a57fe82a86ac8d33cc007ba", + "sha256:89f7268c009bc492f506abd6f5265defa7cb3f7487dc21d357c3d290add45082", + "sha256:8c50dd1fc8826f5b26a5ee4d77ca55d88a895f4e4819c7ecc2a9f5905047a443", + "sha256:8e4549f8a3c6d13d55041925e912bfd834285ef1dd64d6bc7d542583355e2e98", + "sha256:8e9afaeb0beff068b4d9cd20d322ba0ee1cecfb0b08db145e4ab4dd44a6b5110", + "sha256:98f16a80e917003a12c0580f97b5f875853ebc33e2eaa4bccfc8201ac6869308", + "sha256:9e35d3e0144137d9fdae62912e869136164534d64a169f86438bc9561b6ad49f", + "sha256:9e4424677ce4b47fe73c8b5556d876571f7c6945d264201180db2dc34f676ab5", + 
"sha256:adb6ed2ad29b9e15321d167d152ee909ec73395901b70936f029c3bc6d7f4460", + "sha256:aea4f66ff44dfddf8c2cffd66ba6538c5ec67d389285292fe428cb2c738c8aef", + "sha256:b21041e8cb6a1eb5312dd1d2f80a94d91efffb7a06b70597d44f1bd2dfc315ab", + "sha256:b2f0073ed0868db1dcd86e052d37279eef185b9c8db5bf61f30f46adac63c909", + "sha256:b3a24467af63c67829bfaa61eecf18d5432d4f11992688537be59ecd6ad32f5e", + "sha256:b9c618d56a29c9cb1c4da979e9899be7578d2e0b3c24d52079c166324c9e8695", + "sha256:bba37bc29d4d85761deed3954a1bc62be7cf462b9510b51d367b769a8c8df325", + "sha256:bd3a7a9f5847d2fb8c2c6d1c862fa109c31a9abeca1a3c2bd5a64572955b2979", + "sha256:be71bf1edb48ebbbf7f6337b5bfd2f895d1902f6335a5830b20141fc126ffba0", + "sha256:c02ef4401a506fb60b411467ad501e1429a3487abca4664871d9ae0b46c8ba32", + "sha256:c3cd545784805de05aafe1dde61752ea49a359ccba9760c1e5d1c88a93bbf2b7", + "sha256:c7ac672d699bf36275c035e16b65539931347d68b70667d28984c9fb34e07fa7", + "sha256:cb7bbb88aa74908950d979eeaa24dbdf1a865e3c7e45ff0121d8f70387b55f73", + "sha256:cd2bd2bbed13e213d6b55dc1d035a4f91748a7d3edc9480c13898b0353708920", + "sha256:cda077c2e5b780200b6b3e09d0b42205a3d1c68f30c6dceb90401c13bff8fe74", + "sha256:cf28c0c1d4c4bf00f509fa7eb02c58d7caf221b50b467bcb0d9bbf1584d5c821", + "sha256:d0d9b7c93578baafcbc5f0b83eaf17b79d345c6f36917ba0c67f45226911d499", + "sha256:d1240d50adff70c2a88217698ca844723068533f3f5c5fa6ee2e3220e3bdb000", + "sha256:d30291931c915b2ab5717c2974bb95ee891a1cf22ebc16a8006bd59cd210d40a", + "sha256:d9f64d786b3b1dd742c946c42d15b07497ed14af1a1f3ce840cce27daa0ce913", + "sha256:da6cad4e82cb893db4b69105c604d805e0c3ce11501a55b5e9f9083b47d2ffe8", + "sha256:df1b10187212b198dd45fa943d8985a3c8cf854aed4923796e0e019e113a1bda", + "sha256:e04ae107ac591763a47398bb45b568fc38f02dbc4aa44c063f67a131f99346cb", + "sha256:e6dee3bb76aa4009d5a912180bf5b2de012532998d094acee25d9cb8dee3e44a", + "sha256:e7e88598032542bd49af7c4747541422884219056c268823ef6e5e89851c8825", + "sha256:e98c97502435b53741540a5717a6749ac2ada901056c7db951d33e11c885cc7d", 
+ "sha256:ec055f6dae239a6299cace477b479cca2fc125c5675482daf1dd886933a1076f", + "sha256:f74f0f7779cc7ae07d1810aab8ac6b1464c3eafb9e283a40da7309d5e6e48fbb", + "sha256:fbde1b0c6e81d56f5dccd95dd4a711d9b95df1ae4009a60887e56b27e8d903fa", + "sha256:fcf92bee92742edd401ba41135185866f7026c502617f422eb432cfeca4fe236", + "sha256:fd49860271d52127d61197bb50b64f58454e9f578cb4b2c001a6de8b1f50b0b1" ], "markers": "python_version >= '3.11'", - "version": "==2.4.1" + "version": "==2.4.2" }, "packaging": { "hashes": [ - "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", - "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f" + "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", + "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529" ], "markers": "python_version >= '3.8'", - "version": "==25.0" + "version": "==26.0" }, "pandas": { "hashes": [ - "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7", - "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593", - "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5", - "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791", - "sha256:23ebd657a4d38268c7dfbdf089fbc31ea709d82e4923c5ffd4fbd5747133ce73", - "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec", - "sha256:28083c648d9a99a5dd035ec125d42439c6c1c525098c58af0fc38dd1a7a1b3d4", - "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5", - "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac", - "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084", - "sha256:376c6446ae31770764215a6c937f72d917f214b43560603cd60da6408f183b6c", - "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87", - "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35", - "sha256:4793891684806ae50d1288c9bae9330293ab4e083ccd1c5e383c34549c6e4250", - 
"sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c", - "sha256:503cf027cf9940d2ceaa1a93cfb5f8c8c7e6e90720a2850378f0b3f3b1e06826", - "sha256:5554c929ccc317d41a5e3d1234f3be588248e61f08a74dd17c9eabb535777dc9", - "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713", - "sha256:5caf26f64126b6c7aec964f74266f435afef1c1b13da3b0636c7518a1fa3e2b1", - "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523", - "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3", - "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78", - "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53", - "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c", - "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21", - "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5", - "sha256:854d00d556406bffe66a4c0802f334c9ad5a96b4f1f868adf036a21b11ef13ff", - "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45", - "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110", - "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493", - "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b", - "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450", - "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86", - "sha256:a637c5cdfa04b6d6e2ecedcb81fc52ffb0fd78ce2ebccc9ea964df9f658de8c8", - "sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98", - "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89", - "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66", - "sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b", - "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8", - "sha256:bf1f8a81d04ca90e32a0aceb819d34dbd378a98bf923b6398b9a3ec0bf44de29", 
- "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6", - "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc", - "sha256:c503ba5216814e295f40711470446bc3fd00f0faea8a086cbc688808e26f92a2", - "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788", - "sha256:d3e28b3e83862ccf4d85ff19cf8c20b2ae7e503881711ff2d534dc8f761131aa", - "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151", - "sha256:dd7478f1463441ae4ca7308a70e90b33470fa593429f9d4c578dd00d1fa78838", - "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", - "sha256:e19d192383eab2f4ceb30b412b22ea30690c9e618f78870357ae1d682912015a", - "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d", - "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908", - "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0", - "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b", - "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c", - "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee" + "sha256:0192fee1f1a8e743b464a6607858ee4b071deb0b118eb143d71c2a1d170996d5", + "sha256:083b11415b9970b6e7888800c43c82e81a06cd6b06755d84804444f0007d6bb7", + "sha256:0facf7e87d38f721f0af46fe70d97373a37701b1c09f7ed7aeeb292ade5c050f", + "sha256:113b4cca2614ff7e5b9fee9b6f066618fe73c5a83e99d721ffc41217b2bf57dd", + "sha256:125eb901e233f155b268bbef9abd9afb5819db74f0e677e89a61b246228c71ac", + "sha256:14c2a4099cd38a1d18ff108168ea417909b2dea3bd1ebff2ccf28ddb6a74d740", + "sha256:15d59f885ee5011daf8335dff47dcb8a912a27b4ad7826dc6cbe809fd145d327", + "sha256:177d9df10b3f43b70307a149d7ec49a1229a653f907aa60a48f1877d0e6be3be", + "sha256:1c39eab3ad38f2d7a249095f0a3d8f8c22cc0f847e98ccf5bbe732b272e2d9fa", + "sha256:1fbbb5a7288719e36b76b4f18d46ede46e7f916b6c8d9915b756b0a6c3f792b3", + 
"sha256:24e6547fb64d2c92665dd2adbfa4e85fa4fd70a9c070e7cfb03b629a0bbab5eb", + "sha256:2713810ad3806767b89ad3b7b69ba153e1c6ff6d9c20f9c2140379b2a98b6c98", + "sha256:33fd3e6baa72899746b820c31e4b9688c8e1b7864d7aec2de7ab5035c285277a", + "sha256:3c9a1a149aed3b6c9bf246033ff91e1b02d529546c5d6fb6b74a28fea0cf4c70", + "sha256:412d1a89aab46889f3033a386912efcdfa0f1131c5705ff5b668dda88305e986", + "sha256:447b2d68ac5edcbf94655fe909113a6dba6ef09ad7f9f60c80477825b6c489fe", + "sha256:48ee04b90e2505c693d3f8e8f524dab8cb8aaf7ddcab52c92afa535e717c4812", + "sha256:4a4a400ca18230976724a5066f20878af785f36c6756e498e94c2a5e5d57779c", + "sha256:4a66384f017240f3858a4c8a7cf21b0591c3ac885cddb7758a589f0f71e87ebb", + "sha256:597c08fb9fef0edf1e4fa2f9828dd27f3d78f9b8c9b4a748d435ffc55732310b", + "sha256:5db1e62cb99e739fa78a28047e861b256d17f88463c76b8dafc7c1338086dca8", + "sha256:613e13426069793aa1ec53bdcc3b86e8d32071daea138bbcf4fa959c9cdaa2e2", + "sha256:66f72fb172959af42a459e27a8d8d2c7e311ff4c1f7db6deb3b643dbc382ae08", + "sha256:69780c98f286076dcafca38d8b8eee1676adf220199c0a39f0ecbf976b68151a", + "sha256:697b8f7d346c68274b1b93a170a70974cdc7d7354429894d5927c1effdcccd73", + "sha256:707a9a877a876c326ae2cb640fbdc4ef63b0a7b9e2ef55c6df9942dcee8e2af9", + "sha256:783ac35c4d0fe0effdb0d67161859078618b1b6587a1af15928137525217a721", + "sha256:8cb3120f0d9467ed95e77f67a75e030b67545bcfa08964e349252d674171def2", + "sha256:8e8b9808590fa364416b49b2a35c1f4cf2785a6c156935879e57f826df22038e", + "sha256:940eebffe55528074341a5a36515f3e4c5e25e958ebbc764c9502cfc35ba3faa", + "sha256:95683af6175d884ee89471842acfca29172a85031fccdabc35e50c0984470a0e", + "sha256:9803b31f5039b3c3b10cc858c5e40054adb4b29b4d81cb2fd789f4121c8efbcd", + "sha256:98212a38a709feb90ae658cb6227ea3657c22ba8157d4b8f913cd4c950de5e7e", + "sha256:a453aad8c4f4e9f166436994a33884442ea62aa8b27d007311e87521b97246e1", + "sha256:a8942e333dc67ceda1095227ad0febb05a3b36535e520154085db632c40ad084", + "sha256:afd0aa3d0b5cda6e0b8ffc10dbcca3b09ef3cbcd3fe2b27364f85fdc04e1989d", 
+ "sha256:b78d646249b9a2bc191040988c7bb524c92fa8534fb0898a0741d7e6f2ffafa6", + "sha256:b86d113b6c109df3ce0ad5abbc259fe86a1bd4adfd4a31a89da42f84f65509bb", + "sha256:bc9cba7b355cb4162442a88ce495e01cb605f17ac1e27d6596ac963504e0305f", + "sha256:be8c515c9bc33989d97b89db66ea0cececb0f6e3c2a87fcc8b69443a6923e95f", + "sha256:c14837eba8e99a8da1527c0280bba29b0eb842f64aa94982c5e21227966e164b", + "sha256:d257699b9a9960e6125686098d5714ac59d05222bef7a5e6af7a7fd87c650801", + "sha256:d64ce01eb9cdca96a15266aa679ae50212ec52757c79204dbc7701a222401850", + "sha256:da768007b5a33057f6d9053563d6b74dd6d029c337d93c6d0d22a763a5c2ecc0", + "sha256:debb95c77ff3ed3ba0d9aa20c3a2f19165cc7956362f9873fce1ba0a53819d70", + "sha256:e979d22316f9350c516479dd3a92252be2937a9531ed3a26ec324198a99cdd49", + "sha256:f0b853319dec8d5e0c8b875374c078ef17f2269986a78168d9bd57e49bf650ae", + "sha256:fedabf175e7cd82b69b74c30adbaa616de301291a5231138d7242596fc296a8d" ], "index": "pypi", - "markers": "python_version >= '3.9'", - "version": "==2.3.3" + "markers": "python_version >= '3.11'", + "version": "==3.0.0" }, "pillow": { "hashes": [ @@ -940,11 +1078,11 @@ }, "proto-plus": { "hashes": [ - "sha256:1baa7f81cf0f8acb8bc1f6d085008ba4171eaf669629d1b6d1673b21ed1c0a82", - "sha256:873af56dd0d7e91836aee871e5799e1c6f1bda86ac9a983e0bb9f0c266a568c4" + "sha256:912a7460446625b792f6448bade9e55cd4e41e6ac10e27009ef71a7f317fa147", + "sha256:e4643061f3a4d0de092d62aa4ad09fa4756b2cbb89d4627f3985018216f9fefc" ], "markers": "python_version >= '3.7'", - "version": "==1.27.0" + "version": "==1.27.1" }, "protobuf": { "hashes": [ @@ -980,6 +1118,14 @@ "markers": "python_version >= '3.8'", "version": "==0.4.2" }, + "pycparser": { + "hashes": [ + "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", + "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992" + ], + "markers": "python_version >= '3.10'", + "version": "==3.0" + }, "pygments": { "hashes": [ 
"sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", @@ -991,11 +1137,11 @@ }, "pyparsing": { "hashes": [ - "sha256:023b5e7e5520ad96642e2c6db4cb683d3970bd640cdf7115049a6e9c3682df82", - "sha256:47fad0f17ac1e2cad3de3b458570fbc9b03560aa029ed5e16ee5554da9a2251c" + "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", + "sha256:c777f4d763f140633dcb6d8a3eda953bf7a214dc4eff598413c070bcdc117cbc" ], "markers": "python_version >= '3.9'", - "version": "==3.3.1" + "version": "==3.3.2" }, "python-dateutil": { "hashes": [ @@ -1145,37 +1291,37 @@ "develop": { "black": { "hashes": [ - "sha256:05dd459a19e218078a1f98178c13f861fe6a9a5f88fc969ca4d9b49eb1809783", - "sha256:09524b0e6af8ba7a3ffabdfc7a9922fb9adef60fed008c7cd2fc01f3048e6e6f", - "sha256:0a0953b134f9335c2434864a643c842c44fba562155c738a2a37a4d61f00cad5", - "sha256:0e509c858adf63aa61d908061b52e580c40eae0dfa72415fa47ac01b12e29baf", - "sha256:169506ba91ef21e2e0591563deda7f00030cb466e747c4b09cb0a9dae5db2f43", - "sha256:17dcc893da8d73d8f74a596f64b7c98ef5239c2cd2b053c0f25912c4494bf9ea", - "sha256:1a2f578ae20c19c50a382286ba78bfbeafdf788579b053d8e4980afb079ab9be", - "sha256:2355bbb6c3b76062870942d8cc450d4f8ac71f9c93c40122762c8784df49543f", - "sha256:252678f07f5bac4ff0d0e9b261fbb029fa530cfa206d0a636a34ab445ef8ca9d", - "sha256:274f940c147ddab4442d316b27f9e332ca586d39c85ecf59ebdea82cc9ee8892", - "sha256:31f96b7c98c1ddaeb07dc0f56c652e25bdedaac76d5b68a059d998b57c55594a", - "sha256:48ceb36c16dbc84062740049eef990bb2ce07598272e673c17d1a7720c71c828", - "sha256:51e267458f7e650afed8445dc7edb3187143003d52a1b710c7321aef22aa9655", - "sha256:546eecfe9a3a6b46f9d69d8a642585a6eaf348bcbbc4d87a19635570e02d9f4a", - "sha256:778285d9ea197f34704e3791ea9404cd6d07595745907dd2ce3da7a13627b29b", - "sha256:8d3dd9cea14bff7ddc0eb243c811cdb1a011ebb4800a5f0335a01a68654796a7", - "sha256:9678bd991cc793e81d19aeeae57966ee02909877cb65838ccffef24c3ebac08f", - 
"sha256:97596189949a8aad13ad12fcbb4ae89330039b96ad6742e6f6b45e75ad5cfd83", - "sha256:9ec77439ef3e34896995503865a85732c94396edcc739f302c5673a2315e1e7f", - "sha256:a05ddeb656534c3e27a05a29196c962877c83fa5503db89e68857d1161ad08a5", - "sha256:a3fa71e3b8dd9f7c6ac4d818345237dfb4175ed3bf37cd5a581dbc4c034f1ec5", - "sha256:b162653ed89eb942758efeb29d5e333ca5bb90e5130216f8369857db5955a7da", - "sha256:bc5b1c09fe3c931ddd20ee548511c64ebf964ada7e6f0763d443947fd1c603ce", - "sha256:c1f68c5eff61f226934be6b5b80296cf6939e5d2f0c2f7d543ea08b204bfaf59", - "sha256:d0cfa263e85caea2cff57d8f917f9f51adae8e20b610e2b23de35b5b11ce691a", - "sha256:d3e1b65634b0e471d07ff86ec338819e2ef860689859ef4501ab7ac290431f9b", - "sha256:f85ba1ad15d446756b4ab5f3044731bf68b777f8f9ac9cdabd2425b97cd9c4e8" + "sha256:101540cb2a77c680f4f80e628ae98bd2bd8812fb9d72ade4f8995c5ff019e82c", + "sha256:1054e8e47ebd686e078c0bb0eaf31e6ce69c966058d122f2c0c950311f9f3ede", + "sha256:1de0f7d01cc894066a1153b738145b194414cc6eeaad8ef4397ac9abacf40f6b", + "sha256:2b807c240b64609cb0e80d2200a35b23c7df82259f80bef1b2c96eb422b4aac9", + "sha256:3cee1487a9e4c640dc7467aaa543d6c0097c391dc8ac74eb313f2fbf9d7a7cb5", + "sha256:53c62883b3f999f14e5d30b5a79bd437236658ad45b2f853906c7cbe79de00af", + "sha256:5e8e75dabb6eb83d064b0db46392b25cabb6e784ea624219736e8985a6b3675d", + "sha256:643d27fb5facc167c0b1b59d0315f2674a6e950341aed0fc05cf307d22bf4954", + "sha256:66912475200b67ef5a0ab665011964bf924745103f51977a78b4fb92a9fc1bf0", + "sha256:6eeca41e70b5f5c84f2f913af857cf2ce17410847e1d54642e658e078da6544f", + "sha256:6f3977a16e347f1b115662be07daa93137259c711e526402aa444d7a88fdc9d4", + "sha256:7ed300200918147c963c87700ccf9966dceaefbbb7277450a8d646fc5646bf24", + "sha256:91a68ae46bf07868963671e4d05611b179c2313301bd756a89ad4e3b3db2325b", + "sha256:9459ad0d6cd483eacad4c6566b0f8e42af5e8b583cee917d90ffaa3778420a0a", + "sha256:9dc8c71656a79ca49b8d3e2ce8103210c9481c57798b48deeb3a8bb02db5f115", + "sha256:a19915ec61f3a8746e8b10adbac4a577c6ba9851fa4a9e9fbfbcf319887a5791", 
+ "sha256:b22b3810451abe359a964cc88121d57f7bce482b53a066de0f1584988ca36e79", + "sha256:ba1d768fbfb6930fc93b0ecc32a43d8861ded16f47a40f14afa9bb04ab93d304", + "sha256:be5e2fe860b9bd9edbf676d5b60a9282994c03fbbd40fe8f5e75d194f96064ca", + "sha256:c5b7713daea9bf943f79f8c3b46f361cc5229e0e604dcef6a8bb6d1c37d9df89", + "sha256:ca699710dece84e3ebf6e92ee15f5b8f72870ef984bf944a57a777a48357c168", + "sha256:d294ac3340eef9c9eb5d29288e96dc719ff269a88e27b396340459dd85da4c58", + "sha256:d62d14ca31c92adf561ebb2e5f2741bf8dea28aef6deb400d49cca011d186c68", + "sha256:dd39eef053e58e60204f2cdf059e2442e2eb08f15989eefe259870f89614c8b6", + "sha256:eb07665d9a907a1a645ee41a0df8a25ffac8ad9c26cdb557b7b88eeeeec934e0", + "sha256:f016baaadc423dc960cdddf9acae679e71ee02c4c341f78f3179d7e4819c095f", + "sha256:fb1dafbbaa3b1ee8b4550a84425aac8874e5f390200f5502cf3aee4a2acb2f14" ], "index": "pypi", "markers": "python_version >= '3.10'", - "version": "==25.12.0" + "version": "==26.1.0" }, "cfgv": { "hashes": [ @@ -1219,11 +1365,11 @@ }, "identify": { "hashes": [ - "sha256:1181ef7608e00704db228516541eb83a88a9f94433a8c80bb9b5bd54b1d81757", - "sha256:e4f4864b96c6557ef2a1e1c951771838f4edc9df3a72ec7118b338801b11c7bf" + "sha256:391ee4d77741d994189522896270b787aed8670389bfd60f326d677d64a6dfb0", + "sha256:846857203b5511bbe94d5a352a48ef2359532bc8f6727b5544077a0dcfb24980" ], - "markers": "python_version >= '3.9'", - "version": "==2.6.15" + "markers": "python_version >= '3.10'", + "version": "==2.6.16" }, "isort": { "hashes": [ @@ -1260,19 +1406,19 @@ }, "packaging": { "hashes": [ - "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", - "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f" + "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", + "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529" ], "markers": "python_version >= '3.8'", - "version": "==25.0" + "version": "==26.0" }, "pathspec": { "hashes": [ - 
"sha256:bac5cf97ae2c2876e2d25ebb15078eb04d76e4b98921ee31c6f85ade8b59444d", - "sha256:e80767021c1cc524aa3fb14bedda9c34406591343cc42797b386ce7b9354fb6c" + "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645", + "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723" ], "markers": "python_version >= '3.9'", - "version": "==1.0.3" + "version": "==1.0.4" }, "platformdirs": { "hashes": [ @@ -1309,11 +1455,51 @@ }, "pytokens": { "hashes": [ - "sha256:2f932b14ed08de5fcf0b391ace2642f858f1394c0857202959000b68ed7a458a", - "sha256:95b2b5eaf832e469d141a378872480ede3f251a5a5041b8ec6e581d3ac71bbf3" + "sha256:0fc71786e629cef478cbf29d7ea1923299181d0699dbe7c3c0f4a583811d9fc1", + "sha256:11edda0942da80ff58c4408407616a310adecae1ddd22eef8c692fe266fa5009", + "sha256:140709331e846b728475786df8aeb27d24f48cbcf7bcd449f8de75cae7a45083", + "sha256:24afde1f53d95348b5a0eb19488661147285ca4dd7ed752bbc3e1c6242a304d1", + "sha256:26cef14744a8385f35d0e095dc8b3a7583f6c953c2e3d269c7f82484bf5ad2de", + "sha256:27b83ad28825978742beef057bfe406ad6ed524b2d28c252c5de7b4a6dd48fa2", + "sha256:292052fe80923aae2260c073f822ceba21f3872ced9a68bb7953b348e561179a", + "sha256:29d1d8fb1030af4d231789959f21821ab6325e463f0503a61d204343c9b355d1", + "sha256:2a44ed93ea23415c54f3face3b65ef2b844d96aeb3455b8a69b3df6beab6acc5", + "sha256:30f51edd9bb7f85c748979384165601d028b84f7bd13fe14d3e065304093916a", + "sha256:34bcc734bd2f2d5fe3b34e7b3c0116bfb2397f2d9666139988e7a3eb5f7400e3", + "sha256:3ad72b851e781478366288743198101e5eb34a414f1d5627cdd585ca3b25f1db", + "sha256:3f901fe783e06e48e8cbdc82d631fca8f118333798193e026a50ce1b3757ea68", + "sha256:42f144f3aafa5d92bad964d471a581651e28b24434d184871bd02e3a0d956037", + "sha256:4a14d5f5fc78ce85e426aa159489e2d5961acf0e47575e08f35584009178e321", + "sha256:4a58d057208cb9075c144950d789511220b07636dd2e4708d5645d24de666bdc", + "sha256:4e691d7f5186bd2842c14813f79f8884bb03f5995f0575272009982c5ac6c0f7", + 
"sha256:5502408cab1cb18e128570f8d598981c68a50d0cbd7c61312a90507cd3a1276f", + "sha256:584c80c24b078eec1e227079d56dc22ff755e0ba8654d8383b2c549107528918", + "sha256:5ad948d085ed6c16413eb5fec6b3e02fa00dc29a2534f088d3302c47eb59adf9", + "sha256:670d286910b531c7b7e3c0b453fd8156f250adb140146d234a82219459b9640c", + "sha256:682fa37ff4d8e95f7df6fe6fe6a431e8ed8e788023c6bcc0f0880a12eab80ad1", + "sha256:6d6c4268598f762bc8e91f5dbf2ab2f61f7b95bdc07953b602db879b3c8c18e1", + "sha256:79fc6b8699564e1f9b521582c35435f1bd32dd06822322ec44afdeba666d8cb3", + "sha256:8bdb9d0ce90cbf99c525e75a2fa415144fd570a1ba987380190e8b786bc6ef9b", + "sha256:8fcb9ba3709ff77e77f1c7022ff11d13553f3c30299a9fe246a166903e9091eb", + "sha256:941d4343bf27b605e9213b26bfa1c4bf197c9c599a9627eb7305b0defcfe40c1", + "sha256:967cf6e3fd4adf7de8fc73cd3043754ae79c36475c1c11d514fc72cf5490094a", + "sha256:970b08dd6b86058b6dc07efe9e98414f5102974716232d10f32ff39701e841c4", + "sha256:97f50fd18543be72da51dd505e2ed20d2228c74e0464e4262e4899797803d7fa", + "sha256:9bd7d7f544d362576be74f9d5901a22f317efc20046efe2034dced238cbbfe78", + "sha256:add8bf86b71a5d9fb5b89f023a80b791e04fba57960aa790cc6125f7f1d39dfe", + "sha256:b35d7e5ad269804f6697727702da3c517bb8a5228afa450ab0fa787732055fc9", + "sha256:b49750419d300e2b5a3813cf229d4e5a4c728dae470bcc89867a9ad6f25a722d", + "sha256:d31b97b3de0f61571a124a00ffe9a81fb9939146c122c11060725bd5aea79975", + "sha256:d70e77c55ae8380c91c0c18dea05951482e263982911fc7410b1ffd1dadd3440", + "sha256:d9907d61f15bf7261d7e775bd5d7ee4d2930e04424bab1972591918497623a16", + "sha256:da5baeaf7116dced9c6bb76dc31ba04a2dc3695f3d9f74741d7910122b456edc", + "sha256:dc74c035f9bfca0255c1af77ddd2d6ae8419012805453e4b0e7513e17904545d", + "sha256:dcafc12c30dbaf1e2af0490978352e0c4041a7cde31f4f81435c2a5e8b9cabb6", + "sha256:ee44d0f85b803321710f9239f335aafe16553b39106384cef8e6de40cb4ef2f6", + "sha256:f66a6bbe741bd431f6d741e617e0f39ec7257ca1f89089593479347cc4d13324" ], "markers": "python_version >= '3.8'", - "version": "==0.3.0" + "version": 
"==0.4.1" }, "pyyaml": { "hashes": [ diff --git a/scripts/1-fetch/arxiv_fetch.py b/scripts/1-fetch/arxiv_fetch.py index 93249652..23215261 100755 --- a/scripts/1-fetch/arxiv_fetch.py +++ b/scripts/1-fetch/arxiv_fetch.py @@ -1,24 +1,28 @@ #!/usr/bin/env python """ -Fetch ArXiv papers with CC license information and generate count reports. +Fetch arXiv articles that use a CC legal tool using the OAI-PMH API. +OAI-PMH: Open Archives Initiative Protocol for Metadata Havesting. + +Note: This fetch script is ready to fetch data, but is not ready for +automation. It currently requires approximately 6 hours to execute. """ + # Standard library import argparse import csv import os -import re import sys import textwrap import time import traceback -import urllib.parse from collections import Counter, defaultdict +from copy import copy from operator import itemgetter # Third-party -import feedparser import requests import yaml +from lxml import etree from pygments import highlight from pygments.formatters import TerminalFormatter from pygments.lexers import PythonTracebackLexer @@ -33,223 +37,10 @@ LOGGER, PATHS = shared.setup(__file__) # Constants -# API Configuration -BASE_URL = "https://export.arxiv.org/api/query?" 
-DEFAULT_FETCH_LIMIT = 800 # Default total papers to fetch - -# CSV Headers -HEADER_AUTHOR_BUCKET = ["TOOL_IDENTIFIER", "AUTHOR_BUCKET", "COUNT"] -HEADER_CATEGORY_REPORT = [ - "TOOL_IDENTIFIER", - "CATEGORY_CODE", - "CATEGORY_LABEL", - "COUNT", -] -HEADER_COUNT = ["TOOL_IDENTIFIER", "COUNT"] -HEADER_YEAR = ["TOOL_IDENTIFIER", "YEAR", "COUNT"] - -# Search Queries -SEARCH_QUERIES = [ - 'all:"creative commons"', - 'all:"CC BY"', - 'all:"CC-BY"', - 'all:"CC BY-NC"', - 'all:"CC-BY-NC"', - 'all:"CC BY-SA"', - 'all:"CC-BY-SA"', - 'all:"CC BY-ND"', - 'all:"CC-BY-ND"', - 'all:"CC BY-NC-SA"', - 'all:"CC-BY-NC-SA"', - 'all:"CC BY-NC-ND"', - 'all:"CC-BY-NC-ND"', - 'all:"CC0"', - 'all:"CC 0"', - 'all:"CC-0"', -] - -# Compiled regex patterns for CC license detection -CC_PATTERNS = [ - (re.compile(r"\bCC[-\s]?0\b", re.IGNORECASE), "CC0"), - ( - re.compile(r"\bCC[-\s]?BY[-\s]?NC[-\s]?ND\b", re.IGNORECASE), - "CC BY-NC-ND", - ), - ( - re.compile(r"\bCC[-\s]?BY[-\s]?NC[-\s]?SA\b", re.IGNORECASE), - "CC BY-NC-SA", - ), - (re.compile(r"\bCC[-\s]?BY[-\s]?ND\b", re.IGNORECASE), "CC BY-ND"), - (re.compile(r"\bCC[-\s]?BY[-\s]?SA\b", re.IGNORECASE), "CC BY-SA"), - (re.compile(r"\bCC[-\s]?BY[-\s]?NC\b", re.IGNORECASE), "CC BY-NC"), - (re.compile(r"\bCC[-\s]?BY\b", re.IGNORECASE), "CC BY"), - ( - re.compile(r"\bCREATIVE\s+COMMONS\b", re.IGNORECASE), - "UNKNOWN CC legal tool", - ), -] - -# ArXiv Categories - manually curated from ArXiv official taxonomy -# Source: https://arxiv.org/category_taxonomy -CATEGORIES = { - # Computer Science - "cs.AI": "Artificial Intelligence", - "cs.AR": "Hardware Architecture", - "cs.CC": "Computational Complexity", - "cs.CE": "Computational Engineering, Finance, and Science", - "cs.CG": "Computational Geometry", - "cs.CL": "Computation and Language", - "cs.CR": "Cryptography and Security", - "cs.CV": "Computer Vision and Pattern Recognition", - "cs.CY": "Computers and Society", - "cs.DB": "Databases", - "cs.DC": "Distributed, Parallel, and Cluster Computing", - 
"cs.DL": "Digital Libraries", - "cs.DM": "Discrete Mathematics", - "cs.DS": "Data Structures and Algorithms", - "cs.ET": "Emerging Technologies", - "cs.FL": "Formal Languages and Automata Theory", - "cs.GL": "General Literature", - "cs.GR": "Graphics", - "cs.GT": "Computer Science and Game Theory", - "cs.HC": "Human-Computer Interaction", - "cs.IR": "Information Retrieval", - "cs.IT": "Information Theory", - "cs.LG": "Machine Learning", - "cs.LO": "Logic in Computer Science", - "cs.MA": "Multiagent Systems", - "cs.MM": "Multimedia", - "cs.MS": "Mathematical Software", - "cs.NA": "Numerical Analysis", - "cs.NE": "Neural and Evolutionary Computing", - "cs.NI": "Networking and Internet Architecture", - "cs.OH": "Other Computer Science", - "cs.OS": "Operating Systems", - "cs.PF": "Performance", - "cs.PL": "Programming Languages", - "cs.RO": "Robotics", - "cs.SC": "Symbolic Computation", - "cs.SD": "Sound", - "cs.SE": "Software Engineering", - "cs.SI": "Social and Information Networks", - "cs.SY": "Systems and Control", - # Mathematics - "math.AC": "Commutative Algebra", - "math.AG": "Algebraic Geometry", - "math.AP": "Analysis of PDEs", - "math.AT": "Algebraic Topology", - "math.CA": "Classical Analysis and ODEs", - "math.CO": "Combinatorics", - "math.CT": "Category Theory", - "math.CV": "Complex Variables", - "math.DG": "Differential Geometry", - "math.DS": "Dynamical Systems", - "math.FA": "Functional Analysis", - "math.GM": "General Mathematics", - "math.GN": "General Topology", - "math.GR": "Group Theory", - "math.GT": "Geometric Topology", - "math.HO": "History and Overview", - "math.IT": "Information Theory", - "math.KT": "K-Theory and Homology", - "math.LO": "Logic", - "math.MG": "Metric Geometry", - "math.MP": "Mathematical Physics", - "math.NA": "Numerical Analysis", - "math.NT": "Number Theory", - "math.OA": "Operator Algebras", - "math.OC": "Optimization and Control", - "math.PR": "Probability", - "math.QA": "Quantum Algebra", - "math.RA": "Rings and 
Algebras", - "math.RT": "Representation Theory", - "math.SG": "Symplectic Geometry", - "math.SP": "Spectral Theory", - "math.ST": "Statistics Theory", - # Physics - "physics.acc-ph": "Accelerator Physics", - "physics.ao-ph": "Atmospheric and Oceanic Physics", - "physics.app-ph": "Applied Physics", - "physics.atm-clus": "Atomic and Molecular Clusters", - "physics.atom-ph": "Atomic Physics", - "physics.bio-ph": "Biological Physics", - "physics.chem-ph": "Chemical Physics", - "physics.class-ph": "Classical Physics", - "physics.comp-ph": "Computational Physics", - "physics.data-an": "Data Analysis, Statistics and Probability", - "physics.ed-ph": "Physics Education", - "physics.flu-dyn": "Fluid Dynamics", - "physics.gen-ph": "General Physics", - "physics.geo-ph": "Geophysics", - "physics.hist-ph": "History and Philosophy of Physics", - "physics.ins-det": "Instrumentation and Detectors", - "physics.med-ph": "Medical Physics", - "physics.optics": "Optics", - "physics.plasm-ph": "Plasma Physics", - "physics.pop-ph": "Popular Physics", - "physics.soc-ph": "Physics and Society", - "physics.space-ph": "Space Physics", - # Statistics - "stat.AP": "Applications", - "stat.CO": "Computation", - "stat.ME": "Methodology", - "stat.ML": "Machine Learning", - "stat.OT": "Other Statistics", - "stat.TH": "Statistics Theory", - # Quantitative Biology - "q-bio.BM": "Biomolecules", - "q-bio.CB": "Cell Behavior", - "q-bio.GN": "Genomics", - "q-bio.MN": "Molecular Networks", - "q-bio.NC": "Neurons and Cognition", - "q-bio.OT": "Other Quantitative Biology", - "q-bio.PE": "Populations and Evolution", - "q-bio.QM": "Quantitative Methods", - "q-bio.SC": "Subcellular Processes", - "q-bio.TO": "Tissues and Organs", - # Economics - "econ.EM": "Econometrics", - "econ.GN": "General Economics", - "econ.TH": "Theoretical Economics", - # Electrical Engineering - "eess.AS": "Audio and Speech Processing", - "eess.IV": "Image and Video Processing", - "eess.SP": "Signal Processing", - "eess.SY": "Systems 
and Control", - # High Energy Physics - "hep-ex": "High Energy Physics - Experiment", - "hep-lat": "High Energy Physics - Lattice", - "hep-ph": "High Energy Physics - Phenomenology", - "hep-th": "High Energy Physics - Theory", - # Other Physics - "astro-ph": "Astrophysics", - "astro-ph.CO": "Cosmology and Nongalactic Astrophysics", - "astro-ph.EP": "Earth and Planetary Astrophysics", - "astro-ph.GA": "Astrophysics of Galaxies", - "astro-ph.HE": "High Energy Astrophysical Phenomena", - "astro-ph.IM": "Instrumentation and Methods for Astrophysics", - "astro-ph.SR": "Solar and Stellar Astrophysics", - "cond-mat.dis-nn": "Disordered Systems and Neural Networks", - "cond-mat.mes-hall": "Mesoscale and Nanoscale Physics", - "cond-mat.mtrl-sci": "Materials Science", - "cond-mat.other": "Other Condensed Matter", - "cond-mat.quant-gas": "Quantum Gases", - "cond-mat.soft": "Soft Condensed Matter", - "cond-mat.stat-mech": "Statistical Mechanics", - "cond-mat.str-el": "Strongly Correlated Electrons", - "cond-mat.supr-con": "Superconductivity", - "gr-qc": "General Relativity and Quantum Cosmology", - "nlin.AO": "Adaptation and Self-Organizing Systems", - "nlin.CD": "Chaotic Dynamics", - "nlin.CG": "Cellular Automata and Lattice Gases", - "nlin.PS": "Pattern Formation and Solitons", - "nlin.SI": "Exactly Solvable and Integrable Systems", - "nucl-ex": "Nuclear Experiment", - "nucl-th": "Nuclear Theory", - "quant-ph": "Quantum Physics", -} - -# File Paths +BASE_URL = "https://oaipmh.arxiv.org/oai" +# Defaults should result in quick operation (not complete operation) +DEFAULT_FETCH_LIMIT = 4500 # Fetch 3 batches of 1,500 articles each +# CSV file paths FILE_ARXIV_AUTHOR_BUCKET = shared.path_join( PATHS["data_1-fetch"], "arxiv_4_count_by_author_bucket.csv" ) @@ -260,38 +51,49 @@ FILE_ARXIV_YEAR = shared.path_join( PATHS["data_1-fetch"], "arxiv_3_count_by_year.csv" ) -# records metadata for each run for audit, reproducibility, and provenance FILE_PROVENANCE = shared.path_join( 
PATHS["data_1-fetch"], "arxiv_provenance.yaml" ) - -# Runtime variables +# CSV headers +HEADER_AUTHOR_BUCKET = ["TOOL_IDENTIFIER", "AUTHOR_BUCKET", "COUNT"] +HEADER_CATEGORY_REPORT = [ + "TOOL_IDENTIFIER", + "CATEGORY_CODE", + "CATEGORY_NAME", + "COUNT", +] +HEADER_COUNT = ["TOOL_IDENTIFIER", "COUNT"] +HEADER_YEAR = ["TOOL_IDENTIFIER", "YEAR", "COUNT"] QUARTER = os.path.basename(PATHS["data_quarter"]) +SUBSUMED_CATEGORIES = { + # https://arxiv.org/archive/alg-geom + # "The alg-geom archive has been subsumed into Algebraic Geometry + # (math.AG)." + "alg-geom": "math.AG", + # https://arxiv.org/archive/chao-dyn + # "The chao-dyn archive has been subsumed into Chaotic Dynamics (nlin.CD)." + "chao-dyn": "nlin.CD", + # https://arxiv.org/archive/dg-ga + # "The dg-ga archive has been subsumed into Differential Geometry + # (math.DG)." + "dg-ga": "math.DG", + # https://arxiv.org/archive/solv-int + # "The solv-int archive has been subsumed into Exactly Solvable and + # Integrable Systems (nlin.SI)." + "solv-int": "nlin.SI", + # https://arxiv.org/archive/q-alg + # "The q-alg archive has been subsumed into Quantum Algebra (math.QA)." + "q-alg": "math.QA", +} # parsing arguments function def parse_arguments(): - """Parse command-line options, returns parsed argument namespace. - - Note: The --limit parameter sets the total number of papers to fetch - across all search queries, not per query. ArXiv API recommends - maximum of 30000 results per session for optimal performance. + """ + Parse command-line options, returns parsed argument namespace. """ LOGGER.info("Parsing command-line options") parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - "--limit", - type=int, - default=DEFAULT_FETCH_LIMIT, - help=( - f"Total limit of papers to fetch across all search queries " - f"(default: {DEFAULT_FETCH_LIMIT}). Maximum recommended: 30000. " - f"Note: Individual queries limited to 500 results " - f"(implementation choice). 
" - f"See ArXiv API documentation: " - f"https://info.arxiv.org/help/api/user-manual.html" - ), - ) parser.add_argument( "--enable-save", action="store_true", @@ -302,6 +104,21 @@ def parse_arguments(): action="store_true", help="Enable git actions (fetch, merge, add, commit, and push)", ) + parser.add_argument( + "--limit", + type=int, + default=DEFAULT_FETCH_LIMIT, + help=( + "Limit number of fetched articles (default:" + f" {DEFAULT_FETCH_LIMIT}). Use a value of -1 to fetch all articles" + " (remove limit)." + ), + ) + parser.add_argument( + "--show-added", + action="store_true", + help="Log additional information about when articles were added", + ) args = parser.parse_args() if not args.enable_save and args.enable_git: parser.error("--enable-git requires --enable-save") @@ -333,318 +150,452 @@ def initialize_all_data_files(args): initialize_data_file(FILE_ARXIV_AUTHOR_BUCKET, HEADER_AUTHOR_BUCKET) -def normalize_license_text(raw_text): - """ - Convert raw license text to standardized CC license identifiers. +def get_identifier_mapping(): + global IDENTIER_MAPPING + LOGGER.info("Loading CC Legal Tool metadata for CC identifer mapping") + file_path = shared.path_join(PATHS["data"], "cc-legal-tools.csv") + identifier_mapping = {} + with open(file_path, "r", encoding="utf-8") as file_obj: + rows = csv.DictReader(file_obj, dialect="unix") + for row in rows: + simple_url = row["CANONICAL_URL"].replace("https://", "") + simple_url = simple_url.rstrip("/") + identifier = row["IDENTIFIER"] + identifier_mapping[simple_url] = identifier + + # Add legacy entry + simple_url = "creativecommons.org/licenses/publicdomain" + identifier_mapping[simple_url] = "CERTIFICATION 1.0 US" + + IDENTIER_MAPPING = dict( + sorted(identifier_mapping.items(), key=lambda item: item[1]) + ) + - Uses regex patterns to identify CC licenses from paper text. - Returns specific license (e.g., "CC BY", "CC0") or "Unknown". 
+def query_category_mapping(args, session): """ - if not raw_text: - return "Unknown" + Query to establish mapping of category codes and names. - for pattern, license_type in CC_PATTERNS: - if pattern.search(raw_text): - return license_type + Also see https://arxiv.org/category_taxonomy + """ + global CATEGORY_MAPPING - return "Unknown" + params = {"verb": "ListSets"} + try: + response = session.get(BASE_URL, params=params, timeout=60) + response.raise_for_status() + except requests.HTTPError as e: + raise shared.QuantifyingException(f"HTTP Error: {e}", 1) + except requests.RequestException as e: + raise shared.QuantifyingException(f"Request Exception: {e}", 1) + + root = etree.fromstring(response.content) + CATEGORY_MAPPING = {} + sets = root.findall(".//{http://www.openarchives.org/OAI/2.0/}set") + for set_ in sets: + spec, name = set_.getchildren() + # Ensure category code (key) matches code used in articles + spec_list = spec.text.split(":") + if len(spec_list) > 1: + # Remove parent category and replace colon with period + # 3 part examples: + # math:math:AC => math.AC + # physics:astro-ph:CO => astro-ph.CO + # 2 part examples + # physics:astro-ph => astro-ph + # physics:quant-ph => quant-ph + spec_text = ".".join(spec_list[1:]) + else: + spec_text = spec.text + CATEGORY_MAPPING[spec_text] = name.text + CATEGORY_MAPPING = dict(sorted(CATEGORY_MAPPING.items())) -def extract_license_info(entry): +def extract_record_cc_legal_tool_identifier(record): """ - Extract CC license information from ArXiv paper entry. + Extract CC legal tool identifier from OAI-PMH XML record. - Checks rights field first, then summary field for license patterns. - Returns normalized license identifier or "Unknown". + Returns normalized legal tool identifier or specific error indicator. 
""" - # checking through the rights field first then summary - if hasattr(entry, "rights") and entry.rights: - license_info = normalize_license_text(entry.rights) - if license_info != "Unknown": - return license_info - if hasattr(entry, "summary") and entry.summary: - license_info = normalize_license_text(entry.summary) - if license_info != "Unknown": - return license_info - return "Unknown" - - -def extract_category_from_entry(entry): - """Extract primary category from ArXiv entry.""" - if ( - hasattr(entry, "arxiv_primary_category") - and entry.arxiv_primary_category - ): - return entry.arxiv_primary_category.get("term", "Unknown") - if hasattr(entry, "tags") and entry.tags: - # Get first category from tags - for tag in entry.tags: - if hasattr(tag, "term"): - return tag.term - return "Unknown" - - -def extract_year_from_entry(entry): - """Extract publication year from ArXiv entry.""" - if hasattr(entry, "published") and entry.published: - try: - return entry.published[:4] # Extract year from date string - except (AttributeError, IndexError) as e: - LOGGER.debug( - f"Failed to extract year from '{entry.published}': {e}" - ) - return "Unknown" - - -def extract_author_count_from_entry(entry): - """Extract number of authors from ArXiv entry.""" - if hasattr(entry, "authors") and entry.authors: - try: - return len(entry.authors) - except Exception as e: - LOGGER.debug(f"Failed to count authors from entry.authors: {e}") - if hasattr(entry, "author") and entry.author: - return 1 - return "Unknown" + # Find license element in arXiv namespace + license_element = record.find(".//{http://arxiv.org/OAI/arXiv/}license") + + if license_element is not None and license_element.text: + license_url = license_element.text.strip() + simple_url = copy(license_url).replace("http://", "") + simple_url = simple_url.replace("https://", "") + simple_url = simple_url.rstrip("/") + # Check exact mapping first + if simple_url in IDENTIER_MAPPING: + identifer = IDENTIER_MAPPING[simple_url] + 
# Validate CC URLs more strictly + elif "creativecommons.org" in license_url.lower(): + identifer = f"CC (ambiguous): {license_url}" + else: + identifer = "N/A: non-CC" + else: + identifer = "N/A: article missing license field" + return identifer -def bucket_author_count(n): - """ - Convert author count to predefined buckets for analysis. - Buckets: "1", "2", "3", "4", "5+", "Unknown" - Reduces granularity for better statistical analysis. +def extract_record_metadata(args, record): """ - if n == 1: - return "1" - if n == 2: - return "2" - if n == 3: - return "3" - if n == 4: - return "4" - if n >= 5: - return "5+" - return "Unknown" - - -def save_count_data( - license_counts, category_counts, year_counts, author_counts -): - """ - Save all collected data to CSV files. + Extract paper metadata from OAI-PMH XML record. + Returns metadata dictionary. """ - # license_counts: {license: count} - # category_counts: {license: {category_code: count}} - # year_counts: {license: {year: count}} - # author_counts: {license: {author_count(int|None): count}} - - # Save license counts - data = [] - for lic, c in license_counts.items(): - data.append({"TOOL_IDENTIFIER": lic, "COUNT": c}) - data.sort(key=itemgetter("TOOL_IDENTIFIER")) - with open(FILE_ARXIV_COUNT, "w", encoding="utf-8", newline="\n") as fh: - writer = csv.DictWriter(fh, fieldnames=HEADER_COUNT, dialect="unix") - writer.writeheader() - for row in data: - writer.writerow(row) - - # Save category report with labels - data = [] - for lic, cats in category_counts.items(): - for code, c in cats.items(): - label = CATEGORIES.get(code, code) - data.append( - { - "TOOL_IDENTIFIER": lic, - "CATEGORY_CODE": code, - "CATEGORY_LABEL": label, - "COUNT": c, - } - ) - data.sort(key=itemgetter("TOOL_IDENTIFIER", "CATEGORY_CODE")) - with open( - FILE_ARXIV_CATEGORY_REPORT, "w", encoding="utf-8", newline="\n" - ) as fh: - writer = csv.DictWriter( - fh, fieldnames=HEADER_CATEGORY_REPORT, dialect="unix" + metadata = {} + + # Extract 
identifer first to avoid unnecessary work + identifer = extract_record_cc_legal_tool_identifier(record) + if not identifer.startswith("CC"): + return {} + # metadata value set below to ensure natural order of keys + + if args.show_added: + # Extract added on + added_on_elem = record.find( + ".//{http://www.openarchives.org/OAI/2.0/}datestamp" ) - writer.writeheader() - for row in data: - writer.writerow(row) - - # Save year counts - data = [] - for lic, years in year_counts.items(): - for year, c in years.items(): - data.append({"TOOL_IDENTIFIER": lic, "YEAR": year, "COUNT": c}) - data.sort(key=itemgetter("TOOL_IDENTIFIER", "YEAR")) - with open(FILE_ARXIV_YEAR, "w", encoding="utf-8", newline="\n") as fh: - writer = csv.DictWriter(fh, fieldnames=HEADER_YEAR, dialect="unix") - writer.writeheader() - for row in data: - writer.writerow(row) - - # Save author buckets summary - data = [] - for lic, acs in author_counts.items(): - # build buckets across licenses - bucket_counts = Counter() - for ac, c in acs.items(): - b = bucket_author_count(ac) - bucket_counts[b] += c - for b, c in bucket_counts.items(): - data.append( - {"TOOL_IDENTIFIER": lic, "AUTHOR_BUCKET": b, "COUNT": c} + if added_on_elem is not None and added_on_elem.text: + metadata["added_on"] = added_on_elem.text.strip() + else: + metadata["added_on"] = False + + # Extract author count + authors = record.findall(".//{http://arxiv.org/OAI/arXiv/}author") + metadata["author_count"] = len(authors) if authors else 0 + + # Extract categories + categories_elem = record.find(".//{http://arxiv.org/OAI/arXiv/}categories") + if categories_elem is not None and categories_elem.text: + metadata["categories"] = categories_elem.text.strip().split() + for index, code in enumerate(metadata["categories"]): + metadata["categories"][index] = SUBSUMED_CATEGORIES.get(code, code) + metadata["categories"] = list(set(metadata["categories"])) + metadata["categories"].sort() + else: + metadata["categories"] = False + + # Set identifer 
+ metadata["identifer"] = identifer + + # Extract year from 1) updated, 2) created + updated_elem = record.find(".//{http://arxiv.org/OAI/arXiv/}updated") + if updated_elem is not None and updated_elem.text: + try: + metadata["year"] = updated_elem.text.strip()[:4] # Extract year + except (AttributeError, IndexError) as e: + LOGGER.error( + f"Failed to extract year from '{updated_elem.text}': {e}" ) - data.sort(key=itemgetter("TOOL_IDENTIFIER", "AUTHOR_BUCKET")) - with open( - FILE_ARXIV_AUTHOR_BUCKET, "w", encoding="utf-8", newline="\n" - ) as fh: - writer = csv.DictWriter( - fh, fieldnames=HEADER_AUTHOR_BUCKET, dialect="unix" - ) - writer.writeheader() - for row in data: - writer.writerow(row) + metadata["year"] = "Unknown" + else: + created_elem = record.find(".//{http://arxiv.org/OAI/arXiv/}created") + if created_elem is not None and created_elem.text: + try: + metadata["year"] = created_elem.text.strip()[ + :4 + ] # Extract year + except (AttributeError, IndexError) as e: + LOGGER.error( + f"Failed to extract year from '{created_elem.text}': {e}" + ) + metadata["year"] = "Unknown" + else: + metadata["year"] = "Unknown" + return metadata -def query_arxiv(args): - """ - Main function to query ArXiv API and collect CC license data. +def bucket_author_count(author_count): """ + Convert author count to predefined buckets: "1", "2", "3", "4", "5+". + """ + if author_count <= 4: + return str(author_count) + return "5+" - LOGGER.info("Beginning to fetch results from ArXiv API") - session = shared.get_session() - results_per_iteration = 50 +def query_arxiv(args, session): + """ + Query arXiv OAI-PMH API starting from addition date 2008-02-05 and return + information about articles using a CC legal tool. - search_queries = SEARCH_QUERIES + 2008-02-05 was the first date that articles using a CC legal tool were + added to arXiv. 
+ """ + if args.limit == -1: + count_desc = "all" + else: + count_desc = f"a maximum of {args.limit}" + LOGGER.info( + f"Fetching {count_desc} articles starting form add date 2008-02-05" + ) # Data structures for counting - license_counts = defaultdict(int) + tool_counts = defaultdict(int) category_counts = defaultdict(lambda: defaultdict(int)) year_counts = defaultdict(lambda: defaultdict(int)) author_counts = defaultdict(lambda: defaultdict(int)) + batch = 1 total_fetched = 0 - - for search_query in search_queries: - if total_fetched >= args.limit: + cc_articles_found = 0 + if args.show_added: + cc_articles_added = [] + resumption_token = None + + # Proceed is set to False when limit reached or end of records (missing + # resumption token) + proceed = True + while proceed: + if args.limit > 0 and args.limit <= total_fetched: + proceed = False break - LOGGER.info(f"Searching for: {search_query}") - papers_found_for_query = 0 - - for start in range( - 0, - min(args.limit - total_fetched, 500), - results_per_iteration, - ): - encoded_query = urllib.parse.quote_plus(search_query) - query = ( - f"search_query={encoded_query}&start={start}" - f"&max_results={results_per_iteration}" + if resumption_token: + # Continue with resumption token + params = { + "verb": "ListRecords", + "resumptionToken": resumption_token, + } + verb = "resuming" + else: + # Initial request with date range + params = { + "verb": "ListRecords", + "metadataPrefix": "arXiv", + "from": "2008-02-05", # First addition of articles using CC + } + verb = "starting" + + # Make API request + LOGGER.info( + f"Fetching batch {batch} {verb} from record {total_fetched}" + ) + batch += 1 + + try: + # Build OAI-PMH request URL + response = session.get(BASE_URL, params=params, timeout=60) + response.raise_for_status() + except requests.HTTPError as e: + raise shared.QuantifyingException(f"HTTP Error: {e}", 1) + except requests.RequestException as e: + raise shared.QuantifyingException(f"Request Exception: {e}", 
1) + + root = etree.fromstring(response.content) + + # Check for errors + error_element = root.find( + ".//{http://www.openarchives.org/OAI/2.0/}error" + ) + if error_element is not None: + raise shared.QuantifyingException( + f"OAI-PMH Error: {error_element.text}", 1 ) - papers_found_in_batch = 0 + # Process batch of article records + records = root.findall( + ".//{http://www.openarchives.org/OAI/2.0/}record" + ) + batch_cc_count = 0 + for record in records: + if args.limit > 0 and args.limit <= total_fetched: + proceed = False + break + total_fetched += 1 - try: - LOGGER.info( - f"Fetching results {start} - " - f"{start + results_per_iteration}" - ) - response = session.get(BASE_URL + query, timeout=30) - response.raise_for_status() - feed = feedparser.parse(response.content) + metadata = extract_record_metadata(args, record) + if not metadata: # Only true for articles using a CC legal tool + continue + + if args.show_added and metadata["added_on"]: + cc_articles_added.append(metadata["added_on"]) + identifer = metadata["identifer"] + + # Count by author count and identifer + author_count = metadata["author_count"] + author_counts[identifer][author_count] += 1 - for entry in feed.entries: - if total_fetched >= args.limit: - break + # Count by category and identifer + categories = metadata["categories"] + if metadata["categories"]: + for category in categories: + category_counts[identifer][category] += 1 - license_info = extract_license_info(entry) + # Count by identifer + tool_counts[identifer] += 1 - if license_info != "Unknown": + # Count by year and identifer + year = metadata["year"] + year_counts[identifer][year] += 1 - category = extract_category_from_entry(entry) - year = extract_year_from_entry(entry) - author_count = extract_author_count_from_entry(entry) + batch_cc_count += 1 + cc_articles_found += 1 - # Count by license - license_counts[license_info] += 1 + if args.show_added and cc_articles_added: + cc_articles_added = list(set(cc_articles_added)) + 
cc_articles_added.sort() + LOGGER.info(f" CC articles added: {', '.join(cc_articles_added)}") - # Count by category and license - category_counts[license_info][category] += 1 + LOGGER.info( + f" Batch CC legal tool articles: {batch_cc_count}, Total" + f" CC legal tool articles: {cc_articles_found}" + ) - # Count by year and license - year_counts[license_info][year] += 1 + # Check for resumption token + resumption_element = root.find( + ".//{http://www.openarchives.org/OAI/2.0/}resumptionToken" + ) + if not proceed: + break + elif resumption_element is not None and resumption_element.text: + resumption_token = resumption_element.text + else: + LOGGER.info("No more records available") + proceed = False + break - # Count by author count and license - author_counts[license_info][author_count] += 1 + # OAI-PMH requires a 3 second delay between requests + # https://info.arxiv.org/help/api/tou.html#rate-limits + time.sleep(3) - total_fetched += 1 - papers_found_in_batch += 1 - papers_found_for_query += 1 + data = { + "author_counts": author_counts, + "category_counts": category_counts, + "tool_counts": tool_counts, + "year_counts": year_counts, + } + return data, cc_articles_found - # arXiv recommends a 3-seconds delay between consecutive - # api calls for efficiency - time.sleep(3) - except requests.HTTPError as e: - raise shared.QuantifyingException(f"HTTP Error: {e}", 1) - except requests.RequestException as e: - raise shared.QuantifyingException(f"Request Exception: {e}", 1) - except KeyError as e: - raise shared.QuantifyingException(f"KeyError: {e}", 1) - if papers_found_in_batch == 0: - break +def rows_to_csv(args, fieldnames, rows, file_path): + if not args.enable_save: + return args - LOGGER.info( - f"Query '{search_query}' completed: " - f"{papers_found_for_query} papers found" + with open(file_path, "w", encoding="utf-8", newline="\n") as file_handle: + writer = csv.DictWriter( + file_handle, fieldnames=fieldnames, dialect="unix" ) + writer.writeheader() + for 
row in rows: + writer.writerow(row) + + +def write_data(args, data): + """ + Write fetched data to CSV files. + """ + # Save author buckets report + # fetched_data["author_counts"]: {identifer: {author_count: count}} + rows = [] + for identifier, author_count_data in data["author_counts"].items(): + # build buckets across CC legal tool identifiers + bucket_counts = Counter() + for author_count, count in author_count_data.items(): + bucket = bucket_author_count(author_count) + bucket_counts[bucket] += count + # add rows + for bucket, count in bucket_counts.items(): + rows.append( + { + "TOOL_IDENTIFIER": identifier, + "AUTHOR_BUCKET": bucket, + "COUNT": count, + } + ) + rows.sort(key=itemgetter("TOOL_IDENTIFIER", "AUTHOR_BUCKET")) + rows_to_csv(args, HEADER_AUTHOR_BUCKET, rows, FILE_ARXIV_AUTHOR_BUCKET) + + # Save category report + # fetched_data["category_counts"]: {identifer: {category_code: count}} + rows = [] + for identifier, categories in data["category_counts"].items(): + for code, count in categories.items(): + # map category codes to names + name = CATEGORY_MAPPING.get(code, code) + # append row + rows.append( + { + "TOOL_IDENTIFIER": identifier, + "CATEGORY_CODE": code, + "CATEGORY_NAME": name, + "COUNT": count, + } + ) + rows.sort(key=itemgetter("TOOL_IDENTIFIER", "CATEGORY_CODE")) + rows_to_csv(args, HEADER_CATEGORY_REPORT, rows, FILE_ARXIV_CATEGORY_REPORT) + + # Save tool counts report + # fetched_data["tool_counts"]: {identfier: count} + rows = [] + for identifier, count in data["tool_counts"].items(): + rows.append({"TOOL_IDENTIFIER": identifier, "COUNT": count}) + rows.sort(key=itemgetter("TOOL_IDENTIFIER")) + rows_to_csv(args, HEADER_COUNT, rows, FILE_ARXIV_COUNT) + + # Save year count report + # fetched_data["year_counts"]: {identifer: {year: count}} + rows = [] + for identifier, years in data["year_counts"].items(): + for year, count in years.items(): + rows.append( + {"TOOL_IDENTIFIER": identifier, "YEAR": year, "COUNT": count} + ) + 
rows.sort(key=itemgetter("TOOL_IDENTIFIER", "YEAR")) + rows_to_csv(args, HEADER_YEAR, rows, FILE_ARXIV_YEAR) - # Save results - if args.enable_save: - save_count_data( - license_counts, category_counts, year_counts, author_counts - ) - # save provenance +def write_provence(args, cc_articles_found): + """ + Write provenance information to YAML file. + """ + if not args.enable_save: + return args + + # Save provenance + desc = "Open Archives Initiative Protocol for Metadata Havesting (OAI-PMH)" provenance_data = { - "total_fetched": total_fetched, - "queries": search_queries, - "limit": args.limit, + "api_description": desc, + "api_endpoint": BASE_URL, + "cc_articles_found": cc_articles_found, + "fetch_limit": args.limit, + "from_add_date": "2008-02-05", "quarter": QUARTER, "script": os.path.basename(__file__), } - # write provenance YAML for auditing - try: - with open(FILE_PROVENANCE, "w", encoding="utf-8", newline="\n") as fh: - yaml.dump(provenance_data, fh, default_flow_style=False, indent=2) - except Exception as e: - LOGGER.warning("Failed to write provenance file: %s", e) - - LOGGER.info(f"Total CC licensed papers fetched: {total_fetched}") + # Write provenance YAML for auditing + with open( + FILE_PROVENANCE, "w", encoding="utf-8", newline="\n" + ) as file_handle: + yaml.dump( + provenance_data, + file_handle, + default_flow_style=False, + indent=2, + ) def main(): - """Main function.""" - LOGGER.info("Script execution started.") args = parse_arguments() shared.paths_log(LOGGER, PATHS) shared.git_fetch_and_merge(args, PATHS["repo"]) initialize_all_data_files(args) - query_arxiv(args) + get_identifier_mapping() + session = shared.get_session() + query_category_mapping(args, session) + data, cc_articles_found = query_arxiv(args, session) + write_data(args, data) + write_provence(args, cc_articles_found) args = shared.git_add_and_commit( args, PATHS["repo"], PATHS["data_quarter"], - f"Add and commit new ArXiv CC license data for {QUARTER}", + f"Add and commit 
new arXiv data for {QUARTER}", ) shared.git_push_changes(args, PATHS["repo"]) diff --git a/scripts/1-fetch/gcs_fetch.py b/scripts/1-fetch/gcs_fetch.py index 0f8cdd39..c3ad313a 100755 --- a/scripts/1-fetch/gcs_fetch.py +++ b/scripts/1-fetch/gcs_fetch.py @@ -2,6 +2,7 @@ """ Fetch CC Legal Tool usage data from Google Custom Search (GCS) API. """ + # Standard library import argparse import csv diff --git a/scripts/2-process/gcs_process.py b/scripts/2-process/gcs_process.py index 9f830d56..12fd3942 100755 --- a/scripts/2-process/gcs_process.py +++ b/scripts/2-process/gcs_process.py @@ -2,6 +2,7 @@ """ Process Google Custom Search (GCS) data. """ + # Standard library import argparse import os diff --git a/scripts/2-process/github_process.py b/scripts/2-process/github_process.py index 5d8911b0..85c0f285 100755 --- a/scripts/2-process/github_process.py +++ b/scripts/2-process/github_process.py @@ -3,6 +3,7 @@ This file is dedicated to processing GitHub data for analysis and comparison between quarters. """ + # Standard library import argparse import os diff --git a/scripts/2-process/wikipedia_process.py b/scripts/2-process/wikipedia_process.py index b7e7c023..085956de 100755 --- a/scripts/2-process/wikipedia_process.py +++ b/scripts/2-process/wikipedia_process.py @@ -3,6 +3,7 @@ This file is dedicated to processing Wikipedia data for analysis and comparison between quarters. """ + # Standard library import argparse import os diff --git a/scripts/3-report/gcs_report.py b/scripts/3-report/gcs_report.py index f8a7d23c..5bd1f218 100755 --- a/scripts/3-report/gcs_report.py +++ b/scripts/3-report/gcs_report.py @@ -3,6 +3,7 @@ This file is dedicated to visualizing and analyzing the data collected from Google Custom Search (GCS). 
""" + # Standard library import argparse import os diff --git a/scripts/3-report/github_report.py b/scripts/3-report/github_report.py index f8b29022..5fac04ac 100755 --- a/scripts/3-report/github_report.py +++ b/scripts/3-report/github_report.py @@ -3,6 +3,7 @@ This file is dedicated to visualizing and analyzing the data collected from GitHub. """ + # Standard library import argparse import os diff --git a/scripts/3-report/wikipedia_report.py b/scripts/3-report/wikipedia_report.py index 2458bd19..dc9d3188 100755 --- a/scripts/3-report/wikipedia_report.py +++ b/scripts/3-report/wikipedia_report.py @@ -3,6 +3,7 @@ This file is dedicated to visualizing and analyzing the data collected from Wikipedia. """ + # Standard library import argparse import os diff --git a/scripts/3-report/zzz-notes.py b/scripts/3-report/zzz-notes.py index 1ddcc64d..d6f6d632 100755 --- a/scripts/3-report/zzz-notes.py +++ b/scripts/3-report/zzz-notes.py @@ -2,6 +2,7 @@ """ Add project references. """ + # Standard library import argparse import os diff --git a/sources.md b/sources.md index 04d6ada1..0b6f57fe 100644 --- a/sources.md +++ b/sources.md @@ -6,21 +6,23 @@ public domain. Below are the sources and their respective information: ## arXiv -**Description:** arXiv is a free distribution service and an open-access archive for scholarly articles in physics, mathematics, computer science, quantitative biology, quantitative finance, statistics, electrical engineering and systems science, and economics. All arXiv articles are available under various open licenses or are in the public domain. +**Description:** arXiv is a free distribution service and an open-access +archive for scholarly articles in physics, mathematics, computer science, +quantitative biology, quantitative finance, statistics, electrical engineering +and systems science, and economics. All arXiv articles are available under +various open licenses or are in the public domain. 
**API documentation link:** -- [arXiv API User Manual](https://arxiv.org/help/api/user-manual) -- [arXiv API Reference](https://arxiv.org/help/api) -- [Base URL](http://export.arxiv.org/api/query) +- [arXiv OAI-PMH Interface](https://info.arxiv.org/help/oa/index.html) +- [Base URL (OAI-PMH)](https://oaipmh.arxiv.org/oai) - [arXiv Subject Classifications](https://arxiv.org/category_taxonomy) - [Terms of Use for arXiv APIs](https://info.arxiv.org/help/api/tou.html) **API information:** - No API key required -- Query limit: No official limit, but requests should be made responsibly -- Data available through Atom XML format -- Supports search by fields: title (ti), author (au), abstract (abs), comment (co), journal reference (jr), subject category (cat), report number (rn), id, all (searches all fields), and submittedDate (date filter) -- Metadata includes licensing information for each paper +- Query limit: 3 second delay between requests +- **Data format**: OAI-PMH XML format with structured metadata fields +- Metadata includes comprehensive licensing information for each paper ## CC Legal Tools @@ -51,7 +53,10 @@ the `./dev/update_legal_tools_data.sh` command. ## Europeana **Description:** -The **Europeana Search API** provides access to digital cultural heritage metadata records aggregated from museums, libraries, and archives across Europe. This project uses the API to fetch aggregated counts of cultural heritage records by data provider, rights statement, and theme. +The **Europeana Search API** provides access to digital cultural heritage +metadata records aggregated from museums, libraries, and archives across +Europe. This project uses the API to fetch aggregated counts of cultural +heritage records by data provider, rights statement, and theme. **Official API Documentation:** - [Search API Documentation](https://europeana.atlassian.net/wiki/spaces/EF/pages/2385739812/Search+API+Documentation)